diff --git a/app/jobs/process_pdf_job.rb b/app/jobs/process_pdf_job.rb
index 8fb4fccef..b1e35c19b 100644
--- a/app/jobs/process_pdf_job.rb
+++ b/app/jobs/process_pdf_job.rb
@@ -11,6 +11,7 @@ class ProcessPdfJob < ApplicationJob
 
     begin
       pdf_import.process_with_ai
+      upload_to_vector_store(pdf_import)
 
       # For bank statements, extract transactions and generate import rows
       if pdf_import.bank_statement?
@@ -58,4 +59,24 @@ class ProcessPdfJob < ApplicationJob
                error: error.class.name.demodulize)
       end
     end
+
+    # Best-effort: hands the uploaded PDF to the family's document upload,
+    # tagged with the import's document type. A nil result or a StandardError
+    # is logged as a warning and never aborts import processing.
+    def upload_to_vector_store(pdf_import)
+      filename = pdf_import.pdf_file.filename.to_s
+      file_content = pdf_import.pdf_file_content
+
+      family_document = pdf_import.family.upload_document(
+        file_content: file_content,
+        filename: filename,
+        metadata: { "type" => pdf_import.document_type }
+      )
+
+      return if family_document
+
+      Rails.logger.warn("ProcessPdfJob: Vector store upload failed for import #{pdf_import.id}")
+    rescue StandardError => e
+      Rails.logger.warn("ProcessPdfJob: Vector store upload raised #{e.class.name} for import #{pdf_import.id}: #{e.message}")
+    end
 end
diff --git a/app/models/family/vector_searchable.rb b/app/models/family/vector_searchable.rb
index 4f22d47fe..73b3946c4 100644
--- a/app/models/family/vector_searchable.rb
+++ b/app/models/family/vector_searchable.rb
@@ -37,7 +37,7 @@ module Family::VectorSearchable
     response.success? ? response.data : []
   end
 
-  def upload_document(file_content:, filename:)
+  def upload_document(file_content:, filename:, metadata: {})
     adapter = vector_store_adapter
     return nil unless adapter
 
@@ -57,7 +57,8 @@ module Family::VectorSearchable
       content_type: Marcel::MimeType.for(name: filename),
       file_size: file_content.bytesize,
       provider_file_id: response.data[:file_id],
-      status: "ready"
+      status: "ready",
+      metadata: metadata || {}
     )
   end
 
diff --git a/test/jobs/process_pdf_job_test.rb b/test/jobs/process_pdf_job_test.rb
index c6374de23..763d45078 100644
--- a/test/jobs/process_pdf_job_test.rb
+++ b/test/jobs/process_pdf_job_test.rb
@@ -32,4 +32,69 @@ class ProcessPdfJobTest < ActiveJob::TestCase
     # Should not change status since already complete
     assert_equal "complete", processed_import.reload.status
   end
+
+  test "uploads non-bank PDF to vector store with classified type metadata" do
+    pdf_content = attach_pdf!(@import)
+
+    stub_process_with_ai!(@import, ai_summary: "A tax return", document_type: "financial_document")
+    @import.stubs(:send_next_steps_email)
+    @import.expects(:extract_transactions).never
+
+    @family.expects(:upload_document).with do |file_content:, filename:, metadata:|
+      assert_equal pdf_content, file_content
+      assert_equal "sample_bank_statement.pdf", filename
+      assert_equal({ "type" => "financial_document" }, metadata)
+      true
+    end.returns(family_documents(:tax_return))
+
+    ProcessPdfJob.perform_now(@import)
+
+    assert_equal "complete", @import.reload.status
+  end
+
+  test "uploads bank statement PDF to vector store with classified type metadata" do
+    pdf_content = attach_pdf!(@import)
+
+    stub_process_with_ai!(@import, ai_summary: "A bank statement", document_type: "bank_statement")
+    @import.expects(:extract_transactions).once
+    # Mocha ignores blocks given to `expects`, so use a real singleton method
+    # for the row-count side effect and count the calls by hand.
+    generate_rows_calls = 0
+    @import.define_singleton_method(:generate_rows_from_extracted_data) do
+      generate_rows_calls += 1
+      update_column(:rows_count, 1)
+    end
+    @import.expects(:sync_mappings).once
+    @import.stubs(:send_next_steps_email)
+
+    @family.expects(:upload_document).with do |file_content:, filename:, metadata:|
+      assert_equal pdf_content, file_content
+      assert_equal "sample_bank_statement.pdf", filename
+      assert_equal({ "type" => "bank_statement" }, metadata)
+      true
+    end.returns(family_documents(:tax_return))
+
+    ProcessPdfJob.perform_now(@import)
+
+    assert_equal 1, generate_rows_calls
+    assert_equal "pending", @import.reload.status
+  end
+
+  private
+
+    def attach_pdf!(import)
+      pdf_content = file_fixture("imports/sample_bank_statement.pdf").binread
+      import.pdf_file.attach(
+        io: StringIO.new(pdf_content),
+        filename: "sample_bank_statement.pdf",
+        content_type: "application/pdf"
+      )
+      pdf_content
+    end
+
+    # Mocha's `stubs`/`expects` silently ignore a block, so install a real
+    # singleton method that applies the AI classification when the job calls it.
+    def stub_process_with_ai!(import, **attributes)
+      import.define_singleton_method(:process_with_ai) { update!(attributes) }
+    end
 end
diff --git a/test/models/family_test.rb b/test/models/family_test.rb
index 75761fb2e..b0fb3632e 100644
--- a/test/models/family_test.rb
+++ b/test/models/family_test.rb
@@ -43,4 +43,33 @@ class FamilyTest < ActiveSupport::TestCase
 
     assert_includes family.available_merchants, new_merchant
   end
+
+  test "upload_document stores provided metadata on family document" do
+    family = families(:dylan_family)
+    family.update!(vector_store_id: nil)
+
+    adapter = mock("vector_store_adapter")
+    adapter.expects(:create_store).with(name: "Family #{family.id} Documents").returns(
+      VectorStore::Response.new(success?: true, data: { id: "vs_test123" }, error: nil)
+    )
+    adapter.expects(:upload_file).with(
+      store_id: "vs_test123",
+      file_content: "hello",
+      filename: "notes.txt"
+    ).returns(
+      VectorStore::Response.new(success?: true, data: { file_id: "file-xyz" }, error: nil)
+    )
+
+    VectorStore::Registry.stubs(:adapter).returns(adapter)
+
+    document = family.upload_document(
+      file_content: "hello",
+      filename: "notes.txt",
+      metadata: { "type" => "financial_document" }
+    )
+
+    assert_not_nil document
+    assert_equal({ "type" => "financial_document" }, document.metadata)
+    assert_equal "vs_test123", family.reload.vector_store_id
+  end
 end