mirror of
https://github.com/we-promise/sure.git
synced 2026-04-07 14:31:25 +00:00
Index all PDF imports into vector store with type metadata
This commit is contained in:
@@ -11,6 +11,7 @@ class ProcessPdfJob < ApplicationJob
|
||||
|
||||
begin
|
||||
pdf_import.process_with_ai
|
||||
upload_to_vector_store(pdf_import)
|
||||
|
||||
# For bank statements, extract transactions and generate import rows
|
||||
if pdf_import.bank_statement?
|
||||
@@ -58,4 +59,19 @@ class ProcessPdfJob < ApplicationJob
|
||||
error: error.class.name.demodulize)
|
||||
end
|
||||
end
|
||||
|
||||
# Uploads the import's PDF to its family's document store, tagging it with the
# import's document type under the "type" metadata key.
#
# Indexing is best-effort: a falsy result from `upload_document` was already
# handled as a warning only, so an exception raised while reading the file or
# talking to the store is handled the same way instead of aborting the rest of
# the job.
#
# @param pdf_import [#family, #pdf_file, #pdf_file_content, #document_type, #id]
#   the import whose attached PDF should be indexed
# @return [void]
def upload_to_vector_store(pdf_import)
  filename = pdf_import.pdf_file.filename.to_s
  file_content = pdf_import.pdf_file_content

  family_document = pdf_import.family.upload_document(
    file_content: file_content,
    filename: filename,
    metadata: { "type" => pdf_import.document_type }
  )

  return if family_document

  Rails.logger.warn("ProcessPdfJob: Vector store upload failed for import #{pdf_import.id}")
rescue StandardError => e
  # Keep the failure visible, but do not let optional indexing fail the import.
  Rails.logger.warn(
    "ProcessPdfJob: Vector store upload failed for import #{pdf_import.id}: #{e.class.name} - #{e.message}"
  )
end
|
||||
end
|
||||
|
||||
@@ -37,7 +37,7 @@ module Family::VectorSearchable
|
||||
response.success? ? response.data : []
|
||||
end
|
||||
|
||||
def upload_document(file_content:, filename:)
|
||||
def upload_document(file_content:, filename:, metadata: {})
|
||||
adapter = vector_store_adapter
|
||||
return nil unless adapter
|
||||
|
||||
@@ -57,7 +57,8 @@ module Family::VectorSearchable
|
||||
content_type: Marcel::MimeType.for(name: filename),
|
||||
file_size: file_content.bytesize,
|
||||
provider_file_id: response.data[:file_id],
|
||||
status: "ready"
|
||||
status: "ready",
|
||||
metadata: metadata || {}
|
||||
)
|
||||
end
|
||||
|
||||
|
||||
@@ -32,4 +32,62 @@ class ProcessPdfJobTest < ActiveJob::TestCase
|
||||
# Should not change status since already complete
|
||||
assert_equal "complete", processed_import.reload.status
|
||||
end
|
||||
|
||||
# A PDF whose document type is set to a non-bank value during AI processing
# must still be uploaded to the family's vector store with that type as
# metadata, and must not go through transaction extraction.
test "uploads non-bank PDF to vector store with classified type metadata" do
  pdf_content = attach_pdf!(@import)

  # Mocha's `stubs` does not accept a block (it is silently discarded), so the
  # classification side effect is installed as a real singleton method that
  # runs at the moment the job calls `process_with_ai`.
  @import.define_singleton_method(:process_with_ai) do |*_args, **_opts|
    update!(ai_summary: "A tax return", document_type: "financial_document")
  end
  @import.stubs(:send_next_steps_email)
  @import.expects(:extract_transactions).never

  @family.expects(:upload_document).with do |file_content:, filename:, metadata:|
    assert_equal pdf_content, file_content
    assert_equal "sample_bank_statement.pdf", filename
    assert_equal({ "type" => "financial_document" }, metadata)
    true
  end.returns(family_documents(:tax_return))

  ProcessPdfJob.perform_now(@import)

  assert_equal "complete", @import.reload.status
end
|
||||
|
||||
# A PDF whose document type is set to "bank_statement" during AI processing
# must be uploaded to the family's vector store with that type as metadata,
# in addition to going through extraction, row generation and mapping sync.
test "uploads bank statement PDF to vector store with classified type metadata" do
  pdf_content = attach_pdf!(@import)

  # Mocha discards blocks given to `stubs` / `expects(...).once`, so the side
  # effects the job relies on are installed as real singleton methods that run
  # when the job invokes them.
  @import.define_singleton_method(:process_with_ai) do |*_args, **_opts|
    update!(ai_summary: "A bank statement", document_type: "bank_statement")
  end

  # Counted by hand to keep the "called exactly once" check the Mocha
  # expectation was meant to provide.
  row_generation_calls = 0
  @import.define_singleton_method(:generate_rows_from_extracted_data) do |*_args, **_opts|
    row_generation_calls += 1
    update_column(:rows_count, 1)
  end

  @import.expects(:extract_transactions).once
  @import.expects(:sync_mappings).once
  @import.stubs(:send_next_steps_email)

  @family.expects(:upload_document).with do |file_content:, filename:, metadata:|
    assert_equal pdf_content, file_content
    assert_equal "sample_bank_statement.pdf", filename
    assert_equal({ "type" => "bank_statement" }, metadata)
    true
  end.returns(family_documents(:tax_return))

  ProcessPdfJob.perform_now(@import)

  assert_equal 1, row_generation_calls
  assert_equal "pending", @import.reload.status
end
|
||||
|
||||
private
|
||||
|
||||
# Attaches the sample bank statement fixture to the given import's `pdf_file`
# and returns the raw bytes that were attached, so callers can compare them
# with what the job later uploads.
def attach_pdf!(import)
  file_fixture("imports/sample_bank_statement.pdf").binread.tap do |bytes|
    import.pdf_file.attach(
      io: StringIO.new(bytes),
      filename: "sample_bank_statement.pdf",
      content_type: "application/pdf"
    )
  end
end
|
||||
end
|
||||
|
||||
@@ -43,4 +43,33 @@ class FamilyTest < ActiveSupport::TestCase
|
||||
|
||||
assert_includes family.available_merchants, new_merchant
|
||||
end
|
||||
|
||||
# With no vector store configured for the family, `upload_document` is expected
# to create one through the adapter, upload the file into it, and keep the
# caller-supplied metadata on the returned document.
test "upload_document stores provided metadata on family document" do
  family = families(:dylan_family)
  family.update!(vector_store_id: nil)

  # Canned adapter responses for the two calls the upload is expected to make.
  store_created = VectorStore::Response.new(success?: true, data: { id: "vs_test123" }, error: nil)
  file_uploaded = VectorStore::Response.new(success?: true, data: { file_id: "file-xyz" }, error: nil)

  adapter = mock("vector_store_adapter")
  adapter
    .expects(:create_store)
    .with(name: "Family #{family.id} Documents")
    .returns(store_created)
  adapter
    .expects(:upload_file)
    .with(store_id: "vs_test123", file_content: "hello", filename: "notes.txt")
    .returns(file_uploaded)

  VectorStore::Registry.stubs(:adapter).returns(adapter)

  document = family.upload_document(
    file_content: "hello",
    filename: "notes.txt",
    metadata: { "type" => "financial_document" }
  )

  assert_not_nil document
  assert_equal({ "type" => "financial_document" }, document.metadata)
  assert_equal "vs_test123", family.reload.vector_store_id
end
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user