mirror of
https://github.com/we-promise/sure.git
synced 2026-04-19 03:54:08 +00:00
Index all PDF imports into vector store with type metadata
This commit is contained in:
@@ -11,6 +11,7 @@ class ProcessPdfJob < ApplicationJob
|
|||||||
|
|
||||||
begin
|
begin
|
||||||
pdf_import.process_with_ai
|
pdf_import.process_with_ai
|
||||||
|
upload_to_vector_store(pdf_import)
|
||||||
|
|
||||||
# For bank statements, extract transactions and generate import rows
|
# For bank statements, extract transactions and generate import rows
|
||||||
if pdf_import.bank_statement?
|
if pdf_import.bank_statement?
|
||||||
@@ -58,4 +59,19 @@ class ProcessPdfJob < ApplicationJob
|
|||||||
error: error.class.name.demodulize)
|
error: error.class.name.demodulize)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Sends the import's PDF to the family's document store, tagging it with the
# import's document type under the "type" metadata key.
#
# Indexing is best-effort: a falsy result from `upload_document` and any
# StandardError raised while reading or uploading the file are both logged as
# warnings instead of propagating, so a vector-store problem does not abort
# the rest of the import.
#
# @param pdf_import [Object] import record; must respond to `pdf_file`,
#   `pdf_file_content`, `family`, `document_type` and `id`
# @return [void]
def upload_to_vector_store(pdf_import)
  filename = pdf_import.pdf_file.filename.to_s
  file_content = pdf_import.pdf_file_content

  family_document = pdf_import.family.upload_document(
    file_content: file_content,
    filename: filename,
    metadata: { "type" => pdf_import.document_type }
  )

  return if family_document

  Rails.logger.warn("ProcessPdfJob: Vector store upload failed for import #{pdf_import.id}")
rescue StandardError => e
  # Treat a raised error like a falsy upload result: log it and move on.
  Rails.logger.warn(
    "ProcessPdfJob: Vector store upload raised #{e.class.name} for import #{pdf_import.id}: #{e.message}"
  )
end
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ module Family::VectorSearchable
|
|||||||
response.success? ? response.data : []
|
response.success? ? response.data : []
|
||||||
end
|
end
|
||||||
|
|
||||||
def upload_document(file_content:, filename:)
|
def upload_document(file_content:, filename:, metadata: {})
|
||||||
adapter = vector_store_adapter
|
adapter = vector_store_adapter
|
||||||
return nil unless adapter
|
return nil unless adapter
|
||||||
|
|
||||||
@@ -57,7 +57,8 @@ module Family::VectorSearchable
|
|||||||
content_type: Marcel::MimeType.for(name: filename),
|
content_type: Marcel::MimeType.for(name: filename),
|
||||||
file_size: file_content.bytesize,
|
file_size: file_content.bytesize,
|
||||||
provider_file_id: response.data[:file_id],
|
provider_file_id: response.data[:file_id],
|
||||||
status: "ready"
|
status: "ready",
|
||||||
|
metadata: metadata || {}
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
@@ -32,4 +32,62 @@ class ProcessPdfJobTest < ActiveJob::TestCase
|
|||||||
# Should not change status since already complete
|
# Should not change status since already complete
|
||||||
assert_equal "complete", processed_import.reload.status
|
assert_equal "complete", processed_import.reload.status
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# A PDF classified as something other than a bank statement is still sent to
# the family's document store, tagged with its classified type, and no
# transaction extraction takes place.
test "uploads non-bank PDF to vector store with classified type metadata" do
  pdf_content = attach_pdf!(@import)

  # Mocha does not run a block handed to `stubs`, so the classification side
  # effect is installed as a singleton method that executes when the job
  # calls it. Inside the block `self` is the import itself.
  @import.define_singleton_method(:process_with_ai) do
    update!(ai_summary: "A tax return", document_type: "financial_document")
  end
  @import.stubs(:send_next_steps_email)
  @import.expects(:extract_transactions).never

  @family.expects(:upload_document).with do |file_content:, filename:, metadata:|
    assert_equal pdf_content, file_content
    assert_equal "sample_bank_statement.pdf", filename
    assert_equal({ "type" => "financial_document" }, metadata)
    true
  end.returns(family_documents(:tax_return))

  ProcessPdfJob.perform_now(@import)

  assert_equal "complete", @import.reload.status
end
|
||||||
|
|
||||||
|
# A PDF classified as a bank statement is sent to the family's document store
# with its classified type, and the transaction-extraction steps still run.
test "uploads bank statement PDF to vector store with classified type metadata" do
  pdf_content = attach_pdf!(@import)

  # Mocha does not run blocks handed to `stubs` or `once`, so methods that
  # need a side effect are installed as singleton methods instead. Inside
  # these blocks `self` is the import itself.
  @import.define_singleton_method(:process_with_ai) do
    update!(ai_summary: "A bank statement", document_type: "bank_statement")
  end

  # Counted by hand to keep the original "called exactly once" check.
  row_generation_calls = 0
  @import.define_singleton_method(:generate_rows_from_extracted_data) do
    row_generation_calls += 1
    update_column(:rows_count, 1)
  end

  @import.expects(:extract_transactions).once
  @import.expects(:sync_mappings).once
  @import.stubs(:send_next_steps_email)

  @family.expects(:upload_document).with do |file_content:, filename:, metadata:|
    assert_equal pdf_content, file_content
    assert_equal "sample_bank_statement.pdf", filename
    assert_equal({ "type" => "bank_statement" }, metadata)
    true
  end.returns(family_documents(:tax_return))

  ProcessPdfJob.perform_now(@import)

  assert_equal 1, row_generation_calls
  assert_equal "pending", @import.reload.status
end
|
||||||
|
|
||||||
|
private
|
||||||
|
|
||||||
|
# Attaches the sample bank statement fixture to the import's `pdf_file`.
#
# @param import [Object] record exposing a `pdf_file` attachment
# @return [String] the raw bytes that were attached
def attach_pdf!(import)
  file_fixture("imports/sample_bank_statement.pdf").binread.tap do |bytes|
    import.pdf_file.attach(
      io: StringIO.new(bytes),
      filename: "sample_bank_statement.pdf",
      content_type: "application/pdf"
    )
  end
end
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -43,4 +43,33 @@ class FamilyTest < ActiveSupport::TestCase
|
|||||||
|
|
||||||
assert_includes family.available_merchants, new_merchant
|
assert_includes family.available_merchants, new_merchant
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# With no vector store configured yet, upload_document creates one through
# the adapter, uploads the file, and keeps the supplied metadata on the
# resulting family document.
test "upload_document stores provided metadata on family document" do
  family = families(:dylan_family)
  family.update!(vector_store_id: nil)

  store_created = VectorStore::Response.new(success?: true, data: { id: "vs_test123" }, error: nil)
  file_uploaded = VectorStore::Response.new(success?: true, data: { file_id: "file-xyz" }, error: nil)

  adapter = mock("vector_store_adapter")
  adapter.expects(:create_store)
         .with(name: "Family #{family.id} Documents")
         .returns(store_created)
  adapter.expects(:upload_file)
         .with(store_id: "vs_test123", file_content: "hello", filename: "notes.txt")
         .returns(file_uploaded)

  VectorStore::Registry.stubs(:adapter).returns(adapter)

  document = family.upload_document(
    file_content: "hello",
    filename: "notes.txt",
    metadata: { "type" => "financial_document" }
  )

  assert_not_nil document
  assert_equal({ "type" => "financial_document" }, document.metadata)
  assert_equal "vs_test123", family.reload.vector_store_id
end
|
||||||
end
|
end
|
||||||
|
|||||||
Reference in New Issue
Block a user