Index all PDF imports into vector store with type metadata

This commit is contained in:
Juan José Mata
2026-02-11 15:53:45 +01:00
parent 4e4ca916a1
commit e7c609dc91
4 changed files with 106 additions and 2 deletions

View File

@@ -11,6 +11,7 @@ class ProcessPdfJob < ApplicationJob
begin
pdf_import.process_with_ai
upload_to_vector_store(pdf_import)
# For bank statements, extract transactions and generate import rows
if pdf_import.bank_statement?
@@ -58,4 +59,19 @@ class ProcessPdfJob < ApplicationJob
error: error.class.name.demodulize)
end
end
# Best-effort indexing of an imported PDF into the family's vector store.
#
# Sends the PDF bytes and filename to +pdf_import.family.upload_document+,
# tagging the document with a "type" metadata entry taken from
# +pdf_import.document_type+.
#
# Indexing is auxiliary to the import itself: a falsy upload result is only
# logged, and for the same reason any StandardError raised while reading the
# file or uploading it is logged as a warning instead of propagating, so a
# vector store problem cannot abort the caller's remaining processing steps.
#
# @param pdf_import [#pdf_file, #pdf_file_content, #family, #document_type, #id]
# @return [nil, Object] nil when a document was created; otherwise whatever
#   the logger's +warn+ call returns
def upload_to_vector_store(pdf_import)
  filename = pdf_import.pdf_file.filename.to_s
  file_content = pdf_import.pdf_file_content

  family_document = pdf_import.family.upload_document(
    file_content: file_content,
    filename: filename,
    metadata: { "type" => pdf_import.document_type }
  )
  return if family_document

  Rails.logger.warn("ProcessPdfJob: Vector store upload failed for import #{pdf_import.id}")
rescue StandardError => e
  # Keep the failure visible in the logs, but never let it fail the import.
  Rails.logger.warn(
    "ProcessPdfJob: Vector store upload failed for import #{pdf_import.id}: #{e.class.name} - #{e.message}"
  )
end
end

View File

@@ -37,7 +37,7 @@ module Family::VectorSearchable
response.success? ? response.data : []
end
def upload_document(file_content:, filename:)
def upload_document(file_content:, filename:, metadata: {})
adapter = vector_store_adapter
return nil unless adapter
@@ -57,7 +57,8 @@ module Family::VectorSearchable
content_type: Marcel::MimeType.for(name: filename),
file_size: file_content.bytesize,
provider_file_id: response.data[:file_id],
status: "ready"
status: "ready",
metadata: metadata || {}
)
end