Files
sure/app/jobs/process_pdf_job.rb
soky srm d79d86d848 PDF ai import (#1006)
Add support to review transactions for AI pdf import
2026-02-16 21:11:41 +01:00

89 lines
3.3 KiB
Ruby

class ProcessPdfJob < ApplicationJob
queue_as :medium_priority
def perform(pdf_import)
return unless pdf_import.is_a?(PdfImport)
return unless pdf_import.pdf_uploaded?
return if pdf_import.status == "complete"
return if pdf_import.ai_processed? && (!pdf_import.statement_with_transactions? || pdf_import.rows_count > 0)
pdf_import.update!(status: :importing)
begin
process_result = pdf_import.process_with_ai
document_type = resolve_document_type(pdf_import, process_result)
upload_to_vector_store(pdf_import, document_type: document_type)
# For statements with transactions (bank/credit card), extract and generate import rows
if statement_with_transactions?(document_type)
Rails.logger.info("ProcessPdfJob: Extracting transactions for #{document_type} import #{pdf_import.id}")
pdf_import.extract_transactions
Rails.logger.info("ProcessPdfJob: Extracted #{pdf_import.extracted_transactions.size} transactions")
pdf_import.generate_rows_from_extracted_data
pdf_import.sync_mappings
Rails.logger.info("ProcessPdfJob: Generated #{pdf_import.rows_count} import rows")
end
# Find the user who created this import (first admin or any user in the family)
user = pdf_import.family.users.find_by(role: :admin) || pdf_import.family.users.first
if user
pdf_import.send_next_steps_email(user)
end
# Statements with extracted rows go to pending for user review/publish
# Other document types are marked complete (no further action needed)
final_status = statement_with_transactions?(document_type) && pdf_import.rows_count > 0 ? :pending : :complete
pdf_import.update!(status: final_status)
rescue StandardError => e
sanitized_error = sanitize_error_message(e)
Rails.logger.error("PDF processing failed for import #{pdf_import.id}: #{e.class.name} - #{sanitized_error}")
begin
pdf_import.update!(status: :failed, error: sanitized_error)
rescue StandardError => update_error
Rails.logger.error("Failed to update import status: #{update_error.message}")
end
raise
end
end
private
def sanitize_error_message(error)
case error
when RuntimeError, ArgumentError
I18n.t("imports.pdf_import.processing_failed_with_message",
message: error.message.truncate(500))
else
I18n.t("imports.pdf_import.processing_failed_generic",
error: error.class.name.demodulize)
end
end
def upload_to_vector_store(pdf_import, document_type:)
filename = pdf_import.pdf_file.filename.to_s
file_content = pdf_import.pdf_file_content
family_document = pdf_import.family.upload_document(
file_content: file_content,
filename: filename,
metadata: { "type" => document_type }
)
return if family_document
Rails.logger.warn("ProcessPdfJob: Vector store upload failed for import #{pdf_import.id}")
end
def resolve_document_type(pdf_import, process_result)
return process_result.document_type if process_result.respond_to?(:document_type) && process_result.document_type.present?
pdf_import.reload.document_type
end
def statement_with_transactions?(document_type)
document_type.in?(%w[bank_statement credit_card_statement])
end
end