diff --git a/app/controllers/import/configurations_controller.rb b/app/controllers/import/configurations_controller.rb index 12e47a477..6602e3fbe 100644 --- a/app/controllers/import/configurations_controller.rb +++ b/app/controllers/import/configurations_controller.rb @@ -4,6 +4,8 @@ class Import::ConfigurationsController < ApplicationController before_action :set_import def show + # PDF imports are auto-configured from AI extraction, skip to clean step + redirect_to import_clean_path(@import) if @import.is_a?(PdfImport) end def update diff --git a/app/controllers/imports_controller.rb b/app/controllers/imports_controller.rb index 88a346838..f1a217529 100644 --- a/app/controllers/imports_controller.rb +++ b/app/controllers/imports_controller.rb @@ -1,7 +1,23 @@ class ImportsController < ApplicationController include SettingsHelper - before_action :set_import, only: %i[show publish destroy revert apply_template] + before_action :set_import, only: %i[show update publish destroy revert apply_template] + + def update + # Handle both pdf_import[account_id] and import[account_id] param formats + account_id = params.dig(:pdf_import, :account_id) || params.dig(:import, :account_id) + + if account_id.present? + account = Current.family.accounts.find_by(id: account_id) + unless account + redirect_back_or_to import_path(@import), alert: t("imports.update.invalid_account", default: "Account not found.") + return + end + @import.update!(account: account) + end + + redirect_to import_path(@import), notice: t("imports.update.account_saved", default: "Account saved.") + end def publish @import.publish_later diff --git a/app/jobs/process_pdf_job.rb b/app/jobs/process_pdf_job.rb index 25c31f11f..8fb4fccef 100644 --- a/app/jobs/process_pdf_job.rb +++ b/app/jobs/process_pdf_job.rb @@ -5,18 +5,22 @@ class ProcessPdfJob < ApplicationJob return unless pdf_import.is_a?(PdfImport) return unless pdf_import.pdf_uploaded? return if pdf_import.status == "complete" - return if pdf_import.ai_processed? && (!pdf_import.bank_statement? || pdf_import.has_extracted_transactions?) + return if pdf_import.ai_processed? && (!pdf_import.bank_statement? || pdf_import.rows_count > 0) pdf_import.update!(status: :importing) begin pdf_import.process_with_ai - # For bank statements, extract transactions + # For bank statements, extract transactions and generate import rows if pdf_import.bank_statement? Rails.logger.info("ProcessPdfJob: Extracting transactions for bank statement import #{pdf_import.id}") pdf_import.extract_transactions Rails.logger.info("ProcessPdfJob: Extracted #{pdf_import.extracted_transactions.size} transactions") + + pdf_import.generate_rows_from_extracted_data + pdf_import.sync_mappings + Rails.logger.info("ProcessPdfJob: Generated #{pdf_import.rows_count} import rows") end # Find the user who created this import (first admin or any user in the family) @@ -26,7 +30,10 @@ class ProcessPdfJob < ApplicationJob pdf_import.send_next_steps_email(user) end - pdf_import.update!(status: :complete) + # Bank statements with rows go to pending for user review/publish + # Non-bank statements are marked complete (no further action needed) + final_status = pdf_import.bank_statement? && pdf_import.rows_count > 0 ? :pending : :complete + pdf_import.update!(status: final_status) rescue StandardError => e sanitized_error = sanitize_error_message(e) Rails.logger.error("PDF processing failed for import #{pdf_import.id}: #{e.class.name} - #{sanitized_error}") diff --git a/app/models/pdf_import.rb b/app/models/pdf_import.rb index 8b25e8bfa..0e0250462 100644 --- a/app/models/pdf_import.rb +++ b/app/models/pdf_import.rb @@ -3,6 +3,34 @@ class PdfImport < Import validates :document_type, inclusion: { in: DOCUMENT_TYPES }, allow_nil: true + def import! + raise "Account required for PDF import" unless account.present? + + transaction do + mappings.each(&:create_mappable!) + + new_transactions = rows.map do |row| + category = mappings.categories.mappable_for(row.category) + + Transaction.new( + category: category, + entry: Entry.new( + account: account, + date: row.date_iso, + amount: row.signed_amount, + name: row.name, + currency: row.currency, + notes: row.notes, + import: self, + import_locked: true + ) + ) + end + + Transaction.import!(new_transactions, recursive: true) if new_transactions.any? + end + end + def pdf_uploaded? pdf_file.attached? end @@ -71,6 +99,34 @@ class PdfImport < Import extracted_data&.dig("transactions") || [] end + def generate_rows_from_extracted_data + transaction do + rows.destroy_all + + unless has_extracted_transactions? + update_column(:rows_count, 0) + return + end + + currency = account&.currency || family.currency + + mapped_rows = extracted_transactions.map do |txn| + { + import_id: id, + date: format_date_for_import(txn["date"]), + amount: txn["amount"].to_s, + name: txn["name"].to_s, + category: txn["category"].to_s, + notes: txn["notes"].to_s, + currency: currency + } + end + + Import::Row.insert_all!(mapped_rows) if mapped_rows.any? + update_column(:rows_count, mapped_rows.size) + end + end + def send_next_steps_email(user) PdfImportMailer.with( user: user, @@ -83,19 +139,19 @@ class PdfImport < Import end def configured? - ai_processed? + ai_processed? && rows_count > 0 end def cleaned? - ai_processed? + configured? && rows.all?(&:valid?) end def publishable? - false + account.present? && bank_statement? && cleaned? && mappings.all?(&:valid?) end def column_keys - [] + %i[date amount name category notes] end def requires_csv_workflow? @@ -107,4 +163,27 @@ class PdfImport < Import pdf_file.download end + + def required_column_keys + %i[date amount] + end + + def mapping_steps + base = [] + # Only include CategoryMapping if rows have non-empty categories + base << Import::CategoryMapping if rows.where.not(category: [ nil, "" ]).exists? + # Note: PDF imports use direct account selection in the UI, not AccountMapping + # AccountMapping is designed for CSV imports where rows have different account values + base + end + + private + + def format_date_for_import(date_str) + return "" if date_str.blank? + + Date.parse(date_str).strftime(date_format) + rescue ArgumentError + date_str.to_s + end end diff --git a/app/models/provider/openai.rb b/app/models/provider/openai.rb index 08ac224f9..6ec10333d 100644 --- a/app/models/provider/openai.rb +++ b/app/models/provider/openai.rb @@ -118,21 +118,20 @@ class Provider::Openai < Provider # Can be disabled via ENV for OpenAI-compatible endpoints that don't support vision # Only vision-capable models (gpt-4o, gpt-4-turbo, gpt-4.1, etc.) support PDF input - def supports_pdf_processing? + def supports_pdf_processing?(model: @default_model) return false unless ENV.fetch("OPENAI_SUPPORTS_PDF_PROCESSING", "true").to_s.downcase.in?(%w[true 1 yes]) # Custom providers manage their own model capabilities return true if custom_provider? - # Check if the configured model supports vision/PDF input - VISION_CAPABLE_MODEL_PREFIXES.any? { |prefix| @default_model.start_with?(prefix) } + # Check if the specified model supports vision/PDF input + VISION_CAPABLE_MODEL_PREFIXES.any? { |prefix| model.start_with?(prefix) } end def process_pdf(pdf_content:, model: "", family: nil) - raise "Model does not support PDF/vision processing" unless supports_pdf_processing? - with_provider_response do effective_model = model.presence || @default_model + raise Error, "Model does not support PDF/vision processing: #{effective_model}" unless supports_pdf_processing?(model: effective_model) trace = create_langfuse_trace( name: "openai.process_pdf", diff --git a/app/models/provider/openai/pdf_processor.rb b/app/models/provider/openai/pdf_processor.rb index b99caa77c..f65510e87 100644 --- a/app/models/provider/openai/pdf_processor.rb +++ b/app/models/provider/openai/pdf_processor.rb @@ -42,7 +42,7 @@ class Provider::Openai::PdfProcessor For each document, you must determine: 1. **Document Type**: Classify the document as one of the following: - - `bank_statement`: A bank account statement showing transactions, balances, and account activity + - `bank_statement`: A bank account statement showing transactions, balances, and account activity. This includes mobile money statements (like M-PESA, Venmo, PayPal, Cash App), digital wallet statements, and any statement showing a list of financial transactions with dates and amounts. - `credit_card_statement`: A credit card statement showing charges, payments, and balances - `investment_statement`: An investment/brokerage statement showing holdings, trades, or portfolio performance - `financial_document`: General financial documents like tax forms, receipts, invoices, or financial reports diff --git a/app/views/imports/_nav.html.erb b/app/views/imports/_nav.html.erb index 898c78255..26435d8b6 100644 --- a/app/views/imports/_nav.html.erb +++ b/app/views/imports/_nav.html.erb @@ -1,18 +1,29 @@ <%# locals: (import:) %> -<% steps = [ - { name: "Upload", path: import_upload_path(import), is_complete: import.uploaded?, step_number: 1 }, - { name: "Configure", path: import_configuration_path(import), is_complete: import.configured?, step_number: 2 }, - { name: "Clean", path: import_clean_path(import), is_complete: import.cleaned?, step_number: 3 }, - { name: "Map", path: import_confirm_path(import), is_complete: import.publishable?, step_number: 4 }, - { name: "Confirm", path: import_path(import), is_complete: import.complete?, step_number: 5 } -].reject { |step| step[:name] == "Map" && import.mapping_steps.empty? } %> +<% steps = if import.is_a?(PdfImport) + # PDF imports have a simplified flow: Upload -> Confirm + # Upload/Configure/Clean are always complete for processed PDF imports + [ + { name: t("imports.steps.upload", default: "Upload"), path: nil, is_complete: import.pdf_uploaded?, step_number: 1 }, + { name: t("imports.steps.configure", default: "Configure"), path: nil, is_complete: import.configured?, step_number: 2 }, + { name: t("imports.steps.clean", default: "Clean"), path: nil, is_complete: import.cleaned?, step_number: 3 }, + { name: t("imports.steps.confirm", default: "Confirm"), path: import_path(import), is_complete: import.complete?, step_number: 4 } + ] +else + [ + { name: t("imports.steps.upload", default: "Upload"), path: import_upload_path(import), is_complete: import.uploaded?, step_number: 1 }, + { name: t("imports.steps.configure", default: "Configure"), path: import_configuration_path(import), is_complete: import.configured?, step_number: 2 }, + { name: t("imports.steps.clean", default: "Clean"), path: import_clean_path(import), is_complete: import.cleaned?, step_number: 3 }, + { name: t("imports.steps.map", default: "Map"), key: "Map", path: import_confirm_path(import), is_complete: import.publishable?, step_number: 4 }, + { name: t("imports.steps.confirm", default: "Confirm"), path: import_path(import), is_complete: import.complete?, step_number: 5 } + ].reject { |step| step[:key] == "Map" && import.mapping_steps.empty? } +end %> <% content_for :mobile_import_progress do %> - <% active_step = steps.detect { |s| request.path.eql?(s[:path]) } %> - <% if active_step.present? %> + <% active_step_index = steps.index { |s| request.path.eql?(s[:match_path] || s[:path]) } %> + <% if active_step_index %>
- Step <%= active_step[:step_number] %> of <%= steps.size %> + <%= t("imports.steps.progress", step: active_step_index + 1, total: steps.size, default: "Step %{step} of %{total}") %>
<% end %> <% end %> @@ -20,7 +31,7 @@