Generalize from PDF import to just files

This commit is contained in:
Juan José Mata
2026-02-11 18:38:29 +01:00
parent e7c609dc91
commit 0eedd533bb
4 changed files with 137 additions and 8 deletions

View File

@@ -38,11 +38,17 @@ class ImportsController < ApplicationController
# Prepares the new-import page.
#
# Sets @pending_import to the first record of the family's ordered, pending
# imports and @document_upload_extensions to the extensions returned by
# document_upload_supported_extensions.
def new
  pending_scope = Current.family.imports.ordered.pending
  @pending_import = pending_scope.first
  @document_upload_extensions = document_upload_supported_extensions
end
def create
file = import_params[:import_file]
if file.present? && document_upload_request?
create_document_import(file)
return
end
# Handle PDF file uploads - process with AI
if file.present? && Import::ALLOWED_PDF_MIME_TYPES.include?(file.content_type)
unless valid_pdf_file?(file)
@@ -137,6 +143,60 @@ class ImportsController < ApplicationController
redirect_to import_path(pdf_import), notice: t("imports.create.pdf_processing")
end
# Handles a file submitted through the "DocumentImport" upload option.
#
# Checks, in order: that a vector store adapter is configured, that the file
# does not exceed Import::MAX_PDF_SIZE, and that its extension is one the
# adapter lists as supported. A ".pdf" file is then checked with
# valid_pdf_file? and handed to create_pdf_import; any other supported file
# is uploaded through Current.family.upload_document. Each failed check
# redirects back to the new-import page with an alert.
#
# @param file [#size, #original_filename, #read] the uploaded file
def create_document_import(file)
  store = VectorStore.adapter
  unless store
    redirect_to new_import_path, alert: t("imports.create.document_provider_not_configured")
    return
  end

  byte_limit = Import::MAX_PDF_SIZE
  if file.size > byte_limit
    redirect_to new_import_path, alert: t("imports.create.document_too_large", max_size: byte_limit / 1.megabyte)
    return
  end

  upload_name = file.original_filename.to_s
  extension = File.extname(upload_name).downcase
  unless store.supported_extensions.map(&:downcase).include?(extension)
    redirect_to new_import_path, alert: t("imports.create.invalid_document_file_type")
    return
  end

  if extension == ".pdf"
    # PDFs are routed to create_pdf_import rather than a plain document upload.
    if valid_pdf_file?(file)
      create_pdf_import(file)
    else
      redirect_to new_import_path, alert: t("imports.create.invalid_pdf")
    end
    return
  end

  stored = Current.family.upload_document(file_content: file.read, filename: upload_name)
  outcome =
    if stored
      { notice: t("imports.create.document_uploaded") }
    else
      { alert: t("imports.create.document_upload_failed") }
    end
  redirect_to new_import_path, **outcome
end
# Lists the extensions reported by the vector store adapter, lowercased,
# de-duplicated and sorted. Returns an empty array when VectorStore.adapter
# is not set.
def document_upload_supported_extensions
  store = VectorStore.adapter
  if store
    lowercased = store.supported_extensions.map(&:downcase)
    lowercased.uniq.sort
  else
    []
  end
end
# True when the request's import[type] parameter is exactly "DocumentImport".
def document_upload_request?
  requested_type = params.dig(:import, :type)
  requested_type == "DocumentImport"
end
def valid_pdf_file?(file)
header = file.read(5)
file.rewind

View File

@@ -141,10 +141,10 @@
</li>
<% end %>
<% if (params[:type].nil? || params[:type] == "PdfImport") && Provider::Registry.get_provider(:openai)&.supports_pdf_processing? %>
<% if (params[:type].nil? || params[:type].in?(%w[DocumentImport PdfImport])) && @document_upload_extensions.any? %>
<li>
<%= styled_form_with url: imports_path, scope: :import, multipart: true, class: "w-full" do |form| %>
<%= form.hidden_field :type, value: "PdfImport" %>
<%= form.hidden_field :type, value: "DocumentImport" %>
<label class="flex items-center justify-between p-4 group cursor-pointer w-full">
<div class="flex items-center gap-2">
<div class="bg-red-500/5 rounded-md w-8 h-8 flex items-center justify-center">
@@ -154,15 +154,15 @@
</div>
<div class="text-left">
<span class="text-sm text-primary group-hover:text-secondary block">
<%= t(".import_pdf") %>
<%= t(".import_file") %>
</span>
<span class="text-xs text-secondary">
<%= t(".import_pdf_description") %>
<%= t(".import_file_description") %>
</span>
</div>
</div>
<%= icon("chevron-right") %>
<%= form.file_field :import_file, accept: "application/pdf", class: "hidden", onchange: "this.form.submit()" %>
<%= form.file_field :import_file, accept: @document_upload_extensions.join(","), class: "hidden", onchange: "this.form.submit()" %>
</label>
<% end %>

View File

@@ -102,11 +102,11 @@ en:
import_portfolio: Import investments
import_rules: Import rules
import_transactions: Import transactions
import_pdf: Import PDF document
import_pdf_description: AI-powered document analysis
import_file: Import document
import_file_description: AI-powered analysis for PDFs and searchable upload for other supported files
resume: Resume %{type}
sources: Sources
title: New CSV Import
title: New Import
create:
file_too_large: File is too large. Maximum size is %{max_size}MB.
invalid_file_type: Invalid file type. Please upload a CSV file.
@@ -114,6 +114,11 @@ en:
pdf_too_large: PDF file is too large. Maximum size is %{max_size}MB.
pdf_processing: Your PDF is being processed. You will receive an email when analysis is complete.
invalid_pdf: The uploaded file is not a valid PDF.
document_too_large: Document file is too large. Maximum size is %{max_size}MB.
invalid_document_file_type: Invalid document file type for the active vector store.
document_uploaded: Document uploaded successfully.
document_upload_failed: We couldn't upload the document to the vector store. Please try again.
document_provider_not_configured: No vector store is configured for document uploads.
show:
finalize_upload: Please finalize your file upload.
finalize_mappings: Please finalize your mappings before proceeding.

View File

@@ -35,6 +35,70 @@ class ImportsControllerTest < ActionDispatch::IntegrationTest
assert_redirected_to import_upload_url(Import.all.ordered.first)
end
# A supported non-PDF file sent via the DocumentImport option is uploaded
# through Family#upload_document and must not create an Import record.
test "uploads supported non-pdf document for vector store without creating import" do
  adapter = mock("vector_store_adapter")
  adapter.stubs(:supported_extensions).returns(%w[.csv .pdf])
  VectorStore::Registry.stubs(:adapter).returns(adapter)

  # The controller calls upload_document on Current.family, which during an
  # integration request is presumably a different Ruby object than the one
  # returned by @user.family here. Setting the expectation on any instance
  # makes it apply to whichever Family object handles the request.
  Family.any_instance.expects(:upload_document).with do |file_content:, filename:, **|
    assert_not_empty file_content
    assert_equal "valid.csv", filename
    true
  end.returns(family_documents(:tax_return))

  assert_no_difference "Import.count" do
    post imports_url, params: {
      import: {
        type: "DocumentImport",
        import_file: file_fixture_upload("imports/valid.csv", "text/csv")
      }
    }
  end

  assert_redirected_to new_import_url
  assert_equal I18n.t("imports.create.document_uploaded"), flash[:notice]
end
# A PDF sent via the DocumentImport option must create a PdfImport and must
# not be uploaded through Family#upload_document.
test "uploads pdf document as PdfImport when using DocumentImport option" do
  adapter = mock("vector_store_adapter")
  adapter.stubs(:supported_extensions).returns(%w[.pdf .txt])
  VectorStore::Registry.stubs(:adapter).returns(adapter)

  # Guard every Family instance: a `never` expectation on the test-local
  # @user.family object would presumably not observe calls made on the
  # Current.family object loaded inside the request.
  Family.any_instance.expects(:upload_document).never

  assert_difference "Import.count", 1 do
    post imports_url, params: {
      import: {
        type: "DocumentImport",
        import_file: file_fixture_upload("imports/sample_bank_statement.pdf", "application/pdf")
      }
    }
  end

  created_import = Import.order(:created_at).last
  assert_equal "PdfImport", created_import.type
  assert_redirected_to import_url(created_import)
  assert_equal I18n.t("imports.create.pdf_processing"), flash[:notice]
end
# A file whose extension is not in the adapter's supported list is rejected
# with the invalid_document_file_type alert and creates no Import record.
test "rejects unsupported document type for DocumentImport option" do
  store_adapter = mock("vector_store_adapter")
  store_adapter.stubs(:supported_extensions).returns(%w[.pdf .txt])
  VectorStore::Registry.stubs(:adapter).returns(store_adapter)

  png_upload = file_fixture_upload("profile_image.png", "image/png")
  request_params = { import: { type: "DocumentImport", import_file: png_upload } }

  assert_no_difference "Import.count" do
    post imports_url, params: request_params
  end

  expected_alert = I18n.t("imports.create.invalid_document_file_type")
  assert_redirected_to new_import_url
  assert_equal expected_alert, flash[:alert]
end
test "publishes import" do
import = imports(:transaction)