Files
sure/app/models/import.rb
Juan José Mata 2595885eb7 Full .ndjson import / reorganize UI with Financial Tools / Raw Data tabs (#1208)
* Reorganize import UI with Financial Tools / Raw Data tabs

Split the flat list of import sources into two tabbed sections using
DS::Tabs: "Financial Tools" (Mint, Quicken/QIF, YNAB coming soon) and
"Raw Data" (transactions, investments, accounts, categories, rules,
documents). This prepares for adding more tool-specific importers
without cluttering the list.

https://claude.ai/code/session_01BM4SBWNhATqoKTEvy3qTS3

* Fix import controller test to account for YNAB coming soon entry

The new YNAB "coming soon" disabled entry adds a 5th aria-disabled
element to the import dialog.

https://claude.ai/code/session_01BM4SBWNhATqoKTEvy3qTS3

* Fix system tests to click Raw Data tab before selecting import type

Transaction, trade, and account imports are now under the Raw Data tab
and need an explicit tab click before the buttons are visible.

https://claude.ai/code/session_01BM4SBWNhATqoKTEvy3qTS3

* feat: Add bulk import for NDJSON export files

Implements an import flow that accepts the full all.ndjson file from data exports,
allowing users to restore their complete data including:
- Accounts with accountable types
- Categories with parent relationships
- Tags and merchants
- Transactions with category, merchant, and tag references
- Trades with securities
- Valuations
- Budgets and budget categories
- Rules with conditions and actions (including compound conditions)

Key changes:
- Add BulkImport model extending Import base class
- Add Family::DataImporter to handle NDJSON parsing and import logic
- Update imports controller and views to support NDJSON workflow
- Skip configuration/mapping steps for structured NDJSON imports
- Add i18n translations for bulk import UI
- Add tests for BulkImport and DataImporter

* fix: Fix category import and test query issues

- Add default lucide_icon ("shapes") for categories when not provided
- Fix valuation test to use proper ActiveRecord joins syntax

* Linter errors

* fix: Add default color for tags when not provided in import

* fix: Add default kind for transactions when not provided in import

* Fix test

* Fix tests

* Fix remaining merge conflicts from PR 766 cherry-pick

Resolve conflict markers in test fixtures and clean up BulkImport
entry in new.html.erb to use the _import_option partial consistently.

https://claude.ai/code/session_01BM4SBWNhATqoKTEvy3qTS3

* Import Sure `.ndjson`

* Remove `.ndjson` import from raw data

* Fix support for Sure "bulk" import from old branch

* Linter

* Fix CI test

* Fix more CI tests

* Fix tests

* Fix tests / move PDF import to first tab

* Remove redundant title

---------

Co-authored-by: Claude <noreply@anthropic.com>
2026-03-23 14:27:41 +01:00

413 lines
12 KiB
Ruby

class Import < ApplicationRecord
MaxRowCountExceededError = Class.new(StandardError)
MappingError = Class.new(StandardError)
MAX_CSV_SIZE = 10.megabytes
MAX_PDF_SIZE = 25.megabytes
ALLOWED_CSV_MIME_TYPES = %w[text/csv text/plain application/vnd.ms-excel application/csv].freeze
ALLOWED_PDF_MIME_TYPES = %w[application/pdf].freeze
DOCUMENT_TYPES = %w[bank_statement credit_card_statement investment_statement financial_document contract other].freeze
TYPES = %w[TransactionImport TradeImport AccountImport MintImport CategoryImport RuleImport PdfImport QifImport SureImport].freeze
SIGNAGE_CONVENTIONS = %w[inflows_positive inflows_negative]
SEPARATORS = [ [ "Comma (,)", "," ], [ "Semicolon (;)", ";" ] ].freeze
NUMBER_FORMATS = {
"1,234.56" => { separator: ".", delimiter: "," }, # US/UK/Asia
"1.234,56" => { separator: ",", delimiter: "." }, # Most of Europe
"1 234,56" => { separator: ",", delimiter: " " }, # French/Scandinavian
"1,234" => { separator: "", delimiter: "," } # Zero-decimal currencies like JPY
}.freeze
AMOUNT_TYPE_STRATEGIES = %w[signed_amount custom_column].freeze
belongs_to :family
belongs_to :account, optional: true
before_validation :set_default_number_format
before_validation :ensure_utf8_encoding
scope :ordered, -> { order(created_at: :desc) }
enum :status, {
pending: "pending",
complete: "complete",
importing: "importing",
reverting: "reverting",
revert_failed: "revert_failed",
failed: "failed"
}, validate: true, default: "pending"
validates :type, inclusion: { in: TYPES }
validates :amount_type_strategy, inclusion: { in: AMOUNT_TYPE_STRATEGIES }
validates :col_sep, inclusion: { in: SEPARATORS.map(&:last) }
validates :signage_convention, inclusion: { in: SIGNAGE_CONVENTIONS }, allow_nil: true
validates :number_format, presence: true, inclusion: { in: NUMBER_FORMATS.keys }
validate :custom_column_import_requires_identifier
validates :rows_to_skip, numericality: { only_integer: true, greater_than_or_equal_to: 0 }
validate :account_belongs_to_family
validate :rows_to_skip_within_file_bounds
has_many :rows, dependent: :destroy
has_many :mappings, dependent: :destroy
has_many :accounts, dependent: :destroy
has_many :entries, dependent: :destroy
class << self
def parse_csv_str(csv_str, col_sep: ",")
CSV.parse(
(csv_str || "").strip,
headers: true,
col_sep: col_sep,
converters: [ ->(str) { str&.strip } ],
liberal_parsing: true
)
end
end
def publish_later
raise MaxRowCountExceededError if row_count_exceeded?
raise "Import is not publishable" unless publishable?
update! status: :importing
ImportJob.perform_later(self)
end
def publish
raise MaxRowCountExceededError if row_count_exceeded?
import!
family.sync_later
update! status: :complete
rescue => error
update! status: :failed, error: error.message
end
def revert_later
raise "Import is not revertable" unless revertable?
update! status: :reverting
RevertImportJob.perform_later(self)
end
def revert
Import.transaction do
accounts.destroy_all
entries.destroy_all
end
family.sync_later
update! status: :pending
rescue => error
update! status: :revert_failed, error: error.message
end
def csv_rows
@csv_rows ||= parsed_csv
end
def csv_headers
parsed_csv.headers
end
def csv_sample
@csv_sample ||= parsed_csv.first(2)
end
def dry_run
mappings = {
transactions: rows_count,
categories: Import::CategoryMapping.for_import(self).creational.count,
tags: Import::TagMapping.for_import(self).creational.count
}
mappings.merge(
accounts: Import::AccountMapping.for_import(self).creational.count,
) if account.nil?
mappings
end
def required_column_keys
[]
end
# Returns false for import types that don't need CSV column mapping (e.g., PdfImport).
# Override in subclasses that handle data extraction differently.
def requires_csv_workflow?
true
end
# Subclasses that require CSV workflow must override this.
# Non-CSV imports (e.g., PdfImport) can return [].
def column_keys
raise NotImplementedError, "Subclass must implement column_keys"
end
def generate_rows_from_csv
rows.destroy_all
mapped_rows = csv_rows.map do |row|
{
account: row[account_col_label].to_s,
date: row[date_col_label].to_s,
qty: sanitize_number(row[qty_col_label]).to_s,
ticker: row[ticker_col_label].to_s,
exchange_operating_mic: row[exchange_operating_mic_col_label].to_s,
price: sanitize_number(row[price_col_label]).to_s,
amount: sanitize_number(row[amount_col_label]).to_s,
currency: (row[currency_col_label] || default_currency).to_s,
name: (row[name_col_label] || default_row_name).to_s,
category: row[category_col_label].to_s,
tags: row[tags_col_label].to_s,
entity_type: row[entity_type_col_label].to_s,
notes: row[notes_col_label].to_s
}
end
rows.insert_all!(mapped_rows)
update_column(:rows_count, rows.count)
end
def sync_mappings
transaction do
mapping_steps.each do |mapping_class|
mappables_by_key = mapping_class.mappables_by_key(self)
updated_mappings = mappables_by_key.map do |key, mappable|
mapping = mappings.find_or_initialize_by(key: key, import: self, type: mapping_class.name)
mapping.mappable = mappable
mapping.create_when_empty = key.present? && mappable.nil?
mapping
end
updated_mappings.each { |m| m.save(validate: false) }
mapping_class.where.not(id: updated_mappings.map(&:id)).destroy_all
end
end
end
def mapping_steps
[]
end
def rows_ordered
rows.ordered
end
def uploaded?
raw_file_str.present?
end
def configured?
uploaded? && rows_count > 0
end
def cleaned?
configured? && rows.all?(&:valid?)
end
def publishable?
cleaned? && mappings.all?(&:valid?)
end
def revertable?
complete? || revert_failed?
end
def has_unassigned_account?
mappings.accounts.where(key: "").any?
end
def requires_account?
family.accounts.empty? && has_unassigned_account?
end
# Used to optionally pre-fill the configuration for the current import
def suggested_template
family.imports
.complete
.where(account: account, type: type)
.order(created_at: :desc)
.first
end
def apply_template!(import_template)
update!(
import_template.attributes.slice(
"date_col_label", "amount_col_label", "name_col_label",
"category_col_label", "tags_col_label", "account_col_label",
"qty_col_label", "ticker_col_label", "price_col_label",
"entity_type_col_label", "notes_col_label", "currency_col_label",
"date_format", "signage_convention", "number_format",
"exchange_operating_mic_col_label",
"rows_to_skip"
)
)
end
def max_row_count
10000
end
private
def row_count_exceeded?
rows_count > max_row_count
end
def import!
# no-op, subclasses can implement for customization of algorithm
end
def default_row_name
"Imported item"
end
def default_currency
account&.currency || family.currency
end
def parsed_csv
return @parsed_csv if defined?(@parsed_csv)
csv_content = raw_file_str || ""
if rows_to_skip.to_i > 0
csv_content = csv_content.lines.drop(rows_to_skip).join
end
@parsed_csv = self.class.parse_csv_str(csv_content, col_sep: col_sep)
end
def sanitize_number(value)
return "" if value.nil?
format = NUMBER_FORMATS[number_format]
return "" unless format
# First, normalize spaces and remove any characters that aren't numbers, delimiters, separators, or minus signs
sanitized = value.to_s.strip
# Handle French/Scandinavian format specially
if format[:delimiter] == " "
sanitized = sanitized.gsub(/\s+/, "") # Remove all spaces first
else
sanitized = sanitized.gsub(/[^\d#{Regexp.escape(format[:delimiter])}#{Regexp.escape(format[:separator])}\-]/, "")
# Replace delimiter with empty string
if format[:delimiter].present?
sanitized = sanitized.gsub(format[:delimiter], "")
end
end
# Replace separator with period for proper float parsing
if format[:separator].present?
sanitized = sanitized.gsub(format[:separator], ".")
end
# Return empty string if not a valid number
unless sanitized =~ /\A-?\d+\.?\d*\z/
return ""
end
sanitized
end
def set_default_number_format
self.number_format ||= "1,234.56" # Default to US/UK format
end
def custom_column_import_requires_identifier
return unless amount_type_strategy == "custom_column"
if amount_type_inflow_value.blank?
errors.add(:base, I18n.t("imports.errors.custom_column_requires_inflow"))
end
end
# Common encodings to try when UTF-8 detection fails
# Windows-1250 is prioritized for Central/Eastern European languages
COMMON_ENCODINGS = [ "Windows-1250", "Windows-1252", "ISO-8859-1", "ISO-8859-2" ].freeze
def ensure_utf8_encoding
# Handle nil or empty string first (before checking if changed)
return if raw_file_str.nil? || raw_file_str.bytesize == 0
# Only process if the attribute was changed
# Use will_save_change_to_attribute? which is safer for binary data
return unless will_save_change_to_raw_file_str?
# If already valid UTF-8, nothing to do
begin
if raw_file_str.encoding == Encoding::UTF_8 && raw_file_str.valid_encoding?
return
end
rescue ArgumentError
# raw_file_str might have invalid encoding, continue to detection
end
# Detect encoding using rchardet
begin
require "rchardet"
detection = CharDet.detect(raw_file_str)
detected_encoding = detection["encoding"]
confidence = detection["confidence"]
# Only convert if we have reasonable confidence in the detection
if detected_encoding && confidence > 0.75
# Force encoding and convert to UTF-8
self.raw_file_str = raw_file_str.force_encoding(detected_encoding).encode("UTF-8", invalid: :replace, undef: :replace)
else
# Fallback: try common encodings
try_common_encodings
end
rescue LoadError
# rchardet not available, fallback to trying common encodings
try_common_encodings
rescue ArgumentError, Encoding::CompatibilityError => e
# Handle encoding errors by falling back to common encodings
try_common_encodings
end
end
def try_common_encodings
COMMON_ENCODINGS.each do |encoding|
begin
test = raw_file_str.dup.force_encoding(encoding)
if test.valid_encoding?
self.raw_file_str = test.encode("UTF-8", invalid: :replace, undef: :replace)
return
end
rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError
next
end
end
# If nothing worked, force UTF-8 and replace invalid bytes
self.raw_file_str = raw_file_str.force_encoding("UTF-8").scrub("?")
end
def account_belongs_to_family
return if account.nil?
return if account.family_id == family_id
errors.add(:account, "must belong to your family")
end
def rows_to_skip_within_file_bounds
return if raw_file_str.blank?
return if rows_to_skip.to_i == 0
line_count = raw_file_str.lines.count
if rows_to_skip.to_i >= line_count
errors.add(:rows_to_skip, "must be less than the number of lines in the file (#{line_count})")
end
end
end