Add transaction dedup support for CSV imports (#304)

* Support dedup for transaction also for CSV

* Fix to exclude CSV importing duplicates

* Guard nil account
This commit is contained in:
soky srm
2025-11-10 12:09:22 +01:00
committed by GitHub
parent 3611413829
commit f882484bba
4 changed files with 317 additions and 41 deletions

View File

@@ -30,6 +30,7 @@ class Account::ProviderImportAdapter
# If this is a new entry, check for potential duplicates from manual/CSV imports
# This handles the case where a user manually created or CSV imported a transaction
# before linking their account to a provider
# Note: We don't pass name here to allow matching even when provider formats names differently
if entry.new_record?
duplicate = find_duplicate_transaction(date: date, amount: amount, currency: currency)
if duplicate
@@ -266,33 +267,41 @@ class Account::ProviderImportAdapter
false
end
private
# Finds a potential duplicate transaction from manual entry or CSV import
# Matches on date, amount, currency, and optionally name
# Only matches transactions without external_id (manual/CSV imported)
#
# @param date [Date, String] Transaction date
# @param amount [BigDecimal, Numeric] Transaction amount
# @param currency [String] Currency code
# @param name [String, nil] Optional transaction name for more accurate matching
# @param exclude_entry_ids [Set, Array, nil] Entry IDs to exclude from the search (e.g., already claimed entries)
# @return [Entry, nil] The duplicate entry or nil if not found
def find_duplicate_transaction(date:, amount:, currency:, name: nil, exclude_entry_ids: nil)
# Convert date to Date object if it's a string
date = Date.parse(date.to_s) unless date.is_a?(Date)
# Finds a potential duplicate transaction from manual entry or CSV import
# Matches on date, amount, and currency
# Only matches transactions without external_id (manual/CSV imported)
#
# @param date [Date, String] Transaction date
# @param amount [BigDecimal, Numeric] Transaction amount
# @param currency [String] Currency code
# @return [Entry, nil] The duplicate entry or nil if not found
def find_duplicate_transaction(date:, amount:, currency:)
# Convert date to Date object if it's a string
date = Date.parse(date.to_s) unless date.is_a?(Date)
# Look for entries on the same account with:
# 1. Same date
# 2. Same amount (exact match)
# 3. Same currency
# 4. No external_id (manual/CSV imported transactions)
# 5. Entry type is Transaction (not Trade or Valuation)
# 6. Optionally same name (if name parameter is provided)
# 7. Not in the excluded IDs list (if provided)
query = account.entries
.where(entryable_type: "Transaction")
.where(date: date)
.where(amount: amount)
.where(currency: currency)
.where(external_id: nil)
# Look for entries on the same account with:
# 1. Same date
# 2. Same amount (exact match)
# 3. Same currency
# 4. No external_id (manual/CSV imported transactions)
# 5. Entry type is Transaction (not Trade or Valuation)
account.entries
.where(entryable_type: "Transaction")
.where(date: date)
.where(amount: amount)
.where(currency: currency)
.where(external_id: nil)
.order(created_at: :asc)
.first
end
# Add name filter if provided
query = query.where(name: name) if name.present?
# Exclude already claimed entries if provided
query = query.where.not(id: exclude_entry_ids) if exclude_entry_ids.present?
query.order(created_at: :asc).first
end
end

View File

@@ -1,5 +1,6 @@
class Import < ApplicationRecord
MaxRowCountExceededError = Class.new(StandardError)
MappingError = Class.new(StandardError)
TYPES = %w[TransactionImport TradeImport AccountImport MintImport].freeze
SIGNAGE_CONVENTIONS = %w[inflows_positive inflows_negative]

View File

@@ -3,32 +3,76 @@ class TransactionImport < Import
transaction do
mappings.each(&:create_mappable!)
transactions = rows.map do |row|
new_transactions = []
updated_entries = []
claimed_entry_ids = Set.new # Track entries we've already claimed in this import
rows.each_with_index do |row, index|
mapped_account = if account
account
else
mappings.accounts.mappable_for(row.account)
end
# Guard against nil account - this happens when an account name in CSV is not mapped
if mapped_account.nil?
row_number = index + 1
account_name = row.account.presence || "(blank)"
error_message = "Row #{row_number}: Account '#{account_name}' is not mapped to an existing account. " \
"Please map this account in the import configuration."
errors.add(:base, error_message)
raise Import::MappingError, error_message
end
category = mappings.categories.mappable_for(row.category)
tags = row.tags_list.map { |tag| mappings.tags.mappable_for(tag) }.compact
Transaction.new(
category: category,
tags: tags,
entry: Entry.new(
account: mapped_account,
date: row.date_iso,
amount: row.signed_amount,
name: row.name,
currency: row.currency,
notes: row.notes,
import: self
)
# Check for duplicate transactions using the adapter's deduplication logic
# Pass claimed_entry_ids to exclude entries we've already matched in this import
# This ensures identical rows within the CSV are all imported as separate transactions
adapter = Account::ProviderImportAdapter.new(mapped_account)
duplicate_entry = adapter.find_duplicate_transaction(
date: row.date_iso,
amount: row.signed_amount,
currency: row.currency,
name: row.name,
exclude_entry_ids: claimed_entry_ids
)
if duplicate_entry
# Update existing transaction instead of creating a new one
duplicate_entry.transaction.category = category if category.present?
duplicate_entry.transaction.tags = tags if tags.any?
duplicate_entry.notes = row.notes if row.notes.present?
duplicate_entry.import = self
updated_entries << duplicate_entry
claimed_entry_ids.add(duplicate_entry.id)
else
# Create new transaction (no duplicate found)
new_transactions << Transaction.new(
category: category,
tags: tags,
entry: Entry.new(
account: mapped_account,
date: row.date_iso,
amount: row.signed_amount,
name: row.name,
currency: row.currency,
notes: row.notes,
import: self
)
)
end
end
Transaction.import!(transactions, recursive: true)
# Save updated entries first
updated_entries.each do |entry|
entry.transaction.save!
entry.save!
end
# Bulk import new transactions
Transaction.import!(new_transactions, recursive: true) if new_transactions.any?
end
end

View File

@@ -100,4 +100,226 @@ class TransactionImportTest < ActiveSupport::TestCase
assert_equal [ -100, 200, -300 ], @import.entries.map(&:amount)
end
test "does not create duplicate when matching transaction exists with same name" do
account = accounts(:depository)
# Create an existing manual transaction
existing_entry = account.entries.create!(
date: Date.new(2024, 1, 1),
amount: 100,
currency: "USD",
name: "Coffee Shop",
entryable: Transaction.new(category: categories(:food_and_drink))
)
# Try to import a CSV with the same transaction
import_csv = <<~CSV
date,name,amount
01/01/2024,Coffee Shop,100
CSV
@import.update!(
account: account,
raw_file_str: import_csv,
date_col_label: "date",
amount_col_label: "amount",
name_col_label: "name",
date_format: "%m/%d/%Y",
amount_type_strategy: "signed_amount",
signage_convention: "inflows_negative"
)
@import.generate_rows_from_csv
@import.reload
# Should not create a new entry, should update the existing one
assert_no_difference -> { Entry.count } do
assert_no_difference -> { Transaction.count } do
@import.publish
end
end
# The existing entry should now be linked to the import
assert_equal @import.id, existing_entry.reload.import_id
end
test "creates new transaction when name differs even if date and amount match" do
account = accounts(:depository)
# Create an existing manual transaction
existing_entry = account.entries.create!(
date: Date.new(2024, 1, 1),
amount: 100,
currency: "USD",
name: "Coffee Shop",
entryable: Transaction.new
)
# Try to import a CSV with same date/amount but different name
import_csv = <<~CSV
date,name,amount
01/01/2024,Different Store,100
CSV
@import.update!(
account: account,
raw_file_str: import_csv,
date_col_label: "date",
amount_col_label: "amount",
name_col_label: "name",
date_format: "%m/%d/%Y",
amount_type_strategy: "signed_amount",
signage_convention: "inflows_negative"
)
@import.generate_rows_from_csv
@import.reload
# Should create a new entry because the name is different
assert_difference -> { Entry.count } => 1,
-> { Transaction.count } => 1 do
@import.publish
end
# Both transactions should exist
assert_equal 2, account.entries.where(date: Date.new(2024, 1, 1), amount: 100).count
end
test "imports all identical transactions from CSV even when one exists in database" do
account = accounts(:depository)
# Create an existing manual transaction
existing_entry = account.entries.create!(
date: Date.new(2024, 1, 1),
amount: 50,
currency: "USD",
name: "Vending Machine",
entryable: Transaction.new
)
# Import CSV with 3 identical transactions (e.g., buying from vending machine 3 times)
import_csv = <<~CSV
date,name,amount
01/01/2024,Vending Machine,50
01/01/2024,Vending Machine,50
01/01/2024,Vending Machine,50
CSV
@import.update!(
account: account,
raw_file_str: import_csv,
date_col_label: "date",
amount_col_label: "amount",
name_col_label: "name",
date_format: "%m/%d/%Y",
amount_type_strategy: "signed_amount",
signage_convention: "inflows_negative"
)
@import.generate_rows_from_csv
@import.reload
# Should update 1 existing and create 2 new (total of 3 in system)
# The first matching row claims the existing entry, the other 2 create new ones
assert_difference -> { Entry.count } => 2,
-> { Transaction.count } => 2 do
@import.publish
end
# Should have exactly 3 identical transactions total
assert_equal 3, account.entries.where(
date: Date.new(2024, 1, 1),
amount: 50,
name: "Vending Machine"
).count
# The existing entry should be linked to the import
assert_equal @import.id, existing_entry.reload.import_id
end
test "imports all identical transactions from CSV when none exist in database" do
account = accounts(:depository)
# Import CSV with 3 identical transactions (no existing entry in database)
import_csv = <<~CSV
date,name,amount
01/01/2024,Vending Machine,50
01/01/2024,Vending Machine,50
01/01/2024,Vending Machine,50
CSV
@import.update!(
account: account,
raw_file_str: import_csv,
date_col_label: "date",
amount_col_label: "amount",
name_col_label: "name",
date_format: "%m/%d/%Y",
amount_type_strategy: "signed_amount",
signage_convention: "inflows_negative"
)
@import.generate_rows_from_csv
@import.reload
# Should create all 3 as new transactions
assert_difference -> { Entry.count } => 3,
-> { Transaction.count } => 3 do
@import.publish
end
# Should have exactly 3 identical transactions
assert_equal 3, account.entries.where(
date: Date.new(2024, 1, 1),
amount: 50,
name: "Vending Machine"
).count
end
test "does not raise error when all accounts are properly mapped" do
# Import CSV with multiple accounts, all mapped
import_csv = <<~CSV
date,name,amount,account
01/01/2024,Coffee Shop,100,Checking Account
01/02/2024,Grocery Store,200,Credit Card
CSV
checking = accounts(:depository)
credit_card = accounts(:credit_card)
@import.update!(
account: nil,
raw_file_str: import_csv,
date_col_label: "date",
amount_col_label: "amount",
name_col_label: "name",
account_col_label: "account",
date_format: "%m/%d/%Y",
amount_type_strategy: "signed_amount",
signage_convention: "inflows_negative"
)
@import.generate_rows_from_csv
# Map both accounts
@import.mappings.create!(key: "Checking Account", mappable: checking, type: "Import::AccountMapping")
@import.mappings.create!(key: "Credit Card", mappable: credit_card, type: "Import::AccountMapping")
@import.mappings.create!(key: "", mappable: nil, create_when_empty: false, type: "Import::CategoryMapping")
@import.mappings.create!(key: "", mappable: nil, create_when_empty: false, type: "Import::TagMapping")
@import.reload
# Should succeed without errors
assert_difference -> { Entry.count } => 2,
-> { Transaction.count } => 2 do
@import.publish
end
assert_equal "complete", @import.status
# Check that each account got one entry from this import
assert_equal 1, checking.entries.where(import: @import).count
assert_equal 1, credit_card.entries.where(import: @import).count
end
end