Files
sure/test/models/import_encoding_test.rb
Copilot 5b736bf691 Fix CSV import for non-UTF-8 encodings (Windows-1250, ISO-8859-2, etc.) (#617)
* Initial plan

* Add encoding detection for CSV imports to handle Windows-1250 and other non-UTF-8 encodings

Co-authored-by: jjmata <187772+jjmata@users.noreply.github.com>

* Improve encoding detection: prioritize Windows-1250 and increase confidence threshold

Co-authored-by: jjmata <187772+jjmata@users.noreply.github.com>

* Update Gemfile.lock with rchardet dependency

Co-authored-by: jjmata <187772+jjmata@users.noreply.github.com>

* Refactor: Extract common encodings to constant and deduplicate code

Co-authored-by: jjmata <187772+jjmata@users.noreply.github.com>

* Fix Rubocop style violations

Co-authored-by: jjmata <187772+jjmata@users.noreply.github.com>

* Fix linter violations and encoding detection logic

- Remove trailing whitespace from test file (9 lines)
- Fix ensure_utf8_encoding to handle binary strings properly by checking bytesize instead of blank?
- Add error handling for ArgumentError and Encoding::CompatibilityError
- Add invalid/undef replacement options to encode calls for robustness

Co-authored-by: jjmata <187772+jjmata@users.noreply.github.com>

* Fix encoding error in ensure_utf8_encoding method

- Use will_save_change_to_raw_file_str? instead of raw_file_str_changed? to avoid encoding errors when checking if attribute changed
- Wrap UTF-8 validation check in begin/rescue to handle ArgumentError from invalid encodings
- This fixes the test failure: "ArgumentError: invalid byte sequence in UTF-8"

Co-authored-by: jjmata <187772+jjmata@users.noreply.github.com>

* Fix test: add missing column labels and reload import before checking rows

Co-authored-by: jjmata <187772+jjmata@users.noreply.github.com>

* Fix test: ensure import is reloaded before checking rows_count and accessing rows

Co-authored-by: jjmata <187772+jjmata@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: jjmata <187772+jjmata@users.noreply.github.com>
2026-01-12 10:17:55 +01:00

75 lines
2.5 KiB
Ruby

require "test_helper"
# Exercises how an import's raw_file_str behaves when it is given CSV data in
# different encodings: a Windows-1250 fixture that must end up as valid UTF-8
# and still parse into rows, and a UTF-8 fixture that must come through
# untouched.
class ImportEncodingTest < ActiveSupport::TestCase
  setup do
    @family = families(:dylan_family)
    @account = accounts(:depository)
  end

  test "successfully imports Windows-1250 encoded CSV" do
    # Read raw bytes so the fixture arrives tagged as binary (ASCII-8BIT).
    file_path = Rails.root.join("test/fixtures/files/imports/windows1250.csv")
    csv_content = File.binread(file_path)

    # Sanity-check the fixture: binary-tagged and not decodable as UTF-8.
    assert_equal Encoding::ASCII_8BIT, csv_content.encoding
    # String#force_encoding retags its receiver in place, so probe a copy.
    # Otherwise the string handed to the import below would no longer be the
    # binary-tagged data that was just asserted above.
    refute csv_content.dup.force_encoding("UTF-8").valid_encoding?, "Test file should not be valid UTF-8"

    import = @family.imports.create!(
      type: "TransactionImport",
      account: @account,
      date_format: "%Y-%m-%d",
      date_col_label: "Date",
      amount_col_label: "Amount",
      name_col_label: "Name",
      category_col_label: "Category",
      tags_col_label: "Tags",
      account_col_label: "Account",
      notes_col_label: "Notes",
      signage_convention: "inflows_negative",
      amount_type_strategy: "signed_amount"
    )

    # With encoding detection, assigning the non-UTF-8 content must not raise.
    assert_nothing_raised do
      import.update!(raw_file_str: csv_content)
    end

    # The stored string must now be tagged as, and actually be, valid UTF-8.
    assert_equal Encoding::UTF_8, import.raw_file_str.encoding
    assert_predicate import.raw_file_str, :valid_encoding?, "Converted string should be valid UTF-8"

    # Row generation must work on the converted content.
    assert_nothing_raised do
      import.generate_rows_from_csv
    end

    # Reload before reading rows_count and rows so the assertions below see
    # the persisted state rather than the in-memory record.
    import.reload
    assert_operator import.rows_count, :>, 0, "Expected rows to be created from Windows-1250 CSV"
    assert_equal 3, import.rows_count, "Expected 3 data rows"

    # Polish characters must come through the conversion intact.
    first_row = import.rows.first
    assert_not_nil first_row, "Expected first row to exist"
    assert_includes first_row.name, "spożywczy", "Polish characters should be preserved"
  end

  test "handles UTF-8 files without modification" do
    file_path = Rails.root.join("test/fixtures/files/imports/transactions.csv")
    csv_content = File.read(file_path, encoding: "UTF-8")

    import = @family.imports.create!(
      type: "TransactionImport",
      account: @account,
      date_format: "%Y-%m-%d",
      raw_file_str: csv_content
    )

    # UTF-8 content should remain unchanged: same encoding, still valid, and
    # identical to what was read from disk.
    assert_equal Encoding::UTF_8, import.raw_file_str.encoding
    assert_predicate import.raw_file_str, :valid_encoding?
    assert_equal csv_content, import.raw_file_str, "UTF-8 content should not be altered"
  end
end