mirror of
https://github.com/we-promise/sure.git
synced 2026-04-11 00:04:47 +00:00
* Initial plan

* Add encoding detection for CSV imports to handle Windows-1250 and other non-UTF-8 encodings

  Co-authored-by: jjmata <187772+jjmata@users.noreply.github.com>

* Improve encoding detection: prioritize Windows-1250 and increase confidence threshold

  Co-authored-by: jjmata <187772+jjmata@users.noreply.github.com>

* Update Gemfile.lock with rchardet dependency

  Co-authored-by: jjmata <187772+jjmata@users.noreply.github.com>

* Refactor: Extract common encodings to constant and deduplicate code

  Co-authored-by: jjmata <187772+jjmata@users.noreply.github.com>

* Fix Rubocop style violations

  Co-authored-by: jjmata <187772+jjmata@users.noreply.github.com>

* Fix linter violations and encoding detection logic

  - Remove trailing whitespace from test file (9 lines)
  - Fix ensure_utf8_encoding to handle binary strings properly by checking bytesize instead of blank?
  - Add error handling for ArgumentError and Encoding::CompatibilityError
  - Add invalid/undef replacement options to encode calls for robustness

  Co-authored-by: jjmata <187772+jjmata@users.noreply.github.com>

* Fix encoding error in ensure_utf8_encoding method

  - Use will_save_change_to_raw_file_str? instead of raw_file_str_changed? to avoid encoding errors when checking if attribute changed
  - Wrap UTF-8 validation check in begin/rescue to handle ArgumentError from invalid encodings
  - This fixes the test failure: "ArgumentError: invalid byte sequence in UTF-8"

  Co-authored-by: jjmata <187772+jjmata@users.noreply.github.com>

* Fix test: add missing column labels and reload import before checking rows

  Co-authored-by: jjmata <187772+jjmata@users.noreply.github.com>

* Fix test: ensure import is reloaded before checking rows_count and accessing rows

  Co-authored-by: jjmata <187772+jjmata@users.noreply.github.com>

---------

Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com>
Co-authored-by: jjmata <187772+jjmata@users.noreply.github.com>
75 lines
2.5 KiB
Ruby
75 lines
2.5 KiB
Ruby
require "test_helper"
|
|
|
|
# Checks how an import's raw_file_str behaves for different source encodings:
# a Windows-1250 CSV fixture must come back as valid UTF-8 with its Polish
# characters intact, and a UTF-8 CSV fixture must come back unchanged.
class ImportEncodingTest < ActiveSupport::TestCase
  setup do
    @family = families(:dylan_family)
    @account = accounts(:depository)
  end

  test "successfully imports Windows-1250 encoded CSV" do
    # Test that Windows-1250 encoded files are properly converted to UTF-8
    file_path = Rails.root.join("test/fixtures/files/imports/windows1250.csv")
    csv_content = File.binread(file_path)

    # Verify the file is not UTF-8
    assert_equal Encoding::ASCII_8BIT, csv_content.encoding
    # String#force_encoding retags its receiver in place, so probe a copy.
    # Otherwise the string handed to the import below would no longer be the
    # binary-tagged bytes read from disk.
    refute csv_content.dup.force_encoding("UTF-8").valid_encoding?, "Test file should not be valid UTF-8"

    import = @family.imports.create!(
      type: "TransactionImport",
      account: @account,
      date_format: "%Y-%m-%d",
      date_col_label: "Date",
      amount_col_label: "Amount",
      name_col_label: "Name",
      category_col_label: "Category",
      tags_col_label: "Tags",
      account_col_label: "Account",
      notes_col_label: "Notes",
      signage_convention: "inflows_negative",
      amount_type_strategy: "signed_amount"
    )

    # With encoding detection, the import should succeed
    assert_nothing_raised do
      import.update!(raw_file_str: csv_content)
    end

    # Verify the raw_file_str was converted to UTF-8
    assert_equal Encoding::UTF_8, import.raw_file_str.encoding
    assert import.raw_file_str.valid_encoding?, "Converted string should be valid UTF-8"

    # Verify we can generate rows from the CSV
    assert_nothing_raised do
      import.generate_rows_from_csv
    end

    # Verify that rows were created (reload first so the counts below are
    # read from the persisted record rather than the in-memory copy)
    import.reload
    assert import.rows_count > 0, "Expected rows to be created from Windows-1250 CSV"
    assert_equal 3, import.rows_count, "Expected 3 data rows"

    # Verify Polish characters were preserved correctly
    first_row = import.rows.first
    assert_not_nil first_row, "Expected first row to exist"
    assert_includes first_row.name, "spożywczy", "Polish characters should be preserved"
  end

  test "handles UTF-8 files without modification" do
    # Test that valid UTF-8 files are not modified
    file_path = Rails.root.join("test/fixtures/files/imports/transactions.csv")
    csv_content = File.read(file_path, encoding: "UTF-8")

    import = @family.imports.create!(
      type: "TransactionImport",
      account: @account,
      date_format: "%Y-%m-%d",
      raw_file_str: csv_content
    )

    # UTF-8 content should remain unchanged
    assert_equal Encoding::UTF_8, import.raw_file_str.encoding
    assert import.raw_file_str.valid_encoding?
    # Checking encoding alone would still pass if the content had been
    # re-encoded or altered, so compare the stored string with the input.
    assert_equal csv_content, import.raw_file_str, "Valid UTF-8 content should be stored unmodified"
  end
end
|