sure/app/models/import.rb

class Import < ApplicationRecord
  MaxRowCountExceededError = Class.new(StandardError)
  MappingError = Class.new(StandardError)

  # Shared CSV upload/content limit for web and API imports, including preflight.
  MAX_CSV_SIZE = 10.megabytes
  MAX_PDF_SIZE = 25.megabytes
  ALLOWED_CSV_MIME_TYPES = %w[text/csv text/plain application/vnd.ms-excel application/csv].freeze
  ALLOWED_PDF_MIME_TYPES = %w[application/pdf].freeze

  DOCUMENT_TYPES = %w[bank_statement credit_card_statement investment_statement financial_document contract other].freeze

  TYPES = %w[TransactionImport TradeImport AccountImport MintImport ActualImport CategoryImport RuleImport PdfImport QifImport SureImport].freeze
  SIGNAGE_CONVENTIONS = %w[inflows_positive inflows_negative]
  SEPARATORS = [ [ "Comma (,)", "," ], [ "Semicolon (;)", ";" ] ].freeze

  def self.separator_options
    [
      [ I18n.t("activerecord.attributes.import.col_seps.comma"), "," ],
      [ I18n.t("activerecord.attributes.import.col_seps.semicolon"), ";" ]
    ]
  end

  NUMBER_FORMATS = {
    "1,234.56" => { separator: ".", delimiter: "," },  # US/UK/Asia
    "1.234,56" => { separator: ",", delimiter: "." },  # Most of Europe
    "1 234,56" => { separator: ",", delimiter: " " },  # French/Scandinavian
    "1,234"    => { separator: "",  delimiter: "," }   # Zero-decimal currencies like JPY
  }.freeze

  def self.reasonable_date_range
    Date.new(1970, 1, 1)..Date.today.next_year(5)
  end

  def self.max_csv_size
    MAX_CSV_SIZE
  end

  AMOUNT_TYPE_STRATEGIES = %w[signed_amount custom_column].freeze

  belongs_to :family
  belongs_to :account, optional: true

  before_validation :set_default_number_format
  before_validation :ensure_utf8_encoding

  scope :ordered, -> { order(created_at: :desc) }

  enum :status, {
    pending: "pending",
    complete: "complete",
    importing: "importing",
    reverting: "reverting",
    revert_failed: "revert_failed",
    failed: "failed"
  }, validate: true, default: "pending"

  validates :type, inclusion: { in: TYPES }
  validates :amount_type_strategy, inclusion: { in: AMOUNT_TYPE_STRATEGIES }
  validates :col_sep, inclusion: { in: SEPARATORS.map(&:last) }
  validates :signage_convention, inclusion: { in: SIGNAGE_CONVENTIONS }, allow_nil: true
  validates :number_format, presence: true, inclusion: { in: NUMBER_FORMATS.keys }
  validate :custom_column_import_requires_identifier
  validates :rows_to_skip, numericality: { only_integer: true, greater_than_or_equal_to: 0 }
  validate :account_belongs_to_family
  validate :rows_to_skip_within_file_bounds

  has_many :rows, dependent: :destroy
  has_many :mappings, dependent: :destroy
  has_many :accounts, dependent: :destroy
  has_many :entries, dependent: :destroy

  class << self
    def parse_csv_str(csv_str, col_sep: ",")
      CSV.parse(
        (csv_str || "").strip,
        headers: true,
        col_sep: col_sep,
        converters: [ ->(str) { str&.strip } ],
        liberal_parsing: true
      )
    end

    # Attempts to identify the best-matching date format from a list of candidates
    # by trying to parse sample date strings with each format.
    #
    # Returns the strptime format string (e.g. "%m-%d-%Y") that best matches the
    # samples, or the +fallback+ when no candidate can parse any sample.
    #
    # Scoring:
    #   1. Formats that parse ALL samples beat those that only parse some.
    #   2. Among equal parse counts, formats whose parsed dates fall within a
    #      reasonable range (1970..today+5y) score higher.
    def detect_date_format(samples, candidates: Family::DATE_FORMATS.map(&:last), fallback: "%Y-%m-%d")
      return fallback if samples.blank?

      cleaned = samples.map(&:to_s).reject(&:blank?).uniq.first(50)
      return fallback if cleaned.empty?

      reasonable_range = reasonable_date_range

      scored = candidates.map do |fmt|
        parsed_count     = 0
        reasonable_count = 0

        cleaned.each do |s|
          begin
            date = Date.strptime(s, fmt)
          rescue Date::Error, ArgumentError
            next
          end
          next unless date

          parsed_count += 1
          reasonable_count += 1 if reasonable_range.cover?(date)
        end

        { format: fmt, parsed: parsed_count, reasonable: reasonable_count }
      end

      # Filter to candidates that parsed at least one sample
      viable = scored.select { |s| s[:parsed] > 0 }
      return fallback if viable.empty?

      best = viable.max_by { |s| [ s[:parsed], s[:reasonable] ] }
      best[:format]
    end
  end

  def publish_later
    raise MaxRowCountExceededError if row_count_exceeded?
    raise "Import is not publishable" unless publishable?

    update! status: :importing

    ImportJob.perform_later(self)
  end

  def publish
    raise MaxRowCountExceededError if row_count_exceeded?

    import!

    family.sync_later

    update! status: :complete
  rescue => error
    update! status: :failed, error: error.message
  end

  def revert_later
    raise "Import is not revertable" unless revertable?

    update! status: :reverting

    RevertImportJob.perform_later(self)
  end

  def revert
    Import.transaction do
      accounts.destroy_all
      entries.destroy_all
    end

    family.sync_later

    update! status: :pending
  rescue => error
    update! status: :revert_failed, error: error.message
  end

  def csv_rows
    @csv_rows ||= parsed_csv
  end

  def csv_headers
    parsed_csv.headers
  end

  def csv_sample
    @csv_sample ||= parsed_csv.first(2)
  end

  def dry_run
    mappings = {
      transactions: rows_count,
      categories: Import::CategoryMapping.for_import(self).creational.count,
      tags: Import::TagMapping.for_import(self).creational.count
    }

    mappings.merge(
      accounts: Import::AccountMapping.for_import(self).creational.count,
    ) if account.nil?

    mappings
  end

  def required_column_keys
    []
  end

  # Returns false for import types that don't need CSV column mapping (e.g., PdfImport).
  # Override in subclasses that handle data extraction differently.
  def requires_csv_workflow?
    true
  end

  # Subclasses that require CSV workflow must override this.
  # Non-CSV imports (e.g., PdfImport) can return [].
  def column_keys
    raise NotImplementedError, "Subclass must implement column_keys"
  end

  def generate_rows_from_csv
    rows.destroy_all

    mapped_rows = csv_rows.map.with_index(1) do |row, index|
      {
        source_row_number: index,
        account: csv_value(row, account_col_label, "account", "account_name").to_s,
        date: csv_value(row, date_col_label, "date").to_s,
        qty: sanitize_number(csv_value(row, qty_col_label, "qty", "quantity")).to_s,
        ticker: csv_value(row, ticker_col_label, "ticker").to_s,
        exchange_operating_mic: csv_value(row, exchange_operating_mic_col_label, "exchange_operating_mic").to_s,
        price: sanitize_number(csv_value(row, price_col_label, "price")).to_s,
        amount: sanitize_number(csv_value(row, amount_col_label, "amount", "balance")).to_s,
        currency: (csv_value(row, currency_col_label, "currency") || default_currency).to_s,
        name: (csv_value(row, name_col_label, "name") || default_row_name).to_s,
        category: csv_value(row, category_col_label, "category").to_s,
        tags: csv_value(row, tags_col_label, "tags").to_s,
        entity_type: csv_value(row, entity_type_col_label, "entity_type", "account_type", "type").to_s,
        notes: csv_value(row, notes_col_label, "notes").to_s
      }
    end

    rows.insert_all!(mapped_rows)
    update_column(:rows_count, rows.count)
  end

  def sync_mappings
    transaction do
      mapping_steps.each do |mapping_class|
        mappables_by_key = mapping_class.mappables_by_key(self)

        updated_mappings = mappables_by_key.map do |key, mappable|
          mapping = mappings.find_or_initialize_by(key: key, import: self, type: mapping_class.name)
          mapping.mappable = mappable
          mapping.create_when_empty = key.present? && mappable.nil?
          mapping
        end

        updated_mappings.each { |m| m.save(validate: false) }
        mapping_class.where.not(id: updated_mappings.map(&:id)).destroy_all
      end
    end
  end

  def mapping_steps
    []
  end

  def rows_ordered
    rows.ordered
  end

  def uploaded?
    raw_file_str.present?
  end

  def configured?
    uploaded? && rows_count > 0
  end

  def configured_for_status_detail?
    configured?
  end

  def cleaned?
    configured? && rows.all?(&:valid?)
  end

  def publishable?
    cleaned? && mappings.all?(&:valid?)
  end

  def cleaned_from_validation_stats?(invalid_rows_count:)
    configured? && invalid_rows_count.zero?
  end

  def publishable_from_validation_stats?(invalid_rows_count:)
    cleaned_from_validation_stats?(invalid_rows_count: invalid_rows_count) && mappings.all?(&:valid?)
  end

  def mapping_status_counts
    mappable_ids = mappings.pluck(:mappable_id)

    {
      mappings_count: mappable_ids.size,
      unassigned_mappings_count: mappable_ids.count(&:nil?)
    }
  end

  def revertable?
    complete? || revert_failed?
  end

  def has_unassigned_account?
    mappings.accounts.where(key: "").any?
  end

  def requires_account?
    family.accounts.empty? && has_unassigned_account?
  end

  # Used to optionally pre-fill the configuration for the current import
  def suggested_template
    family.imports
          .complete
          .where(account: account, type: type)
          .order(created_at: :desc)
          .first
  end

  def apply_template!(import_template)
    update!(
      import_template.attributes.slice(
        "date_col_label", "amount_col_label", "name_col_label",
        "category_col_label", "tags_col_label", "account_col_label",
        "qty_col_label", "ticker_col_label", "price_col_label",
        "entity_type_col_label", "notes_col_label", "currency_col_label",
        "date_format", "signage_convention", "number_format",
        "exchange_operating_mic_col_label",
        "rows_to_skip"
      )
    )
  end

  # Returns date formats that can successfully parse the file's date samples,
  # filtered to dates within reasonable_date_range.
  # Result: array of { label:, format:, preview: } hashes.
  # Subclasses should override #raw_date_samples to provide date strings.
  def valid_date_formats_with_preview
    first_sample = raw_date_samples.find(&:present?)
    return [] if first_sample.blank?

    Family::DATE_FORMATS.filter_map do |label, fmt|
      parsed = try_parse_date_sample(first_sample, format: fmt)
      next unless parsed
      next unless self.class.reasonable_date_range.cover?(Date.parse(parsed))

      { label: label, format: fmt, preview: parsed }
    end
  end

  # Returns raw date strings from the import file for format detection/preview.
  # Subclasses should override to extract dates from their specific format.
  def raw_date_samples
    []
  end

  # Attempts to parse a raw date sample with the given strptime format.
  # Returns ISO 8601 date string or nil. Subclasses can override for
  # format-specific normalization (e.g. QIF apostrophe dates).
  def try_parse_date_sample(sample, format:)
    Date.strptime(sample, format).iso8601
  rescue Date::Error, ArgumentError
    nil
  end

  def max_row_count
    10000
  end

  private
    def row_count_exceeded?
      rows_count > max_row_count
    end

    def import!
      # no-op, subclasses can implement for customization of algorithm
    end

    def default_row_name
      "Imported item"
    end

    def default_currency
      account&.currency || family.currency
    end

    def csv_value(row, label, *aliases)
      return if label.blank?

      [ label, *aliases ].each do |candidate|
        header = header_for(candidate)
        next if header.blank?

        value = row[header]
        return value if value.present?
      end

      nil
    end

    def header_for(candidate)
      return if candidate.blank?

      normalized_csv_headers[normalize_header(candidate)]
    end

    def normalized_csv_headers
      @normalized_csv_headers ||= begin
        grouped_headers = Array(csv_headers)
          .filter_map do |header|
            normalized = normalize_header(header)
            next if normalized.blank?

            [ normalized, header ]
          end
          .group_by(&:first)

        duplicate_headers = grouped_headers.values.filter_map do |headers|
          originals = headers.map(&:last).uniq
          originals if originals.many?
        end

        if duplicate_headers.any?
          errors.add(:base, :duplicate_headers, columns: duplicate_headers.map { |headers| headers.join(", ") }.join("; "))
          raise ActiveRecord::RecordInvalid, self
        end

        grouped_headers.transform_values { |headers| headers.first.last }
      end
    end

    def normalize_header(header)
      header.to_s.strip.downcase.gsub(/\*/, "").gsub(/[\s-]+/, "_")
    end

    def parsed_csv
      return @parsed_csv if defined?(@parsed_csv)

      csv_content = raw_file_str || ""
      if rows_to_skip.to_i > 0
        csv_content = csv_content.lines.drop(rows_to_skip).join
      end

      @parsed_csv = self.class.parse_csv_str(csv_content, col_sep: col_sep)
    end

    def sanitize_number(value)
      return "" if value.nil?

      format = NUMBER_FORMATS[number_format]
      return "" unless format

      # First, normalize spaces and remove any characters that aren't numbers, delimiters, separators, or minus signs
      sanitized = value.to_s.strip

      # Handle French/Scandinavian format specially
      if format[:delimiter] == " "
        sanitized = sanitized.gsub(/\s+/, "") # Remove all spaces first
      else
        sanitized = sanitized.gsub(/[^\d#{Regexp.escape(format[:delimiter])}#{Regexp.escape(format[:separator])}\-]/, "")

        # Replace delimiter with empty string
        if format[:delimiter].present?
          sanitized = sanitized.gsub(format[:delimiter], "")
        end
      end

      # Replace separator with period for proper float parsing
      if format[:separator].present?
        sanitized = sanitized.gsub(format[:separator], ".")
      end

      # Return empty string if not a valid number
      unless sanitized =~ /\A-?\d+\.?\d*\z/
        return ""
      end

      sanitized
    end

    def set_default_number_format
      self.number_format ||= "1,234.56" # Default to US/UK format
    end

    def custom_column_import_requires_identifier
      return unless amount_type_strategy == "custom_column"

      if amount_type_inflow_value.blank?
        errors.add(:base, I18n.t("imports.errors.custom_column_requires_inflow"))
      end
    end

    # Common encodings to try when UTF-8 detection fails
    # Windows-1250 is prioritized for Central/Eastern European languages
    COMMON_ENCODINGS = [ "Windows-1250", "Windows-1252", "ISO-8859-1", "ISO-8859-2" ].freeze

    def ensure_utf8_encoding
      # Handle nil or empty string first (before checking if changed)
      return if raw_file_str.nil? || raw_file_str.bytesize == 0

      # Only process if the attribute was changed
      # Use will_save_change_to_attribute? which is safer for binary data
      return unless will_save_change_to_raw_file_str?

      # If already valid UTF-8, nothing to do
      begin
        if raw_file_str.encoding == Encoding::UTF_8 && raw_file_str.valid_encoding?
          return
        end
      rescue ArgumentError
        # raw_file_str might have invalid encoding, continue to detection
      end

      # Detect encoding using rchardet
      begin
        require "rchardet"
        detection = CharDet.detect(raw_file_str)
        detected_encoding = detection["encoding"]
        confidence = detection["confidence"]

        # Only convert if we have reasonable confidence in the detection
        if detected_encoding && confidence > 0.75
          # Force encoding and convert to UTF-8
          self.raw_file_str = raw_file_str.force_encoding(detected_encoding).encode("UTF-8", invalid: :replace, undef: :replace)
        else
          # Fallback: try common encodings
          try_common_encodings
        end
      rescue LoadError
        # rchardet not available, fallback to trying common encodings
        try_common_encodings
      rescue ArgumentError, Encoding::CompatibilityError => e
        # Handle encoding errors by falling back to common encodings
        try_common_encodings
      end
    end

    def try_common_encodings
      COMMON_ENCODINGS.each do |encoding|
        begin
          test = raw_file_str.dup.force_encoding(encoding)
          if test.valid_encoding?
            self.raw_file_str = test.encode("UTF-8", invalid: :replace, undef: :replace)
            return
          end
        rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError
          next
        end
      end

      # If nothing worked, force UTF-8 and replace invalid bytes
      self.raw_file_str = raw_file_str.force_encoding("UTF-8").scrub("?")
    end

    def account_belongs_to_family
      return if account.nil?
      return if account.family_id == family_id

      errors.add(:account, "must belong to your family")
    end

    def rows_to_skip_within_file_bounds
      return if raw_file_str.blank?
      return if rows_to_skip.to_i == 0

      line_count = raw_file_str.lines.count

      if rows_to_skip.to_i >= line_count
        errors.add(:rows_to_skip, "must be less than the number of lines in the file (#{line_count})")
      end
    end
end