mirror of
https://github.com/we-promise/sure.git
synced 2026-04-11 16:24:51 +00:00
* feat: improve QIF import date format selection - Added a reusable date format auto-detection method. - Show a live preview of the first parsed date that updates client-side as the user changes the dropdown selection, via a new qif-date-format Stimulus controller. - Show an error alert and disable the submit button when no supported date format can parse the file's dates. * A few polishing fixes: - Missing return on redirects Stale REASONABLE_DATE_RANGE constant. - Replaced the frozen constant with a class method Bare inline rescue — Replaced Date.strptime(s, fmt) rescue nil with an explicit begin/rescue catching. - save!(validate: false) in controller — Changed to update_column(:column_mappings, ...) in qif_category_selections_controller.rb:22, matching the pattern used in detect_and_set_qif_date_format!. - Unescaped JSON in HTML attribute — Replaced the raw <div> with tag.div ... do block in show.html.erb:16, letting Rails properly escape the data attribute value. * fix: address review feedback for QIF date format feature - Add missing `return` after redirect for non-QIF imports - Pass date_format to parse_opening_balance in will_adjust_opening_anchor? - Return empty array when no usable date sample exists for format preview - Add sr-only label to date format select for accessibility - Consolidate duplicate try_parse_date/parse_qif_date into single method - Remove misleading ambiguity scoring comment from detect_date_format - Skip redundant sync_mappings when date format already triggered a sync Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * Use %{product_name} interpolation in locale strings --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
473 lines
16 KiB
Ruby
473 lines
16 KiB
Ruby
# Parses QIF (Quicken Interchange Format) files.
#
# A QIF file is a plain-text format exported by Quicken. It is divided into
# sections, each introduced by a "!Type:<name>" header line. Records within
# a section are terminated by a "^" line. Each data line starts with a single
# letter field code followed immediately by the value.
#
# Sections handled:
#   !Type:Tag      – tag definitions (N=name, D=description)
#   !Type:Cat      – category definitions (N=name, D=description, I=income, E=expense)
#   !Type:Security – security definitions (N=name, S=ticker, T=type)
#   !Type:CCard / !Type:Bank / !Type:Cash / !Type:Oth L – transactions
#   !Type:Invst    – investment transactions
#
# Transaction field codes:
#   D  date       M/ D'YY or MM/DD'YYYY
#   T  amount     may include commas, e.g. "-1,234.56"
#   U  amount     same as T (alternate field)
#   P  payee
#   M  memo
#   L  category   plain name or [TransferAccount]; /Tag suffix is supported
#   N  check/ref  (not a tag – the check number or reference)
#   C  cleared    X = cleared, * = reconciled
#   ^  end of record
#
# Investment-specific field codes (in !Type:Invst records):
#   N  action     Buy, Sell, Div, XIn, XOut, IntInc, CGLong, CGShort, etc.
#   Y  security   security name (matches N field in !Type:Security)
#   I  price      price per share
#   Q  quantity   number of shares
#   T  total      total cash amount of transaction
module QifParser
  TRANSACTION_TYPES = %w[CCard Bank Cash Invst Oth\ L Oth\ A].freeze

  # Section names that hold definitions rather than account data. Kept in
  # lower case because section headers are matched case-insensitively.
  METADATA_SECTION_TYPES = %w[tag cat security prices].freeze

  # Investment action types that create Trade records (buy or sell shares).
  BUY_LIKE_ACTIONS  = %w[Buy ReinvDiv Cover].freeze
  SELL_LIKE_ACTIONS = %w[Sell ShtSell].freeze
  TRADE_ACTIONS     = (BUY_LIKE_ACTIONS + SELL_LIKE_ACTIONS).freeze

  # Investment action types that create Transaction records.
  INFLOW_TRANSACTION_ACTIONS  = %w[Div IntInc XIn CGLong CGShort MiscInc].freeze
  OUTFLOW_TRANSACTION_ACTIONS = %w[XOut MiscExp].freeze

  ParsedTransaction = Struct.new(
    :date, :amount, :payee, :memo, :category, :tags, :check_num, :cleared, :split,
    keyword_init: true
  )

  ParsedCategory = Struct.new(:name, :description, :income, keyword_init: true)
  ParsedTag      = Struct.new(:name, :description, keyword_init: true)

  ParsedSecurity = Struct.new(:name, :ticker, :security_type, keyword_init: true)

  ParsedInvestmentTransaction = Struct.new(
    :date, :action, :security_name, :security_ticker,
    :price, :qty, :amount, :memo, :payee, :category, :tags,
    keyword_init: true
  )

  # ------------------------------------------------------------------
  # Public API
  # ------------------------------------------------------------------

  # Code pages tried, in order, when the file is not valid UTF-8.
  # Quicken on Windows writes QIF files in a Windows code page that varies by region:
  #   Windows-1252 – North America, Western Europe
  #   Windows-1250 – Central/Eastern Europe (Poland, Czech Republic, Hungary, …)
  FALLBACK_ENCODINGS = %w[Windows-1252 Windows-1250].freeze

  # Byte-order mark that may prefix a UTF-8 file.
  UTF8_BOM = "\uFEFF".freeze

  # Transcodes raw file bytes to UTF-8.
  #
  # Content that is already valid UTF-8 is returned as a UTF-8 copy with any
  # leading byte-order mark removed: every section lookup anchors on
  # "^!Type:", so a BOM in front of the first header would hide that section.
  #
  # Otherwise each fallback encoding is tried with undefined bytes raising, so
  # an encoding is only accepted when every byte in the file is defined in
  # that code page. Windows-1252 has five undefined byte values (0x81, 0x8D,
  # 0x8F, 0x90, 0x9D); if any are present we fall through to Windows-1250,
  # which covers those slots differently.
  #
  # @param content [String, nil] raw file content in any encoding
  # @return [String, nil] UTF-8 string, or nil when +content+ is nil
  def self.normalize_encoding(content)
    return content if content.nil?

    binary = content.b # Force ASCII-8BIT; never raises on invalid bytes

    utf8_attempt = binary.dup.force_encoding("UTF-8")
    return utf8_attempt.delete_prefix(UTF8_BOM) if utf8_attempt.valid_encoding?

    FALLBACK_ENCODINGS.each do |encoding|
      begin
        return binary.encode("UTF-8", encoding)
      rescue Encoding::UndefinedConversionError
        next
      end
    end

    # Last resort: drop any remaining undefined bytes rather than raise.
    binary.encode("UTF-8", "Windows-1252", invalid: :replace, undef: :replace, replace: "")
  end

  # Returns true if the content looks like a valid QIF file, i.e. it is not
  # blank and contains at least one "!Type:" header. The check runs on a
  # binary copy so invalid byte sequences cannot raise.
  def self.valid?(content)
    return false if content.blank?

    content.b.include?("!Type:")
  end

  # Returns the transaction account type string (e.g. "CCard", "Bank", "Invst")
  # taken from the first "!Type:" header that is not a metadata section.
  # Metadata sections (Tag, Cat, Security, Prices) are skipped regardless of
  # letter case, matching the case-insensitive header lookup used elsewhere.
  #
  # @return [String, nil] the header name as written (stripped), or nil
  def self.account_type(content)
    return nil if content.blank?

    content.scan(/^!Type:(.+)/i)
           .flatten
           .map(&:strip)
           .find { |name| !METADATA_SECTION_TYPES.include?(name.downcase) }
  end

  # Parses all transactions from the file, excluding the Opening Balance entry.
  # Records without a parseable date and amount are skipped.
  #
  # @param date_format [String] strptime format used for the D field
  # @return [Array<ParsedTransaction>]
  def self.parse(content, date_format: "%m/%d/%Y")
    transaction_records(content).filter_map do |record|
      build_transaction(record, date_format: date_format)
    end
  end

  # Returns the opening balance entry from the QIF file, if present.
  # In Quicken's QIF format, the first transaction of a bank/cash account is often
  # an "Opening Balance" record with payee "Opening Balance". This entry is NOT a
  # real transaction – it is the account's starting balance.
  #
  # Returns a hash { date: Date, amount: BigDecimal } or nil.
  def self.parse_opening_balance(content, date_format: "%m/%d/%Y")
    record = transaction_records(content).find { |r| r["P"]&.strip == "Opening Balance" }
    return nil unless record

    date   = parse_qif_date(record["D"], date_format: date_format)
    amount = parse_qif_amount(record["T"] || record["U"])
    return nil unless date && amount

    { date: Date.parse(date), amount: amount.to_d }
  end

  # Parses categories from the !Type:Cat section.
  # A category counts as income only when it carries the I flag and not the E flag.
  # Returns an array of ParsedCategory structs.
  def self.parse_categories(content)
    section_records(content, "Cat").filter_map do |record|
      next unless record["N"].present?

      ParsedCategory.new(
        name: record["N"],
        description: record["D"],
        income: record.key?("I") && !record.key?("E")
      )
    end
  end

  # Parses tags from the !Type:Tag section.
  # Returns an array of ParsedTag structs.
  def self.parse_tags(content)
    section_records(content, "Tag").filter_map do |record|
      next unless record["N"].present?

      ParsedTag.new(name: record["N"], description: record["D"])
    end
  end

  # Parses all !Type:Security sections and returns an array of ParsedSecurity structs.
  # Each security in a QIF file gets its own !Type:Security header, so we scan
  # for all occurrences rather than just the first. Records lacking a name (N)
  # or ticker (S) are skipped.
  def self.parse_securities(content)
    return [] if content.blank?

    content = prepare_content(content)
    securities = []

    content.scan(/^!Type:Security[^\n]*\n(.*?)(?=^!Type:|\z)/mi) do |captures|
      parse_records(captures[0]).each do |record|
        next unless record["N"].present? && record["S"].present?

        securities << ParsedSecurity.new(
          name: record["N"].strip,
          ticker: record["S"].strip,
          security_type: record["T"]&.strip
        )
      end
    end

    securities
  end

  # Parses investment transactions from the !Type:Invst section.
  # Uses the !Type:Security sections to resolve security names to tickers.
  # Returns an array of ParsedInvestmentTransaction structs.
  def self.parse_investment_transactions(content, date_format: "%m/%d/%Y")
    return [] unless valid?(content)

    content = prepare_content(content)
    ticker_by_name = parse_securities(content).to_h { |security| [ security.name, security.ticker ] }

    section = extract_section(content, "Invst")
    return [] unless section

    parse_records(section).filter_map do |record|
      build_investment_transaction(record, ticker_by_name, date_format: date_format)
    end
  end

  # ------------------------------------------------------------------
  # Private helpers
  # ------------------------------------------------------------------

  # Converts CRLF and bare CR line endings to LF.
  def self.normalize_line_endings(content)
    content.gsub(/\r\n?/, "\n")
  end
  private_class_method :normalize_line_endings

  # Returns +content+ as UTF-8 with LF line endings, ready for section lookup.
  def self.prepare_content(content)
    normalize_line_endings(normalize_encoding(content))
  end
  private_class_method :prepare_content

  # Returns the records of the first section named +type_name+, or [] when the
  # content is blank or has no such section.
  def self.section_records(content, type_name)
    return [] if content.blank?

    section = extract_section(prepare_content(content), type_name)
    section ? parse_records(section) : []
  end
  private_class_method :section_records

  # Returns the records of the file's transaction section (the section named
  # by account_type), or [] when the file is not valid QIF or has none.
  def self.transaction_records(content)
    return [] unless valid?(content)

    content = prepare_content(content)
    type = account_type(content)
    return [] unless type

    section = extract_section(content, type)
    section ? parse_records(section) : []
  end
  private_class_method :transaction_records

  # Extracts the raw text of a named section (everything after its !Type: header
  # up to the next !Type: header or end-of-file). Only the first matching
  # section is returned; the header match is case-insensitive.
  def self.extract_section(content, type_name)
    escaped = Regexp.escape(type_name)
    pattern = /^!Type:#{escaped}[^\n]*\n(.*?)(?=^!Type:|\z)/mi
    content.match(pattern)&.captures&.first
  end
  private_class_method :extract_section

  # Splits a section into an array of field-code => value hashes.
  # Single-letter codes with no value (e.g. "I", "E", "T") are stored with nil.
  # Records containing an S line are flagged with "_split" => true; repeated
  # codes within a record keep only the last value.
  def self.parse_records(section_content)
    records = []
    current = {}

    section_content.each_line do |raw_line|
      # Trailing whitespace is dropped so that a terminator written as "^ "
      # still ends the record instead of being stored as a field.
      line = raw_line.chomp.rstrip
      next if line.blank?

      if line == "^"
        records << current unless current.empty?
        current = {}
        next
      end

      code = line[0]

      # Mark records that contain split fields (S = split category, $ = split amount)
      current["_split"] = true if code == "S"

      # Flag fields like "I" (income) and "E" (expense) have no meaningful value
      current[code] = line[1..].strip.presence
    end

    records << current unless current.empty?
    records
  end
  private_class_method :parse_records

  # Builds a ParsedTransaction from a raw record hash, or returns nil when the
  # record is the opening balance or lacks a parseable date or amount.
  def self.build_transaction(record, date_format: "%m/%d/%Y")
    # "Opening Balance" is a Quicken convention for the account's starting balance –
    # it is not a real transaction and must not be imported as one.
    return nil if record["P"]&.strip == "Opening Balance"

    raw_date   = record["D"]
    raw_amount = record["T"] || record["U"]
    return nil unless raw_date.present? && raw_amount.present?

    date   = parse_qif_date(raw_date, date_format: date_format)
    amount = parse_qif_amount(raw_amount)
    return nil unless date && amount

    category, tags = parse_category_and_tags(record["L"])

    ParsedTransaction.new(
      date: date,
      amount: amount,
      payee: record["P"],
      memo: record["M"],
      category: category,
      tags: tags,
      check_num: record["N"],
      cleared: record["C"],
      split: record["_split"] == true
    )
  end
  private_class_method :build_transaction

  # Separates the category name from any tag(s) appended with a "/" delimiter.
  # Multiple tags after the "/" are separated by ":".
  # Transfer accounts are wrapped in brackets – treated as no category.
  #
  # Examples:
  #   "Food & Dining"            → ["Food & Dining", []]
  #   "Food & Dining/EUROPE2025" → ["Food & Dining", ["EUROPE2025"]]
  #   "[TD - Chequing]"          → ["", []]
  def self.parse_category_and_tags(l_field)
    return [ "", [] ] if l_field.blank?

    # Transfer account reference
    return [ "", [] ] if l_field.start_with?("[")

    # Quicken uses "--Split--" as a placeholder category for split transactions
    return [ "", [] ] if l_field.strip.match?(/\A--Split--\z/i)

    category_part, tag_part = l_field.split("/", 2)
    tags = tag_part.present? ? tag_part.split(":").map(&:strip).reject(&:blank?) : []

    [ category_part.strip, tags ]
  end
  private_class_method :parse_category_and_tags

  # Normalizes a QIF date string into a standard format that Date.strptime can
  # handle. QIF files use Quicken-specific conventions:
  #
  #   - Apostrophe as year separator:    6/ 4'20  or  6/ 4'2020
  #   - Optional spaces around components: 6/ 4'20 → 6/4/20
  #   - Dot separators:                  04.06.2020
  #   - Dash separators:                 04-06-2020
  #
  # This method:
  #   1. Strips whitespace
  #   2. Replaces the Quicken apostrophe with the file's date separator
  #   3. Expands a trailing 1- or 2-digit year to 4 digits (2000 + yy, moved
  #      back a century when that would lie after the current year). A
  #      space-padded one-digit year such as 1/ 2' 5 is handled the same way
  #      instead of being read as the literal year 5.
  #   4. Returns a cleaned date string suitable for Date.strptime
  #
  # Returns nil for blank input.
  def self.normalize_qif_date(date_str)
    return nil if date_str.blank?

    s = date_str.strip

    # Replace Quicken apostrophe year separator with the first separator used
    # in the string (defaulting to "/").
    if s.include?("'")
      sep = s.match(%r{[/.\-]})&.to_s || "/"
      s = s.gsub("'", sep)
    end

    # Remove internal spaces (e.g. "6/ 4/20" → "6/4/20")
    s = s.gsub(/\s+/, "")

    # Expand a short year at the end, but only when the string doesn't already
    # contain a 4-digit number (which would be a full year).
    if !s.match?(/\d{4}/) && (m = s.match(%r{\A(.+[/.\-])(\d{1,2})\z}))
      full_year = 2000 + m[2].to_i
      full_year -= 100 if full_year > Date.today.year
      s = "#{m[1]}#{full_year}"
    end

    s
  end
  private_class_method :normalize_qif_date

  # Attempts to parse a raw QIF date string with the given strptime format.
  # The date is first normalized (apostrophe → separator, short-year
  # expansion, whitespace removal) before parsing.
  #
  # +date_format+ should be a strptime format string such as "%m/%d/%Y" or
  # "%d.%m.%Y". Defaults to "%m/%d/%Y" (US convention) for backwards
  # compatibility.
  #
  # Returns the parsed ISO 8601 date string, or nil if the input is blank or
  # does not match the format.
  def self.try_parse_date(date_str, date_format: "%m/%d/%Y")
    normalized = normalize_qif_date(date_str)
    return nil unless normalized

    Date.strptime(normalized, date_format).iso8601
  rescue Date::Error, ArgumentError
    nil
  end

  # Internal alias of try_parse_date used by the record builders.
  def self.parse_qif_date(date_str, date_format: "%m/%d/%Y")
    try_parse_date(date_str, date_format: date_format)
  end
  private_class_method :parse_qif_date

  # Extracts the date strings from D-fields in transaction sections only,
  # returning each one after normalize_qif_date has been applied.
  # Skips metadata sections (Cat, Tag, Security) where D means "description".
  # Used by Import.detect_date_format to sample dates before parsing.
  def self.extract_raw_dates(content)
    return [] if content.blank?

    content = prepare_content(content)

    TRANSACTION_TYPES
      .filter_map { |type| extract_section(content, type) }
      .flat_map { |section| section.scan(/^D(.+)$/i).flatten }
      .filter_map { |raw| normalize_qif_date(raw) }
  end

  # Strips thousands-separator commas and returns a clean decimal string,
  # or nil when the input is blank or is not a plain (optionally negative)
  # decimal number.
  def self.parse_qif_amount(amount_str)
    return nil if amount_str.blank?

    cleaned = amount_str.gsub(",", "").strip
    cleaned.match?(/\A-?\d+\.?\d*\z/) ? cleaned : nil
  end
  private_class_method :parse_qif_amount

  # Builds a ParsedInvestmentTransaction from a raw record hash.
  # ticker_by_name maps security names (N field in !Type:Security) to tickers (S field).
  # Returns nil when the record has no action (N) or no parseable date (D).
  # When the security name has no known ticker, the name itself is used.
  def self.build_investment_transaction(record, ticker_by_name, date_format: "%m/%d/%Y")
    action = record["N"]&.strip
    return nil unless action.present?

    raw_date = record["D"]
    return nil unless raw_date.present?

    date = parse_qif_date(raw_date, date_format: date_format)
    return nil unless date

    security_name   = record["Y"]&.strip
    security_ticker = ticker_by_name[security_name] || security_name

    category, tags = parse_category_and_tags(record["L"])

    ParsedInvestmentTransaction.new(
      date: date,
      action: action,
      security_name: security_name,
      security_ticker: security_ticker,
      price: parse_qif_amount(record["I"]),
      qty: parse_qif_amount(record["Q"]),
      amount: parse_qif_amount(record["T"] || record["U"]),
      memo: record["M"]&.strip,
      payee: record["P"]&.strip,
      category: category,
      tags: tags
    )
  end
  private_class_method :build_investment_transaction
end
|