From 45c61bcbc1d71670085b0be3afc7e8d42a057fc5 Mon Sep 17 00:00:00 2001 From: Guillem Arias Date: Mon, 25 May 2026 16:38:18 +0200 Subject: [PATCH] feat(ai): add Anthropic batch ops + LLM cost ledger (2/5) Implements auto_categorize, auto_detect_merchants, and enhance_provider_merchants on Provider::Anthropic via forced tool calls, plus the cost-ledger plumbing they need. - Provider::Anthropic::AutoCategorizer, AutoMerchantDetector, ProviderMerchantEnhancer each define a single output tool whose input_schema mirrors the desired output, then force the model to call it via tool_choice: { type: "tool", name: ..., disable_parallel_tool_use: true }. Anthropic guarantees the tool_use.input matches the schema, so there is no JSON parsing fragility, no tag stripping, and no json_object/json_schema fallback ladders. - Concerns::UsageRecorder mirrors the OpenAI sibling but persists cache_creation_input_tokens / cache_read_input_tokens to dedicated columns instead of metadata. - Migration adds cache_creation_tokens, cache_read_tokens (nullable integers) to llm_usages. OpenAI rows leave them null. - LlmUsage::PRICING gains Claude 4.x rows (opus-4-7 $15/$75, sonnet-4-6 $3/$15, haiku-4-5 $1/$5 per MTok). infer_provider returns "anthropic" for claude-* via the existing exact/prefix lookup. - Provider::Anthropic#chat_response now persists cache columns directly rather than stashing them in metadata. - 25-transaction batch cap mirrors the OpenAI provider so the cost ledger sees the same shape regardless of which provider ran a batch. Tests cover the forced-tool-call path, null/None normalization, case-insensitive merchant matching, the missing-tool_use error path, and Anthropic-specific pricing + provider inference on LlmUsage. Stacked on #1983 (PR 1/5). 3/5 PDF + vision next. 
--- app/models/llm_usage.rb | 9 + app/models/provider/anthropic.rb | 82 +++++++- .../provider/anthropic/auto_categorizer.rb | 176 ++++++++++++++++ .../anthropic/auto_merchant_detector.rb | 188 ++++++++++++++++++ .../anthropic/concerns/usage_recorder.rb | 75 +++++++ .../anthropic/provider_merchant_enhancer.rb | 154 ++++++++++++++ ...dd_anthropic_cache_tokens_to_llm_usages.rb | 9 + db/schema.rb | 6 +- test/models/llm_usage_test.rb | 35 ++++ .../anthropic/auto_categorizer_test.rb | 124 ++++++++++++ .../anthropic/auto_merchant_detector_test.rb | 115 +++++++++++ .../provider_merchant_enhancer_test.rb | 81 ++++++++ 12 files changed, 1046 insertions(+), 8 deletions(-) create mode 100644 app/models/provider/anthropic/auto_categorizer.rb create mode 100644 app/models/provider/anthropic/auto_merchant_detector.rb create mode 100644 app/models/provider/anthropic/concerns/usage_recorder.rb create mode 100644 app/models/provider/anthropic/provider_merchant_enhancer.rb create mode 100644 db/migrate/20260525120000_add_anthropic_cache_tokens_to_llm_usages.rb create mode 100644 test/models/llm_usage_test.rb create mode 100644 test/models/provider/anthropic/auto_categorizer_test.rb create mode 100644 test/models/provider/anthropic/auto_merchant_detector_test.rb create mode 100644 test/models/provider/anthropic/provider_merchant_enhancer_test.rb diff --git a/app/models/llm_usage.rb b/app/models/llm_usage.rb index 789a86ca9..d4e2eeda0 100644 --- a/app/models/llm_usage.rb +++ b/app/models/llm_usage.rb @@ -37,6 +37,15 @@ class LlmUsage < ApplicationRecord "google" => { "gemini-2.5-pro" => { prompt: 1.25, completion: 10.00 }, "gemini-2.5-flash" => { prompt: 0.3, completion: 2.50 } + }, + # Anthropic pricing per 1M tokens (Claude 4.x family, as of May 2026) + # Source: https://www.anthropic.com/pricing + "anthropic" => { + "claude-opus-4-7" => { prompt: 15.00, completion: 75.00 }, + "claude-opus-4-6" => { prompt: 15.00, completion: 75.00 }, + "claude-sonnet-4-6" => { prompt: 3.00, 
# Categorizes a batch of transactions with the Anthropic AutoCategorizer.
#
# The work runs inside `with_provider_response`. It raises Error when more
# than 25 transactions are supplied, or when `user_categories` is blank (the
# latter is also logged with the family id, or "unknown" when no family is
# given). `model` falls back to @default_model when blank. `json_mode` is
# accepted but not read by this method.
#
# Returns whatever AutoCategorizer#auto_categorize returns; the same value
# (mapped through #to_h) is written to the Langfuse trace.
def auto_categorize(transactions: [], user_categories: [], model: "", family: nil, json_mode: nil)
  with_provider_response do
    if transactions.size > 25
      raise Error, "Too many transactions to auto-categorize. Max is 25 per request."
    end

    unless user_categories.present?
      Rails.logger.error("Cannot auto-categorize transactions for family #{family&.id || "unknown"}: no categories available")
      raise Error, "No categories available for auto-categorization"
    end

    langfuse_trace = create_langfuse_trace(
      name: "anthropic.auto_categorize",
      input: { transactions: transactions, user_categories: user_categories }
    )

    categorizer = AutoCategorizer.new(
      client,
      model: model.presence || @default_model,
      transactions: transactions,
      user_categories: user_categories,
      langfuse_trace: langfuse_trace,
      family: family
    )
    categorizations = categorizer.auto_categorize

    upsert_langfuse_trace(trace: langfuse_trace, output: categorizations.map(&:to_h))
    categorizations
  end
end
# Looks up website URLs for a batch of provider merchants via the Anthropic
# ProviderMerchantEnhancer.
#
# The work runs inside `with_provider_response`. It raises Error when more
# than 25 merchants are supplied. `model` falls back to @default_model when
# blank. `json_mode` is accepted but not read by this method.
#
# Returns whatever ProviderMerchantEnhancer#enhance_merchants returns; the
# same value (mapped through #to_h) is written to the Langfuse trace.
def enhance_provider_merchants(merchants: [], model: "", family: nil, json_mode: nil)
  with_provider_response do
    if merchants.size > 25
      raise Error, "Too many merchants to enhance. Max is 25 per request."
    end

    langfuse_trace = create_langfuse_trace(
      name: "anthropic.enhance_provider_merchants",
      input: { merchants: merchants }
    )

    enhancer = ProviderMerchantEnhancer.new(
      client,
      model: model.presence || @default_model,
      merchants: merchants,
      langfuse_trace: langfuse_trace,
      family: family
    )
    enhanced = enhancer.enhance_merchants

    upsert_langfuse_trace(trace: langfuse_trace, output: enhanced.map(&:to_h))
    enhanced
  end
end
# Output-token budget sent as `max_tokens` on the Messages API call.
#
# Reads ANTHROPIC_MAX_TOKENS and falls back to 4096 when the variable is
# unset. It also falls back when the variable is set but blank, non-numeric,
# zero or negative: `"".to_i` and `"abc".to_i` are both 0, and a
# non-positive budget is never a usable request value.
def max_tokens
  configured = ENV.fetch("ANTHROPIC_MAX_TOKENS", "").to_i
  configured.positive? ? configured : 4096
end
# Builds the user-turn prompt: the user's categories and the transactions to
# categorize, each embedded as a fenced JSON block.
#
# The squiggly heredoc (<<~) already removes the common leading indentation,
# so no further `strip_heredoc` call is needed. Interpolated JSON is inserted
# after dedenting and does not affect it.
def user_message
  <<~MESSAGE
    Here are the user's available categories in JSON:

    ```json
    #{user_categories.to_json}
    ```

    Auto-categorize the following transactions:

    ```json
    #{transactions.to_json}
    ```
  MESSAGE
end
# Normalizes a category name reported by the model.
#
# Returns nil for nil, blank, or a literal "null" string (any letter case).
# Otherwise the stripped name is matched case-insensitively against
# `user_categories`. On a match the user's own spelling is returned; with no
# match the stripped name is returned unchanged.
#
# Matching uses String#casecmp? (Unicode case folding) rather than
# String#casecmp, which folds ASCII letters only and would fail to pair
# names such as "épicerie" / "Épicerie".
def normalize_category(value)
  name = value.to_s.strip # nil.to_s is "", so nil is covered by the empty check
  return nil if name.empty? || name.casecmp?("null")

  known = user_categories.find { |category| category[:name].to_s.casecmp?(name) }
  known ? known[:name] : name
end
# Converts a usage object (anything responding to #input_tokens and
# #output_tokens) into a string-keyed hash with input, output and total
# token counts. Returns an empty hash when no usage object is given.
def usage_hash(raw_usage)
  if raw_usage
    prompt = raw_usage.input_tokens.to_i
    completion = raw_usage.output_tokens.to_i

    {
      "input_tokens" => prompt,
      "output_tokens" => completion,
      "total_tokens" => prompt + completion
    }
  else
    {}
  end
end
# Output-token budget sent as `max_tokens` on the Messages API call.
#
# Reads ANTHROPIC_MAX_TOKENS and falls back to 4096 when the variable is
# unset. It also falls back when the variable is set but blank, non-numeric,
# zero or negative: `"".to_i` and `"abc".to_i` are both 0, and a
# non-positive budget is never a usable request value.
def max_tokens
  configured = ENV.fetch("ANTHROPIC_MAX_TOKENS", "").to_i
  configured.positive? ? configured : 4096
end
# Normalizes a business name reported by the model.
#
# Returns nil when `normalize_value` yields nil for the input. Otherwise the
# name is matched case-insensitively against `user_merchants`. On a match the
# user's own spelling is returned; with no match, or with no user merchants
# at all, the normalized name is returned unchanged.
#
# Matching uses String#casecmp? (Unicode case folding) rather than
# String#casecmp, which folds ASCII letters only and would fail to pair
# names such as "CAFÉ ZOË" / "Café Zoë".
def normalize_merchant_name(value)
  name = normalize_value(value)
  return nil unless name

  # A nil merchant list is treated like an empty one.
  known = (user_merchants || []).find { |merchant| merchant[:name].to_s.casecmp?(name) }
  known ? known[:name] : name
end
# Converts a usage object (anything responding to #input_tokens and
# #output_tokens) into a string-keyed hash with input, output and total
# token counts. Returns an empty hash when no usage object is given.
def usage_hash(raw_usage)
  if raw_usage
    prompt = raw_usage.input_tokens.to_i
    completion = raw_usage.output_tokens.to_i

    {
      "input_tokens" => prompt,
      "output_tokens" => completion,
      "total_tokens" => prompt + completion
    }
  else
    {}
  end
end
# Best-effort extraction of an HTTP status code from an error.
#
# Uses `error.status`, then `error.http_status`, when the error responds to
# one of them. Otherwise the error message is scanned for a standalone
# three-digit number in the 100-599 range. Returns nil when nothing matches.
#
# The scan requires that the number is not part of a longer digit run.
# Without that, text such as "max_tokens 4096" would be reported as status
# 409.
def extract_http_status_code(error)
  return error.status if error.respond_to?(:status)
  return error.http_status if error.respond_to?(:http_status)

  safe_error_message(error).to_s[/(?<!\d)([1-5]\d{2})(?!\d)/, 1]&.to_i
end
# Output-token budget sent as `max_tokens` on the Messages API call.
#
# Reads ANTHROPIC_MAX_TOKENS and falls back to 4096 when the variable is
# unset. It also falls back when the variable is set but blank, non-numeric,
# zero or negative: `"".to_i` and `"abc".to_i` are both 0, and a
# non-positive budget is never a usable request value.
def max_tokens
  configured = ENV.fetch("ANTHROPIC_MAX_TOKENS", "").to_i
  configured.positive? ? configured : 4096
end
# Cleans a scalar returned by the model: strips surrounding whitespace and
# maps nil, blank, or a literal "null" string (any ASCII letter case) to nil.
# Any other value is returned as its stripped string form.
def normalize_value(value)
  text = value.to_s.strip # nil.to_s is "", so nil falls into the empty case
  absent = text.empty? || text.casecmp("null").zero?
  absent ? nil : text
end
# Adds two integer columns to llm_usages for Anthropic prompt-cache token
# counts. Neither column is given a default or a NOT NULL constraint.
class AddAnthropicCacheTokensToLlmUsages < ActiveRecord::Migration[7.2]
  # Anthropic reports cache_creation_input_tokens (charged at ~1.25x input rate
  # for 5-min TTL) and cache_read_input_tokens (charged at 0.1x input rate).
  # OpenAI usage rows leave these null.
  CACHE_TOKEN_COLUMNS = %i[cache_creation_tokens cache_read_tokens].freeze

  def change
    CACHE_TOKEN_COLUMNS.each do |column_name|
      add_column :llm_usages, column_name, :integer
    end
  end
end
-ActiveRecord::Schema[7.2].define(version: 2026_05_19_100000) do +ActiveRecord::Schema[7.2].define(version: 2026_05_25_120000) do # These are extensions that must be enabled in order to support this database enable_extension "pgcrypto" enable_extension "plpgsql" @@ -498,7 +498,7 @@ ActiveRecord::Schema[7.2].define(version: 2026_05_19_100000) do t.index ["provider_key"], name: "index_debug_log_entries_on_provider_key" t.index ["source"], name: "index_debug_log_entries_on_source" t.index ["user_id"], name: "index_debug_log_entries_on_user_id" - t.check_constraint "level::text = ANY (ARRAY['debug'::character varying, 'info'::character varying, 'warn'::character varying, 'error'::character varying]::text[])", name: "chk_debug_log_entries_level" + t.check_constraint "level::text = ANY (ARRAY['debug'::character varying::text, 'info'::character varying::text, 'warn'::character varying::text, 'error'::character varying::text])", name: "chk_debug_log_entries_level" end create_table "depositories", id: :uuid, default: -> { "gen_random_uuid()" }, force: :cascade do |t| @@ -1063,6 +1063,8 @@ ActiveRecord::Schema[7.2].define(version: 2026_05_19_100000) do t.jsonb "metadata", default: {} t.datetime "created_at", null: false t.datetime "updated_at", null: false + t.integer "cache_creation_tokens" + t.integer "cache_read_tokens" t.index ["family_id", "created_at"], name: "index_llm_usages_on_family_id_and_created_at" t.index ["family_id", "operation"], name: "index_llm_usages_on_family_id_and_operation" t.index ["family_id"], name: "index_llm_usages_on_family_id" diff --git a/test/models/llm_usage_test.rb b/test/models/llm_usage_test.rb new file mode 100644 index 000000000..423544ea3 --- /dev/null +++ b/test/models/llm_usage_test.rb @@ -0,0 +1,35 @@ +require "test_helper" + +class LlmUsageTest < ActiveSupport::TestCase + test "infer_provider returns anthropic for claude models" do + assert_equal "anthropic", LlmUsage.infer_provider("claude-sonnet-4-6") + assert_equal "anthropic", 
# Happy path: the categorizer must send a request that forces the single
# report_categorizations tool, then map the tool_use input into one
# AutoCategorization per transaction.
test "issues a forced tool call and maps the response into AutoCategorization records" do
  fake_response = build_response(content: [
    tool_use_block(
      id: "toolu_1",
      name: "report_categorizations",
      input: {
        "categorizations" => [
          { "transaction_id" => "txn_1", "category_name" => "Fast Food" },
          { "transaction_id" => "txn_2", "category_name" => "Subscriptions" }
        ]
      }
    )
  ])

  # Compared directly against the request's tool_choice; no array wrapper needed.
  expected_tool_choice = { type: "tool", name: "report_categorizations", disable_parallel_tool_use: true }

  client = stub_client(fake_response, expect_request: ->(params) {
    assert_equal "claude-haiku-4-5", params[:model]
    assert_equal expected_tool_choice, params[:tool_choice]
    assert_equal 1, params[:tools].size
    assert_equal "report_categorizations", params[:tools].first[:name]
  })

  result = Provider::Anthropic::AutoCategorizer.new(
    client,
    model: "claude-haiku-4-5",
    transactions: @transactions,
    user_categories: @user_categories
  ).auto_categorize

  assert_equal 2, result.size
  assert_equal "Fast Food", result.find { |r| r.transaction_id == "txn_1" }.category_name
  assert_equal "Subscriptions", result.find { |r| r.transaction_id == "txn_2" }.category_name
end
client, + model: "claude-haiku-4-5", + transactions: @transactions, + user_categories: @user_categories + ).auto_categorize + end + + assert_match(/did not invoke report_categorizations/i, err.message) + end + + private + def stub_client(response, expect_request: nil) + messages = mock + if expect_request + messages.expects(:create).with do |params| + expect_request.call(params) + true + end.returns(response) + else + messages.stubs(:create).returns(response) + end + client = mock + client.stubs(:messages).returns(messages) + client + end + + def build_response(content:, usage: { input_tokens: 50, output_tokens: 25 }) + OpenStruct.new( + id: "msg_test", + model: "claude-haiku-4-5", + content: content, + usage: OpenStruct.new( + input_tokens: usage[:input_tokens], + output_tokens: usage[:output_tokens] + ) + ) + end + + def text_block(text) + OpenStruct.new(type: :text, text: text) + end + + def tool_use_block(id:, name:, input:) + OpenStruct.new(type: :tool_use, id: id, name: name, input: input) + end +end diff --git a/test/models/provider/anthropic/auto_merchant_detector_test.rb b/test/models/provider/anthropic/auto_merchant_detector_test.rb new file mode 100644 index 000000000..682a21325 --- /dev/null +++ b/test/models/provider/anthropic/auto_merchant_detector_test.rb @@ -0,0 +1,115 @@ +require "test_helper" + +class Provider::Anthropic::AutoMerchantDetectorTest < ActiveSupport::TestCase + setup do + @transactions = [ + { id: "txn_1", name: "AMZN purchases", classification: "expense" }, + { id: "txn_2", name: "Local diner", classification: "expense" } + ] + @user_merchants = [ { id: "m1", name: "Shooters" } ] + end + + test "issues a forced tool call and maps merchants" do + fake_response = build_response(content: [ + tool_use_block( + id: "toolu_1", + name: "report_merchants", + input: { + "merchants" => [ + { "transaction_id" => "txn_1", "business_name" => "Amazon", "business_url" => "amazon.com" }, + { "transaction_id" => "txn_2", "business_name" => nil, 
"business_url" => nil } + ] + } + ) + ]) + client = stub_client(fake_response, expect_request: ->(params) { + assert_equal "claude-haiku-4-5", params[:model] + assert_equal "report_merchants", params[:tool_choice][:name] + assert params[:tool_choice][:disable_parallel_tool_use] + }) + + result = Provider::Anthropic::AutoMerchantDetector.new( + client, + model: "claude-haiku-4-5", + transactions: @transactions, + user_merchants: @user_merchants + ).auto_detect_merchants + + txn1 = result.find { |r| r.transaction_id == "txn_1" } + txn2 = result.find { |r| r.transaction_id == "txn_2" } + + assert_equal "Amazon", txn1.business_name + assert_equal "amazon.com", txn1.business_url + assert_nil txn2.business_name + assert_nil txn2.business_url + end + + test "normalizes case-insensitive matches against user_merchants" do + fake_response = build_response(content: [ + tool_use_block( + id: "toolu_1", + name: "report_merchants", + input: { + "merchants" => [ + { "transaction_id" => "txn_1", "business_name" => "shooters", "business_url" => nil } + ] + } + ) + ]) + client = stub_client(fake_response) + + result = Provider::Anthropic::AutoMerchantDetector.new( + client, + model: "claude-haiku-4-5", + transactions: [ @transactions.first ], + user_merchants: @user_merchants + ).auto_detect_merchants + + assert_equal "Shooters", result.first.business_name + end + + test "raises when model returns no tool_use" do + fake_response = build_response(content: [ OpenStruct.new(type: :text, text: "I cannot help") ]) + client = stub_client(fake_response) + + err = assert_raises(Provider::Anthropic::Error) do + Provider::Anthropic::AutoMerchantDetector.new( + client, + model: "claude-haiku-4-5", + transactions: @transactions, + user_merchants: @user_merchants + ).auto_detect_merchants + end + + assert_match(/did not invoke report_merchants/i, err.message) + end + + private + def stub_client(response, expect_request: nil) + messages = mock + if expect_request + messages.expects(:create).with 
do |params| + expect_request.call(params) + true + end.returns(response) + else + messages.stubs(:create).returns(response) + end + client = mock + client.stubs(:messages).returns(messages) + client + end + + def build_response(content:, usage: { input_tokens: 100, output_tokens: 40 }) + OpenStruct.new( + id: "msg_test", + model: "claude-haiku-4-5", + content: content, + usage: OpenStruct.new(input_tokens: usage[:input_tokens], output_tokens: usage[:output_tokens]) + ) + end + + def tool_use_block(id:, name:, input:) + OpenStruct.new(type: :tool_use, id: id, name: name, input: input) + end +end diff --git a/test/models/provider/anthropic/provider_merchant_enhancer_test.rb b/test/models/provider/anthropic/provider_merchant_enhancer_test.rb new file mode 100644 index 000000000..d45bef0f9 --- /dev/null +++ b/test/models/provider/anthropic/provider_merchant_enhancer_test.rb @@ -0,0 +1,81 @@ +require "test_helper" + +class Provider::Anthropic::ProviderMerchantEnhancerTest < ActiveSupport::TestCase + setup do + @merchants = [ + { id: "m1", name: "Walmart" }, + { id: "m2", name: "Local Diner" } + ] + end + + test "issues a forced tool call and maps enhancements" do + fake_response = build_response(content: [ + tool_use_block( + id: "toolu_1", + name: "report_enhancements", + input: { + "merchants" => [ + { "merchant_id" => "m1", "business_url" => "walmart.com" }, + { "merchant_id" => "m2", "business_url" => nil } + ] + } + ) + ]) + client = stub_client(fake_response, expect_request: ->(params) { + assert_equal "report_enhancements", params[:tool_choice][:name] + }) + + result = Provider::Anthropic::ProviderMerchantEnhancer.new( + client, + model: "claude-haiku-4-5", + merchants: @merchants + ).enhance_merchants + + assert_equal "walmart.com", result.find { |r| r.merchant_id == "m1" }.business_url + assert_nil result.find { |r| r.merchant_id == "m2" }.business_url + end + + test "raises when model returns no tool_use" do + fake_response = build_response(content: [ 
OpenStruct.new(type: :text, text: "Nope") ]) + client = stub_client(fake_response) + + err = assert_raises(Provider::Anthropic::Error) do + Provider::Anthropic::ProviderMerchantEnhancer.new( + client, + model: "claude-haiku-4-5", + merchants: @merchants + ).enhance_merchants + end + + assert_match(/did not invoke report_enhancements/i, err.message) + end + + private + def stub_client(response, expect_request: nil) + messages = mock + if expect_request + messages.expects(:create).with do |params| + expect_request.call(params) + true + end.returns(response) + else + messages.stubs(:create).returns(response) + end + client = mock + client.stubs(:messages).returns(messages) + client + end + + def build_response(content:, usage: { input_tokens: 60, output_tokens: 20 }) + OpenStruct.new( + id: "msg_test", + model: "claude-haiku-4-5", + content: content, + usage: OpenStruct.new(input_tokens: usage[:input_tokens], output_tokens: usage[:output_tokens]) + ) + end + + def tool_use_block(id:, name:, input:) + OpenStruct.new(type: :tool_use, id: id, name: name, input: input) + end +end