feat(ai): add Anthropic batch ops + LLM cost ledger (2/5)

Implements auto_categorize, auto_detect_merchants, and enhance_provider_merchants on Provider::Anthropic via forced tool calls, plus the cost-ledger plumbing they need.

- Provider::Anthropic::AutoCategorizer, AutoMerchantDetector, ProviderMerchantEnhancer each define a single output tool whose input_schema mirrors the desired output, then force the model to call it via tool_choice: { type: "tool", name: ..., disable_parallel_tool_use: true }. Anthropic guarantees the tool_use.input matches the schema, so there is no JSON parsing fragility, no <think> tag stripping, and no json_object/json_schema fallback ladders.
- Concerns::UsageRecorder mirrors the OpenAI sibling but persists cache_creation_input_tokens / cache_read_input_tokens to dedicated columns instead of metadata.
- Migration adds cache_creation_tokens, cache_read_tokens (nullable integers) to llm_usages. OpenAI rows leave them null.
- LlmUsage::PRICING gains Claude 4.x rows (opus-4-7 $15/$75, sonnet-4-6 $3/$15, haiku-4-5 $1/$5 per MTok). infer_provider returns "anthropic" for claude-* via the existing exact/prefix lookup.
- Provider::Anthropic#chat_response now persists cache columns directly rather than stashing them in metadata.
- 25-transaction batch cap mirrors the OpenAI provider so the cost ledger sees the same shape regardless of which provider ran a batch.

Tests cover the forced-tool-call path, null/None normalization, case-insensitive merchant matching, the missing-tool_use error path, and Anthropic-specific pricing + provider inference on LlmUsage.

Stacked on #1983 (PR 1/5). 3/5 PDF + vision next.
2026-05-29 23:39:03 +00:00 · 2026-05-25 16:38:18 +02:00
parent 4b511c4dad
commit 45c61bcbc1
12 changed files with 1046 additions and 8 deletions
--- a/app/models/llm_usage.rb
+++ b/app/models/llm_usage.rb
@@ -37,6 +37,15 @@ class LlmUsage < ApplicationRecord
    "google" => {
      "gemini-2.5-pro" => { prompt: 1.25, completion: 10.00 },
      "gemini-2.5-flash" => { prompt: 0.3, completion: 2.50 }
+    },
+    # Anthropic pricing per 1M tokens (Claude 4.x family, as of May 2026)
+    # Source: https://www.anthropic.com/pricing
+    "anthropic" => {
+      "claude-opus-4-7" => { prompt: 15.00, completion: 75.00 },
+      "claude-opus-4-6" => { prompt: 15.00, completion: 75.00 },
+      "claude-sonnet-4-6" => { prompt: 3.00, completion: 15.00 },
+      "claude-sonnet-4-5" => { prompt: 3.00, completion: 15.00 },
+      "claude-haiku-4-5" => { prompt: 1.00, completion: 5.00 }
    }
  }.freeze

--- a/app/models/provider/anthropic.rb
+++ b/app/models/provider/anthropic.rb
@@ -66,18 +66,86 @@ class Provider::Anthropic < Provider
    @base_url.present?
  end

-  # Batch operations land in PR2 — keep the LlmConcept contract honest by
-  # surfacing a clear error if a caller routes here too early.
+  # Categorizes a batch of transactions against the supplied categories by
+  # delegating to Provider::Anthropic::AutoCategorizer.
+  #
+  # All work runs inside with_provider_response; within that block an Error is
+  # raised when more than 25 transactions are passed or when user_categories is
+  # blank. A blank `model` falls back to @default_model. `json_mode` is accepted
+  # but not referenced in this method body.
  def auto_categorize(transactions: [], user_categories: [], model: "", family: nil, json_mode: nil)
-    raise Error, "auto_categorize not yet implemented for Provider::Anthropic"
+    with_provider_response do
+      raise Error, "Too many transactions to auto-categorize. Max is 25 per request." if transactions.size > 25
+      if user_categories.blank?
+        # Log with the family id (or "unknown") before raising so the cause is visible in the logs.
+        family_id = family&.id || "unknown"
+        Rails.logger.error("Cannot auto-categorize transactions for family #{family_id}: no categories available")
+        raise Error, "No categories available for auto-categorization"
+      end
+
+      effective_model = model.presence || @default_model
+
+      trace = create_langfuse_trace(
+        name: "anthropic.auto_categorize",
+        input: { transactions: transactions, user_categories: user_categories }
+      )
+
+      result = AutoCategorizer.new(
+        client,
+        model: effective_model,
+        transactions: transactions,
+        user_categories: user_categories,
+        langfuse_trace: trace,
+        family: family
+      ).auto_categorize
+
+      # Attach the results (as hashes) to the trace; only reached when the call above did not raise.
+      upsert_langfuse_trace(trace: trace, output: result.map(&:to_h))
+
+      result
+    end
  end

+  # Detects a business name and URL for each transaction by delegating to
+  # Provider::Anthropic::AutoMerchantDetector.
+  #
+  # All work runs inside with_provider_response; within that block an Error is
+  # raised when more than 25 transactions are passed. Unlike auto_categorize, an
+  # empty user_merchants list is not rejected here. A blank `model` falls back to
+  # @default_model. `json_mode` is accepted but not referenced in this method body.
  def auto_detect_merchants(transactions: [], user_merchants: [], model: "", family: nil, json_mode: nil)
-    raise Error, "auto_detect_merchants not yet implemented for Provider::Anthropic"
+    with_provider_response do
+      raise Error, "Too many transactions to auto-detect merchants. Max is 25 per request." if transactions.size > 25
+
+      effective_model = model.presence || @default_model
+
+      trace = create_langfuse_trace(
+        name: "anthropic.auto_detect_merchants",
+        input: { transactions: transactions, user_merchants: user_merchants }
+      )
+
+      result = AutoMerchantDetector.new(
+        client,
+        model: effective_model,
+        transactions: transactions,
+        user_merchants: user_merchants,
+        langfuse_trace: trace,
+        family: family
+      ).auto_detect_merchants
+
+      # Attach the results (as hashes) to the trace; only reached when the call above did not raise.
+      upsert_langfuse_trace(trace: trace, output: result.map(&:to_h))
+
+      result
+    end
  end

+  # Looks up a website URL for each merchant by delegating to
+  # Provider::Anthropic::ProviderMerchantEnhancer.
+  #
+  # All work runs inside with_provider_response; within that block an Error is
+  # raised when more than 25 merchants are passed. A blank `model` falls back to
+  # @default_model. `json_mode` is accepted but not referenced in this method body.
  def enhance_provider_merchants(merchants: [], model: "", family: nil, json_mode: nil)
-    raise Error, "enhance_provider_merchants not yet implemented for Provider::Anthropic"
+    with_provider_response do
+      raise Error, "Too many merchants to enhance. Max is 25 per request." if merchants.size > 25
+
+      effective_model = model.presence || @default_model
+
+      trace = create_langfuse_trace(
+        name: "anthropic.enhance_provider_merchants",
+        input: { merchants: merchants }
+      )
+
+      result = ProviderMerchantEnhancer.new(
+        client,
+        model: effective_model,
+        merchants: merchants,
+        langfuse_trace: trace,
+        family: family
+      ).enhance_merchants
+
+      # Attach the results (as hashes) to the trace; only reached when the call above did not raise.
+      upsert_langfuse_trace(trace: trace, output: result.map(&:to_h))
+
+      result
+    end
  end

  def supports_pdf_processing?(model: @default_model)
@@ -345,8 +413,10 @@ class Provider::Anthropic < Provider
        prompt_tokens: prompt_tokens,
        completion_tokens: completion_tokens,
        total_tokens: total_tokens,
+        cache_creation_tokens: usage["cache_creation_input_tokens"],
+        cache_read_tokens: usage["cache_read_input_tokens"],
        estimated_cost: estimated_cost,
-        metadata: usage.slice("cache_creation_input_tokens", "cache_read_input_tokens").compact
+        metadata: {}
      )
    rescue => e
      Rails.logger.error("Failed to record LLM usage: #{e.message}")
--- a/app/models/provider/anthropic/auto_categorizer.rb
+++ b/app/models/provider/anthropic/auto_categorizer.rb
@@ -0,0 +1,176 @@
+# Categorizes a batch of transactions by forcing the model to call a single
+# output tool (TOOL_NAME) whose input schema mirrors the desired result.
+class Provider::Anthropic::AutoCategorizer
+  include Provider::Anthropic::Concerns::UsageRecorder
+
+  TOOL_NAME = "report_categorizations".freeze
+
+  attr_reader :client, :model, :transactions, :user_categories, :langfuse_trace, :family
+
+  # client          - object whose `messages.create` issues the API request
+  # model           - model identifier sent with the request
+  # transactions    - array of hashes; `:id` feeds the schema enum, the whole array is sent as JSON
+  # user_categories - array of hashes; `:name` feeds the schema enum and name matching
+  # langfuse_trace  - optional trace; a span is opened on it when present
+  # family          - optional record used by the included UsageRecorder for ledger rows
+  def initialize(client, model:, transactions: [], user_categories: [], langfuse_trace: nil, family: nil)
+    @client = client
+    @model = model
+    @transactions = transactions
+    @user_categories = user_categories
+    @langfuse_trace = langfuse_trace
+    @family = family
+  end
+
+  # Issues the forced tool call, maps the tool input into AutoCategorization
+  # records, records usage, and returns the records.
+  #
+  # Any StandardError is reported to the span and to the usage ledger, then
+  # re-raised unchanged.
+  def auto_categorize
+    span = langfuse_trace&.span(name: "auto_categorize_api_call", input: {
+      model: model,
+      transactions: transactions,
+      user_categories: user_categories
+    })
+
+    response = client.messages.create(
+      model: model,
+      max_tokens: max_tokens,
+      system_: instructions,
+      messages: [ { role: "user", content: user_message } ],
+      tools: [ output_tool ],
+      tool_choice: { type: "tool", name: TOOL_NAME, disable_parallel_tool_use: true }
+    )
+
+    categorizations = extract_categorizations(response)
+    result = build_response(categorizations)
+
+    record_usage(model, response.usage, operation: "auto_categorize", metadata: {
+      transaction_count: transactions.size,
+      category_count: user_categories.size
+    })
+
+    span&.end(output: result.map(&:to_h), usage: usage_hash(response.usage))
+    result
+  rescue => e
+    span&.end(output: { error: e.message }, level: "ERROR")
+    record_usage_error(model, operation: "auto_categorize", error: e, metadata: {
+      transaction_count: transactions.size,
+      category_count: user_categories.size
+    })
+    raise
+  end
+
+  private
+    AutoCategorization = Provider::LlmConcept::AutoCategorization
+
+    # Output token budget; overridable through ANTHROPIC_MAX_TOKENS.
+    def max_tokens
+      ENV.fetch("ANTHROPIC_MAX_TOKENS", 4096).to_i
+    end
+
+    # Tool definition the model is forced to call. Both enums are derived from
+    # the current batch, so the schema changes per request.
+    def output_tool
+      {
+        name: TOOL_NAME,
+        description: "Return the categorization decision for each input transaction.",
+        input_schema: {
+          type: "object",
+          properties: {
+            categorizations: {
+              type: "array",
+              description: "One categorization per input transaction.",
+              items: {
+                type: "object",
+                properties: {
+                  transaction_id: {
+                    type: "string",
+                    description: "The internal ID of the original transaction",
+                    # Coerce to strings so the enum agrees with the declared
+                    # "string" type even when callers pass non-string IDs.
+                    enum: transactions.map { |t| t[:id].to_s }
+                  },
+                  category_name: {
+                    type: [ "string", "null" ],
+                    description: "Matched category name from the user's categories, or null when uncertain.",
+                    enum: [ *user_categories.map { |c| c[:name] }, nil ]
+                  }
+                },
+                required: [ "transaction_id", "category_name" ],
+                additionalProperties: false
+              }
+            }
+          },
+          required: [ "categorizations" ],
+          additionalProperties: false
+        }
+      }
+    end
+
+    # System prompt. The squiggly heredoc already removes the indentation, so
+    # no further stripping is needed.
+    def instructions
+      <<~INSTRUCTIONS
+        You are an assistant to a consumer personal finance app. You will be provided a list of the user's
+        transactions and a list of the user's categories. Your job is to auto-categorize each transaction
+        and return the result via the report_categorizations tool.
+
+        Follow ALL the rules below:
+
+        - Return one result per transaction, correlated by transaction_id
+        - Use the most specific category possible (subcategory over parent category)
+        - Any category may be used regardless of whether the transaction is income or expense
+        - Return null for category_name when you are not 60%+ confident, or when the description is
+          generic/ambiguous (e.g., "POS DEBIT", "ACH WITHDRAWAL", "CHECK #1234")
+        - The `hint` field on a transaction (when present) comes from third-party aggregators and may
+          or may not match the user's categories — treat it as a weak signal
+      INSTRUCTIONS
+    end
+
+    # User turn: the categories and the transactions, each as a fenced JSON block.
+    def user_message
+      <<~MESSAGE
+        Here are the user's available categories in JSON:
+
+        ```json
+        #{user_categories.to_json}
+        ```
+
+        Auto-categorize the following transactions:
+
+        ```json
+        #{transactions.to_json}
+        ```
+      MESSAGE
+    end
+
+    # Returns the `categorizations` array from the first tool_use block.
+    # Raises Provider::Anthropic::Error when there is no tool_use block or the
+    # array is missing. Accepts the tool input as a Hash (string or symbol keys)
+    # or as a JSON string.
+    def extract_categorizations(response)
+      tool_use = Array(response.content).find { |block| block_type(block) == :tool_use }
+      raise Provider::Anthropic::Error, "Model did not invoke #{TOOL_NAME}" unless tool_use
+
+      input = block_input(tool_use)
+      input = JSON.parse(input) if input.is_a?(String)
+      categorizations = input.is_a?(Hash) ? (input["categorizations"] || input[:categorizations]) : nil
+
+      raise Provider::Anthropic::Error, "Tool call missing categorizations" unless categorizations.is_a?(Array)
+      categorizations
+    end
+
+    # Maps raw tool entries (string or symbol keys) to AutoCategorization records.
+    def build_response(categorizations)
+      categorizations.map do |c|
+        category_name = c["category_name"] || c[:category_name]
+        AutoCategorization.new(
+          transaction_id: c["transaction_id"] || c[:transaction_id],
+          category_name: normalize_category(category_name)
+        )
+      end
+    end
+
+    # Returns nil for nil, blank, or a literal "null" (any case). Otherwise
+    # returns the user's own spelling when a category matches case-insensitively,
+    # or the stripped value unchanged when nothing matches.
+    def normalize_category(value)
+      return nil if value.nil?
+      str = value.to_s.strip
+      return nil if str.empty? || str.casecmp("null").zero?
+
+      match = user_categories.find { |c| c[:name].to_s.casecmp(str).zero? }
+      match ? match[:name] : str
+    end
+
+    # Content-block type as a symbol; works for objects exposing #type and for
+    # hashes with symbol or string keys.
+    def block_type(block)
+      raw = block.respond_to?(:type) ? block.type : block[:type] || block["type"]
+      raw.to_s.to_sym
+    end
+
+    # Content-block input; works for objects exposing #input and for hashes.
+    def block_input(block)
+      block.respond_to?(:input) ? block.input : (block[:input] || block["input"])
+    end
+
+    # Token counts reported to the Langfuse span; empty when usage is absent.
+    def usage_hash(raw_usage)
+      return {} unless raw_usage
+      {
+        "input_tokens" => raw_usage.input_tokens.to_i,
+        "output_tokens" => raw_usage.output_tokens.to_i,
+        "total_tokens" => raw_usage.input_tokens.to_i + raw_usage.output_tokens.to_i
+      }
+    end
+end
--- a/app/models/provider/anthropic/auto_merchant_detector.rb
+++ b/app/models/provider/anthropic/auto_merchant_detector.rb
@@ -0,0 +1,188 @@
+# Detects a business name and website URL for a batch of transactions by
+# forcing the model to call a single output tool (TOOL_NAME).
+class Provider::Anthropic::AutoMerchantDetector
+  include Provider::Anthropic::Concerns::UsageRecorder
+
+  TOOL_NAME = "report_merchants".freeze
+
+  attr_reader :client, :model, :transactions, :user_merchants, :langfuse_trace, :family
+
+  # client         - object whose `messages.create` issues the API request
+  # model          - model identifier sent with the request
+  # transactions   - array of hashes; `:id` feeds the schema enum, the whole array is sent as JSON
+  # user_merchants - array of hashes; `:name` is used for case-insensitive name matching
+  # langfuse_trace - optional trace; a span is opened on it when present
+  # family         - optional record used by the included UsageRecorder for ledger rows
+  def initialize(client, model:, transactions: [], user_merchants: [], langfuse_trace: nil, family: nil)
+    @client = client
+    @model = model
+    @transactions = transactions
+    @user_merchants = user_merchants
+    @langfuse_trace = langfuse_trace
+    @family = family
+  end
+
+  # Issues the forced tool call, maps the tool input into AutoDetectedMerchant
+  # records, records usage, and returns the records.
+  #
+  # Any StandardError is reported to the span and to the usage ledger, then
+  # re-raised unchanged.
+  def auto_detect_merchants
+    span = langfuse_trace&.span(name: "auto_detect_merchants_api_call", input: {
+      model: model,
+      transactions: transactions,
+      user_merchants: user_merchants
+    })
+
+    response = client.messages.create(
+      model: model,
+      max_tokens: max_tokens,
+      system_: instructions,
+      messages: [ { role: "user", content: user_message } ],
+      tools: [ output_tool ],
+      tool_choice: { type: "tool", name: TOOL_NAME, disable_parallel_tool_use: true }
+    )
+
+    merchants_data = extract_merchants(response)
+    result = build_response(merchants_data)
+
+    record_usage(model, response.usage, operation: "auto_detect_merchants", metadata: {
+      transaction_count: transactions.size,
+      merchant_count: user_merchants.size
+    })
+
+    span&.end(output: result.map(&:to_h), usage: usage_hash(response.usage))
+    result
+  rescue => e
+    span&.end(output: { error: e.message }, level: "ERROR")
+    record_usage_error(model, operation: "auto_detect_merchants", error: e, metadata: {
+      transaction_count: transactions.size,
+      merchant_count: user_merchants.size
+    })
+    raise
+  end
+
+  private
+    AutoDetectedMerchant = Provider::LlmConcept::AutoDetectedMerchant
+
+    # Output token budget; overridable through ANTHROPIC_MAX_TOKENS.
+    def max_tokens
+      ENV.fetch("ANTHROPIC_MAX_TOKENS", 4096).to_i
+    end
+
+    # Tool definition the model is forced to call. The transaction_id enum is
+    # derived from the current batch, so the schema changes per request.
+    def output_tool
+      {
+        name: TOOL_NAME,
+        description: "Return the detected business name and website URL for each input transaction.",
+        input_schema: {
+          type: "object",
+          properties: {
+            merchants: {
+              type: "array",
+              description: "One detection result per input transaction.",
+              items: {
+                type: "object",
+                properties: {
+                  transaction_id: {
+                    type: "string",
+                    description: "The internal ID of the original transaction",
+                    # Coerce to strings so the enum agrees with the declared
+                    # "string" type even when callers pass non-string IDs.
+                    enum: transactions.map { |t| t[:id].to_s }
+                  },
+                  business_name: {
+                    type: [ "string", "null" ],
+                    description: "Detected business name, or null if uncertain or generic"
+                  },
+                  business_url: {
+                    type: [ "string", "null" ],
+                    description: "Business website without the www. subdomain (e.g., \"amazon.com\"), or null if uncertain"
+                  }
+                },
+                required: [ "transaction_id", "business_name", "business_url" ],
+                additionalProperties: false
+              }
+            }
+          },
+          required: [ "merchants" ],
+          additionalProperties: false
+        }
+      }
+    end
+
+    # System prompt. The squiggly heredoc already removes the indentation, so
+    # no further stripping is needed.
+    def instructions
+      <<~INSTRUCTIONS
+        You are an assistant to a consumer personal finance app. Detect the business name and website URL
+        for each transaction and return the result via the report_merchants tool.
+
+        Follow ALL the rules below:
+
+        - One result per transaction, correlated by transaction_id
+        - Do NOT include the www. subdomain in business_url ("amazon.com", not "www.amazon.com")
+        - User-provided merchants should only be used when the match is unambiguous
+        - Favor null over false positives; only return values when 80%+ confident
+        - NEVER return a name/URL for generic descriptions ("Paycheck", "Local diner", "ATM", "POS DEBIT")
+
+        Decision order:
+          1. Identify from your knowledge of global businesses
+          2. Otherwise, match against the user-provided merchants
+          3. Otherwise, return null for both fields
+      INSTRUCTIONS
+    end
+
+    # User turn: the known merchants and the transactions, each as a fenced JSON block.
+    def user_message
+      <<~MESSAGE
+        User's known merchants:
+
+        ```json
+        #{user_merchants.to_json}
+        ```
+
+        Transactions to analyze:
+
+        ```json
+        #{transactions.to_json}
+        ```
+      MESSAGE
+    end
+
+    # Returns the `merchants` array from the first tool_use block.
+    # Raises Provider::Anthropic::Error when there is no tool_use block or the
+    # array is missing. Accepts the tool input as a Hash (string or symbol keys)
+    # or as a JSON string.
+    def extract_merchants(response)
+      tool_use = Array(response.content).find { |block| block_type(block) == :tool_use }
+      raise Provider::Anthropic::Error, "Model did not invoke #{TOOL_NAME}" unless tool_use
+
+      input = block_input(tool_use)
+      input = JSON.parse(input) if input.is_a?(String)
+      merchants = input.is_a?(Hash) ? (input["merchants"] || input[:merchants]) : nil
+
+      raise Provider::Anthropic::Error, "Tool call missing merchants" unless merchants.is_a?(Array)
+      merchants
+    end
+
+    # Maps raw tool entries (string or symbol keys) to AutoDetectedMerchant records.
+    def build_response(merchants)
+      merchants.map do |m|
+        AutoDetectedMerchant.new(
+          transaction_id: m["transaction_id"] || m[:transaction_id],
+          business_name: normalize_merchant_name(m["business_name"] || m[:business_name]),
+          business_url: normalize_value(m["business_url"] || m[:business_url])
+        )
+      end
+    end
+
+    # Returns nil for nil, blank, or a literal "null" (any case); otherwise the
+    # stripped string.
+    def normalize_value(value)
+      return nil if value.nil?
+      str = value.to_s.strip
+      return nil if str.empty? || str.casecmp("null").zero?
+      str
+    end
+
+    # Normalizes the name, then prefers the user's own spelling when one of
+    # their merchants matches case-insensitively.
+    def normalize_merchant_name(value)
+      str = normalize_value(value)
+      return nil unless str
+      return str if user_merchants.blank?
+
+      match = user_merchants.find { |m| m[:name].to_s.casecmp(str).zero? }
+      match ? match[:name] : str
+    end
+
+    # Content-block type as a symbol; works for objects exposing #type and for
+    # hashes with symbol or string keys.
+    def block_type(block)
+      raw = block.respond_to?(:type) ? block.type : block[:type] || block["type"]
+      raw.to_s.to_sym
+    end
+
+    # Content-block input; works for objects exposing #input and for hashes.
+    def block_input(block)
+      block.respond_to?(:input) ? block.input : (block[:input] || block["input"])
+    end
+
+    # Token counts reported to the Langfuse span; empty when usage is absent.
+    def usage_hash(raw_usage)
+      return {} unless raw_usage
+      {
+        "input_tokens" => raw_usage.input_tokens.to_i,
+        "output_tokens" => raw_usage.output_tokens.to_i,
+        "total_tokens" => raw_usage.input_tokens.to_i + raw_usage.output_tokens.to_i
+      }
+    end
+end
--- a/app/models/provider/anthropic/concerns/usage_recorder.rb
+++ b/app/models/provider/anthropic/concerns/usage_recorder.rb
@@ -0,0 +1,75 @@
+# Ledger helpers shared by the Anthropic batch operations. Host classes must
+# expose `family`; both record_* methods do nothing when it is nil.
+module Provider::Anthropic::Concerns::UsageRecorder
+  extend ActiveSupport::Concern
+
+  private
+
+    # Persists an LlmUsage row from an Anthropic Message#usage object.
+    #
+    # Does nothing when no family is attached (e.g., system-initiated calls) or
+    # when raw_usage is nil. Persistence failures are logged and swallowed, so
+    # this method never raises a StandardError to its caller.
+    #
+    # Cache token counts are stored on the row but are not passed to
+    # LlmUsage.calculate_cost, so estimated_cost and total_tokens reflect only
+    # input_tokens and output_tokens.
+    def record_usage(model_name, raw_usage, operation:, metadata: {})
+      return unless family && raw_usage
+
+      input_tokens = raw_usage.input_tokens.to_i
+      output_tokens = raw_usage.output_tokens.to_i
+      total_tokens = input_tokens + output_tokens
+      # Usage objects that do not expose the cache fields leave the columns null.
+      cache_creation = raw_usage.respond_to?(:cache_creation_input_tokens) ? raw_usage.cache_creation_input_tokens : nil
+      cache_read = raw_usage.respond_to?(:cache_read_input_tokens) ? raw_usage.cache_read_input_tokens : nil
+
+      estimated_cost = LlmUsage.calculate_cost(
+        model: model_name,
+        prompt_tokens: input_tokens,
+        completion_tokens: output_tokens
+      )
+
+      family.llm_usages.create!(
+        provider: "anthropic",
+        model: model_name,
+        operation: operation,
+        prompt_tokens: input_tokens,
+        completion_tokens: output_tokens,
+        total_tokens: total_tokens,
+        cache_creation_tokens: cache_creation,
+        cache_read_tokens: cache_read,
+        estimated_cost: estimated_cost,
+        metadata: metadata
+      )
+
+      Rails.logger.info("LLM usage recorded - Provider: anthropic, Operation: #{operation}, Cost: #{estimated_cost.inspect}")
+    rescue => e
+      Rails.logger.error("Failed to record LLM usage: #{e.message}")
+    end
+
+    # Persists a zero-token LlmUsage row describing a failed call, with the
+    # error message and any HTTP status code merged into metadata.
+    #
+    # Does nothing when no family is attached or error is nil. Persistence
+    # failures are logged and swallowed.
+    def record_usage_error(model_name, operation:, error:, metadata: {})
+      return unless family && error
+
+      http_status_code = extract_http_status_code(error)
+
+      family.llm_usages.create!(
+        provider: "anthropic",
+        model: model_name,
+        operation: operation,
+        prompt_tokens: 0,
+        completion_tokens: 0,
+        total_tokens: 0,
+        estimated_cost: nil,
+        metadata: metadata.merge(error: safe_error_message(error), http_status_code: http_status_code)
+      )
+    rescue => e
+      Rails.logger.error("Failed to record LLM usage error: #{e.message}")
+    end
+
+    # Best-effort HTTP status for an error: `status`, then `http_status`, then
+    # a status code found in the message. Returns nil when none is available.
+    def extract_http_status_code(error)
+      return error.status if error.respond_to?(:status)
+      return error.http_status if error.respond_to?(:http_status)
+
+      # Accept only a standalone 1xx-5xx number. A bare /\d{3}/ would also match
+      # the first three digits of longer numbers such as "30000ms" or a request id.
+      match = safe_error_message(error).to_s.match(/(?<!\d)([1-5]\d{2})(?!\d)/)
+      match && match[1].to_i
+    end
+
+    # Error message that never raises: a placeholder naming the exception class
+    # is returned if calling #message itself fails.
+    def safe_error_message(error)
+      error&.message
+    rescue => e
+      "(message unavailable: #{e.class})"
+    end
+end
--- a/app/models/provider/anthropic/provider_merchant_enhancer.rb
+++ b/app/models/provider/anthropic/provider_merchant_enhancer.rb
@@ -0,0 +1,154 @@
+# Looks up a website URL for a batch of merchants by forcing the model to call
+# a single output tool (TOOL_NAME).
+class Provider::Anthropic::ProviderMerchantEnhancer
+  include Provider::Anthropic::Concerns::UsageRecorder
+
+  TOOL_NAME = "report_enhancements".freeze
+
+  attr_reader :client, :model, :merchants, :langfuse_trace, :family
+
+  # client         - object whose `messages.create` issues the API request
+  # model          - model identifier sent with the request
+  # merchants      - array of hashes; `:id` feeds the schema enum, the whole array is sent as JSON
+  # langfuse_trace - optional trace; a span is opened on it when present
+  # family         - optional record used by the included UsageRecorder for ledger rows
+  def initialize(client, model:, merchants: [], langfuse_trace: nil, family: nil)
+    @client = client
+    @model = model
+    @merchants = merchants
+    @langfuse_trace = langfuse_trace
+    @family = family
+  end
+
+  # Issues the forced tool call, maps the tool input into EnhancedMerchant
+  # records, records usage, and returns the records.
+  #
+  # Any StandardError is reported to the span and to the usage ledger, then
+  # re-raised unchanged.
+  def enhance_merchants
+    span = langfuse_trace&.span(name: "enhance_provider_merchants_api_call", input: {
+      model: model,
+      merchants: merchants
+    })
+
+    response = client.messages.create(
+      model: model,
+      max_tokens: max_tokens,
+      system_: instructions,
+      messages: [ { role: "user", content: user_message } ],
+      tools: [ output_tool ],
+      tool_choice: { type: "tool", name: TOOL_NAME, disable_parallel_tool_use: true }
+    )
+
+    enhanced = extract_enhancements(response)
+    result = build_response(enhanced)
+
+    record_usage(model, response.usage, operation: "enhance_provider_merchants", metadata: { merchant_count: merchants.size })
+
+    span&.end(output: result.map(&:to_h), usage: usage_hash(response.usage))
+    result
+  rescue => e
+    span&.end(output: { error: e.message }, level: "ERROR")
+    record_usage_error(model, operation: "enhance_provider_merchants", error: e, metadata: { merchant_count: merchants.size })
+    raise
+  end
+
+  private
+    EnhancedMerchant = Provider::LlmConcept::EnhancedMerchant
+
+    # Output token budget; overridable through ANTHROPIC_MAX_TOKENS.
+    def max_tokens
+      ENV.fetch("ANTHROPIC_MAX_TOKENS", 4096).to_i
+    end
+
+    # Tool definition the model is forced to call. The merchant_id enum is
+    # derived from the current batch (IDs coerced to strings to match the
+    # declared type), so the schema changes per request.
+    def output_tool
+      {
+        name: TOOL_NAME,
+        description: "Return the business website URL for each input merchant.",
+        input_schema: {
+          type: "object",
+          properties: {
+            merchants: {
+              type: "array",
+              description: "One result per input merchant.",
+              items: {
+                type: "object",
+                properties: {
+                  merchant_id: {
+                    type: "string",
+                    description: "The merchant's internal ID",
+                    enum: merchants.map { |m| m[:id].to_s }
+                  },
+                  business_url: {
+                    type: [ "string", "null" ],
+                    description: "Business website without the www. subdomain, or null if uncertain or local"
+                  }
+                },
+                required: [ "merchant_id", "business_url" ],
+                additionalProperties: false
+              }
+            }
+          },
+          required: [ "merchants" ],
+          additionalProperties: false
+        }
+      }
+    end
+
+    # System prompt. The squiggly heredoc already removes the indentation, so
+    # no further stripping is needed.
+    def instructions
+      <<~INSTRUCTIONS
+        You are an assistant to a consumer personal finance app. Given a list of merchant names, identify
+        the business website URL for each and return the result via the report_enhancements tool.
+
+        Follow ALL the rules below:
+
+        - One result per merchant, correlated by merchant_id
+        - Do NOT include the www. subdomain ("walmart.com", not "www.walmart.com")
+        - Favor null over false positives; only return a URL when 80%+ confident
+        - NEVER return a URL for generic or local-only merchants ("Local diner", "Gas station", "ATM withdrawal")
+      INSTRUCTIONS
+    end
+
+    # User turn: the merchants as a fenced JSON block.
+    def user_message
+      <<~MESSAGE
+        Enhance the following merchants by identifying each one's website URL:
+
+        ```json
+        #{merchants.to_json}
+        ```
+      MESSAGE
+    end
+
+    # Returns the `merchants` array from the first tool_use block.
+    # Raises Provider::Anthropic::Error when there is no tool_use block or the
+    # array is missing. Accepts the tool input as a Hash (string or symbol keys)
+    # or as a JSON string.
+    def extract_enhancements(response)
+      tool_use = Array(response.content).find { |block| block_type(block) == :tool_use }
+      raise Provider::Anthropic::Error, "Model did not invoke #{TOOL_NAME}" unless tool_use
+
+      input = block_input(tool_use)
+      input = JSON.parse(input) if input.is_a?(String)
+      enhanced = input.is_a?(Hash) ? (input["merchants"] || input[:merchants]) : nil
+
+      raise Provider::Anthropic::Error, "Tool call missing merchants" unless enhanced.is_a?(Array)
+      enhanced
+    end
+
+    # Maps raw tool entries (string or symbol keys) to EnhancedMerchant records.
+    def build_response(enhanced)
+      enhanced.map do |m|
+        EnhancedMerchant.new(
+          merchant_id: m["merchant_id"] || m[:merchant_id],
+          business_url: normalize_value(m["business_url"] || m[:business_url])
+        )
+      end
+    end
+
+    # Returns nil for nil, blank, or a literal "null" (any case); otherwise the
+    # stripped string.
+    def normalize_value(value)
+      return nil if value.nil?
+      str = value.to_s.strip
+      return nil if str.empty? || str.casecmp("null").zero?
+      str
+    end
+
+    # Content-block type as a symbol; works for objects exposing #type and for
+    # hashes with symbol or string keys.
+    def block_type(block)
+      raw = block.respond_to?(:type) ? block.type : block[:type] || block["type"]
+      raw.to_s.to_sym
+    end
+
+    # Content-block input; works for objects exposing #input and for hashes.
+    def block_input(block)
+      block.respond_to?(:input) ? block.input : (block[:input] || block["input"])
+    end
+
+    # Token counts reported to the Langfuse span; empty when usage is absent.
+    def usage_hash(raw_usage)
+      return {} unless raw_usage
+      {
+        "input_tokens" => raw_usage.input_tokens.to_i,
+        "output_tokens" => raw_usage.output_tokens.to_i,
+        "total_tokens" => raw_usage.input_tokens.to_i + raw_usage.output_tokens.to_i
+      }
+    end
+end
--- a/db/migrate/20260525120000_add_anthropic_cache_tokens_to_llm_usages.rb
+++ b/db/migrate/20260525120000_add_anthropic_cache_tokens_to_llm_usages.rb
@@ -0,0 +1,9 @@
+# Adds two integer columns to llm_usages for prompt-cache token counts. Neither
+# column declares a default or a NOT NULL constraint.
+class AddAnthropicCacheTokensToLlmUsages < ActiveRecord::Migration[7.2]
+  def change
+    # Anthropic reports cache_creation_input_tokens (charged at ~1.25x input rate
+    # for 5-min TTL) and cache_read_input_tokens (charged at 0.1x input rate).
+    # OpenAI usage rows leave these null.
+    add_column :llm_usages, :cache_creation_tokens, :integer
+    add_column :llm_usages, :cache_read_tokens, :integer
+  end
+end
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -10,7 +10,7 @@
 #
 # It's strongly recommended that you check this file into your version control system.

-ActiveRecord::Schema[7.2].define(version: 2026_05_19_100000) do
+ActiveRecord::Schema[7.2].define(version: 2026_05_25_120000) do
  # These are extensions that must be enabled in order to support this database
  enable_extension "pgcrypto"
  enable_extension "plpgsql"
@@ -498,7 +498,7 @@ ActiveRecord::Schema[7.2].define(version: 2026_05_19_100000) do
    t.index ["provider_key"], name: "index_debug_log_entries_on_provider_key"
    t.index ["source"], name: "index_debug_log_entries_on_source"
    t.index ["user_id"], name: "index_debug_log_entries_on_user_id"
-    t.check_constraint "level::text = ANY (ARRAY['debug'::character varying, 'info'::character varying, 'warn'::character varying, 'error'::character varying]::text[])", name: "chk_debug_log_entries_level"
+    t.check_constraint "level::text = ANY (ARRAY['debug'::character varying::text, 'info'::character varying::text, 'warn'::character varying::text, 'error'::character varying::text])", name: "chk_debug_log_entries_level"
  end

  create_table "depositories", id: :uuid, default: -> { "gen_random_uuid()" }, force: :cascade do |t|
@@ -1063,6 +1063,8 @@ ActiveRecord::Schema[7.2].define(version: 2026_05_19_100000) do
    t.jsonb "metadata", default: {}
    t.datetime "created_at", null: false
    t.datetime "updated_at", null: false
+    t.integer "cache_creation_tokens"
+    t.integer "cache_read_tokens"
    t.index ["family_id", "created_at"], name: "index_llm_usages_on_family_id_and_created_at"
    t.index ["family_id", "operation"], name: "index_llm_usages_on_family_id_and_operation"
    t.index ["family_id"], name: "index_llm_usages_on_family_id"
--- a/test/models/llm_usage_test.rb
+++ b/test/models/llm_usage_test.rb
@@ -0,0 +1,35 @@
+require "test_helper"
+
+# Covers provider inference and per-model cost estimation on LlmUsage for the
+# Claude model names, plus a regression check for GPT model names.
+class LlmUsageTest < ActiveSupport::TestCase
+  test "infer_provider returns anthropic for claude models" do
+    %w[claude-sonnet-4-6 claude-opus-4-7 claude-haiku-4-5].each do |model_name|
+      assert_equal "anthropic", LlmUsage.infer_provider(model_name)
+    end
+  end
+
+  test "infer_provider still returns openai for gpt models" do
+    %w[gpt-4.1 gpt-5].each do |model_name|
+      assert_equal "openai", LlmUsage.infer_provider(model_name)
+    end
+  end
+
+  test "calculate_cost returns Anthropic pricing for Claude models" do
+    # $3/MTok in, $15/MTok out: 1M in + 100K out => $3.00 + $1.50 = $4.50
+    estimate = LlmUsage.calculate_cost(
+      model: "claude-sonnet-4-6",
+      prompt_tokens: 1_000_000,
+      completion_tokens: 100_000
+    )
+
+    assert_in_delta 4.5, estimate, 0.0001
+  end
+
+  test "calculate_cost uses higher pricing for Opus" do
+    # $15/MTok in, no output tokens => $15.00
+    estimate = LlmUsage.calculate_cost(
+      model: "claude-opus-4-7",
+      prompt_tokens: 1_000_000,
+      completion_tokens: 0
+    )
+
+    assert_in_delta 15.0, estimate, 0.0001
+  end
+
+  test "calculate_cost uses lower pricing for Haiku" do
+    # $1/MTok in + $5/MTok out, 1M each => $6.00
+    estimate = LlmUsage.calculate_cost(
+      model: "claude-haiku-4-5",
+      prompt_tokens: 1_000_000,
+      completion_tokens: 1_000_000
+    )
+
+    assert_in_delta 6.0, estimate, 0.0001
+  end
+end
--- a/test/models/provider/anthropic/auto_categorizer_test.rb
+++ b/test/models/provider/anthropic/auto_categorizer_test.rb
@@ -0,0 +1,124 @@
+require "test_helper"
+
+# Unit tests for Provider::Anthropic::AutoCategorizer using a stubbed client: the forced
+# tool-call request shape, null category normalization, and the missing tool_use error.
+class Provider::Anthropic::AutoCategorizerTest < ActiveSupport::TestCase
+  setup do
+    @transactions = [
+      { id: "txn_1", name: "McDonalds", amount: 20, classification: "expense" },
+      { id: "txn_2", name: "Netflix", amount: 15, classification: "expense" }
+    ]
+    @user_categories = [
+      { id: "cat_food", name: "Fast Food", classification: "expense" },
+      { id: "cat_subs", name: "Subscriptions", classification: "expense" }
+    ]
+  end
+
+  test "issues a forced tool call and maps the response into AutoCategorization records" do
+    fake_response = build_response(content: [
+      tool_use_block(
+        id: "toolu_1",
+        name: "report_categorizations",
+        input: {
+          "categorizations" => [
+            { "transaction_id" => "txn_1", "category_name" => "Fast Food" },
+            { "transaction_id" => "txn_2", "category_name" => "Subscriptions" }
+          ]
+        }
+      )
+    ])
+    client = stub_client(fake_response, expect_request: ->(params) {
+      assert_equal "claude-haiku-4-5", params[:model]
+      assert_equal({ type: "tool", name: "report_categorizations", disable_parallel_tool_use: true }, params[:tool_choice])
+      assert_equal 1, params[:tools].size
+      assert_equal "report_categorizations", params[:tools].first[:name]
+    })
+
+    result = Provider::Anthropic::AutoCategorizer.new(
+      client,
+      model: "claude-haiku-4-5",
+      transactions: @transactions,
+      user_categories: @user_categories
+    ).auto_categorize
+
+    assert_equal 2, result.size
+    assert_equal "Fast Food", result.find { |r| r.transaction_id == "txn_1" }.category_name
+    assert_equal "Subscriptions", result.find { |r| r.transaction_id == "txn_2" }.category_name
+  end
+
+  test "normalizes null category names to nil" do
+    fake_response = build_response(content: [
+      tool_use_block(
+        id: "toolu_2",
+        name: "report_categorizations",
+        input: {
+          "categorizations" => [
+            { "transaction_id" => "txn_1", "category_name" => nil },
+            { "transaction_id" => "txn_2", "category_name" => "null" }
+          ]
+        }
+      )
+    ])
+    client = stub_client(fake_response)
+
+    result = Provider::Anthropic::AutoCategorizer.new(
+      client,
+      model: "claude-haiku-4-5",
+      transactions: @transactions,
+      user_categories: @user_categories
+    ).auto_categorize
+
+    assert_nil result.find { |r| r.transaction_id == "txn_1" }.category_name
+    assert_nil result.find { |r| r.transaction_id == "txn_2" }.category_name
+  end
+
+  test "raises when no tool_use block is present in the response" do
+    fake_response = build_response(content: [ text_block("No tool use") ])
+    client = stub_client(fake_response)
+
+    err = assert_raises(Provider::Anthropic::Error) do
+      Provider::Anthropic::AutoCategorizer.new(
+        client,
+        model: "claude-haiku-4-5",
+        transactions: @transactions,
+        user_categories: @user_categories
+      ).auto_categorize
+    end
+
+    assert_match(/did not invoke report_categorizations/i, err.message)
+  end
+
+  private
+    # Mock client whose messages.create returns response; expect_request, when given, is called with the request params.
+    def stub_client(response, expect_request: nil)
+      messages = mock
+      if expect_request
+        messages.expects(:create).with do |params|
+          expect_request.call(params)
+          true
+        end.returns(response)
+      else
+        messages.stubs(:create).returns(response)
+      end
+      client = mock
+      client.stubs(:messages).returns(messages)
+      client
+    end
+
+    def build_response(content:, usage: { input_tokens: 50, output_tokens: 25 })
+      OpenStruct.new(
+        id: "msg_test",
+        model: "claude-haiku-4-5",
+        content: content,
+        usage: OpenStruct.new(input_tokens: usage[:input_tokens], output_tokens: usage[:output_tokens])
+      )
+    end
+
+    def text_block(text)
+      OpenStruct.new(type: :text, text: text)
+    end
+
+    def tool_use_block(id:, name:, input:)
+      OpenStruct.new(type: :tool_use, id: id, name: name, input: input)
+    end
+end
--- a/test/models/provider/anthropic/auto_merchant_detector_test.rb
+++ b/test/models/provider/anthropic/auto_merchant_detector_test.rb
@@ -0,0 +1,115 @@
+require "test_helper"
+
+# Exercises Provider::Anthropic::AutoMerchantDetector against a mocked client:
+# request shape, mapping of reported merchants, and the missing tool_use error.
+class Provider::Anthropic::AutoMerchantDetectorTest < ActiveSupport::TestCase
+  setup do
+    @transactions = [
+      { id: "txn_1", name: "AMZN purchases", classification: "expense" },
+      { id: "txn_2", name: "Local diner", classification: "expense" }
+    ]
+    @user_merchants = [ { id: "m1", name: "Shooters" } ]
+  end
+
+  test "issues a forced tool call and maps merchants" do
+    reported = [
+      { "transaction_id" => "txn_1", "business_name" => "Amazon", "business_url" => "amazon.com" },
+      { "transaction_id" => "txn_2", "business_name" => nil, "business_url" => nil }
+    ]
+    tool_call = tool_use_block(
+      id: "toolu_1",
+      name: "report_merchants",
+      input: { "merchants" => reported }
+    )
+    request_checks = lambda do |params|
+      assert_equal "claude-haiku-4-5", params[:model]
+      assert_equal "report_merchants", params[:tool_choice][:name]
+      assert params[:tool_choice][:disable_parallel_tool_use]
+    end
+    anthropic = stub_client(build_response(content: [ tool_call ]), expect_request: request_checks)
+
+    detections = Provider::Anthropic::AutoMerchantDetector.new(
+      anthropic,
+      model: "claude-haiku-4-5",
+      transactions: @transactions,
+      user_merchants: @user_merchants
+    ).auto_detect_merchants
+
+    amazon = detections.find { |d| d.transaction_id == "txn_1" }
+    diner = detections.find { |d| d.transaction_id == "txn_2" }
+
+    assert_equal "Amazon", amazon.business_name
+    assert_equal "amazon.com", amazon.business_url
+    assert_nil diner.business_name
+    assert_nil diner.business_url
+  end
+
+  test "normalizes case-insensitive matches against user_merchants" do
+    reported = [ { "transaction_id" => "txn_1", "business_name" => "shooters", "business_url" => nil } ]
+    tool_call = tool_use_block(
+      id: "toolu_1",
+      name: "report_merchants",
+      input: { "merchants" => reported }
+    )
+    anthropic = stub_client(build_response(content: [ tool_call ]))
+
+    detections = Provider::Anthropic::AutoMerchantDetector.new(
+      anthropic,
+      model: "claude-haiku-4-5",
+      transactions: @transactions.first(1),
+      user_merchants: @user_merchants
+    ).auto_detect_merchants
+
+    assert_equal "Shooters", detections.first.business_name
+  end
+
+  test "raises when model returns no tool_use" do
+    text_only = OpenStruct.new(type: :text, text: "I cannot help")
+    anthropic = stub_client(build_response(content: [ text_only ]))
+
+    err = assert_raises(Provider::Anthropic::Error) do
+      Provider::Anthropic::AutoMerchantDetector.new(
+        anthropic,
+        model: "claude-haiku-4-5",
+        transactions: @transactions,
+        user_merchants: @user_merchants
+      ).auto_detect_merchants
+    end
+
+    assert_match(/did not invoke report_merchants/i, err.message)
+  end
+
+  private
+    # Returns a mock client whose messages.create returns the given response.
+    # When expect_request is given, create is expected and the callable receives the request params.
+    def stub_client(response, expect_request: nil)
+      messages_api = mock
+      create_call =
+        if expect_request
+          messages_api.expects(:create).with do |params|
+            expect_request.call(params)
+            true
+          end
+        else
+          messages_api.stubs(:create)
+        end
+      create_call.returns(response)
+      mock.tap { |fake_client| fake_client.stubs(:messages).returns(messages_api) }
+    end
+
+    # Builds a stand-in response object carrying the given content blocks and token usage.
+    def build_response(content:, usage: { input_tokens: 100, output_tokens: 40 })
+      token_usage = OpenStruct.new(input_tokens: usage[:input_tokens], output_tokens: usage[:output_tokens])
+      OpenStruct.new(
+        id: "msg_test",
+        model: "claude-haiku-4-5",
+        content: content,
+        usage: token_usage
+      )
+    end
+
+    # Builds a stand-in tool_use content block.
+    def tool_use_block(id:, name:, input:)
+      OpenStruct.new(type: :tool_use, id: id, name: name, input: input)
+    end
+end
--- a/test/models/provider/anthropic/provider_merchant_enhancer_test.rb
+++ b/test/models/provider/anthropic/provider_merchant_enhancer_test.rb
@@ -0,0 +1,81 @@
+require "test_helper"
+
+# Exercises Provider::Anthropic::ProviderMerchantEnhancer against a mocked client:
+# mapping of reported business URLs and the missing tool_use error.
+class Provider::Anthropic::ProviderMerchantEnhancerTest < ActiveSupport::TestCase
+  setup do
+    @merchants = [
+      { id: "m1", name: "Walmart" },
+      { id: "m2", name: "Local Diner" }
+    ]
+  end
+
+  test "issues a forced tool call and maps enhancements" do
+    enhancements = [
+      { "merchant_id" => "m1", "business_url" => "walmart.com" },
+      { "merchant_id" => "m2", "business_url" => nil }
+    ]
+    tool_call = tool_use_block(id: "toolu_1", name: "report_enhancements", input: { "merchants" => enhancements })
+    check_tool_choice = ->(params) { assert_equal "report_enhancements", params[:tool_choice][:name] }
+    anthropic = stub_client(build_response(content: [ tool_call ]), expect_request: check_tool_choice)
+
+    enhanced = Provider::Anthropic::ProviderMerchantEnhancer.new(
+      anthropic,
+      model: "claude-haiku-4-5",
+      merchants: @merchants
+    ).enhance_merchants
+    walmart = enhanced.find { |e| e.merchant_id == "m1" }
+    diner = enhanced.find { |e| e.merchant_id == "m2" }
+
+    assert_equal "walmart.com", walmart.business_url
+    assert_nil diner.business_url
+  end
+
+  test "raises when model returns no tool_use" do
+    text_only = OpenStruct.new(type: :text, text: "Nope")
+    anthropic = stub_client(build_response(content: [ text_only ]))
+
+    err = assert_raises(Provider::Anthropic::Error) do
+      Provider::Anthropic::ProviderMerchantEnhancer.new(
+        anthropic,
+        model: "claude-haiku-4-5",
+        merchants: @merchants
+      ).enhance_merchants
+    end
+
+    assert_match(/did not invoke report_enhancements/i, err.message)
+  end
+
+  private
+    # Mock client whose messages.create returns response; expect_request, when given, is called with the request params.
+    def stub_client(response, expect_request: nil)
+      messages_api = mock
+      create_call =
+        if expect_request
+          messages_api.expects(:create).with do |params|
+            expect_request.call(params)
+            true
+          end
+        else
+          messages_api.stubs(:create)
+        end
+      create_call.returns(response)
+      mock.tap { |fake_client| fake_client.stubs(:messages).returns(messages_api) }
+    end
+
+    # Builds a stand-in response object carrying the given content blocks and token usage.
+    def build_response(content:, usage: { input_tokens: 60, output_tokens: 20 })
+      token_usage = OpenStruct.new(input_tokens: usage[:input_tokens], output_tokens: usage[:output_tokens])
+      OpenStruct.new(
+        id: "msg_test",
+        model: "claude-haiku-4-5",
+        content: content,
+        usage: token_usage
+      )
+    end
+
+    # Builds a stand-in tool_use content block.
+    def tool_use_block(id:, name:, input:)
+      OpenStruct.new(type: :tool_use, id: id, name: name, input: input)
+    end
+end