mirror of
https://github.com/we-promise/sure.git
synced 2026-05-29 23:39:03 +00:00
feat(ai): add Anthropic batch ops + LLM cost ledger (2/5)
Implements auto_categorize, auto_detect_merchants, and
enhance_provider_merchants on Provider::Anthropic via forced tool calls,
plus the cost-ledger plumbing they need.
- Provider::Anthropic::AutoCategorizer, AutoMerchantDetector,
ProviderMerchantEnhancer each define a single output tool whose
input_schema mirrors the desired output, then force the model to call
it via tool_choice: { type: "tool", name: ..., disable_parallel_tool_use: true }.
Anthropic guarantees the tool_use.input matches the schema, so there
is no JSON parsing fragility, no <think> tag stripping, and no
json_object/json_schema fallback ladders.
- Concerns::UsageRecorder mirrors the OpenAI sibling but persists
cache_creation_input_tokens / cache_read_input_tokens to dedicated
columns instead of metadata.
- Migration adds cache_creation_tokens, cache_read_tokens (nullable
integers) to llm_usages. OpenAI rows leave them null.
- LlmUsage::PRICING gains Claude 4.x rows (opus-4-7 $15/$75, sonnet-4-6
$3/$15, haiku-4-5 $1/$5 per MTok). infer_provider returns "anthropic"
for claude-* via the existing exact/prefix lookup.
- Provider::Anthropic#chat_response now persists cache columns directly
rather than stashing them in metadata.
- 25-transaction batch cap mirrors the OpenAI provider so the cost
ledger sees the same shape regardless of which provider ran a batch.
Tests cover the forced-tool-call path, null/None normalization,
case-insensitive merchant matching, the missing-tool_use error path,
and Anthropic-specific pricing + provider inference on LlmUsage.
Stacked on #1983 (PR 1/5). 3/5 PDF + vision next.
This commit is contained in:
@@ -37,6 +37,15 @@ class LlmUsage < ApplicationRecord
|
||||
"google" => {
|
||||
"gemini-2.5-pro" => { prompt: 1.25, completion: 10.00 },
|
||||
"gemini-2.5-flash" => { prompt: 0.3, completion: 2.50 }
|
||||
},
|
||||
# Anthropic pricing per 1M tokens (Claude 4.x family, as of May 2026)
|
||||
# Source: https://www.anthropic.com/pricing
|
||||
"anthropic" => {
|
||||
"claude-opus-4-7" => { prompt: 15.00, completion: 75.00 },
|
||||
"claude-opus-4-6" => { prompt: 15.00, completion: 75.00 },
|
||||
"claude-sonnet-4-6" => { prompt: 3.00, completion: 15.00 },
|
||||
"claude-sonnet-4-5" => { prompt: 3.00, completion: 15.00 },
|
||||
"claude-haiku-4-5" => { prompt: 1.00, completion: 5.00 }
|
||||
}
|
||||
}.freeze
|
||||
|
||||
|
||||
@@ -66,18 +66,86 @@ class Provider::Anthropic < Provider
|
||||
@base_url.present?
|
||||
end
|
||||
|
||||
# Auto-categorizes a batch of transactions against the family's categories.
#
# The work runs inside with_provider_response and is delegated to
# AutoCategorizer; the Langfuse trace is updated with the mapped result.
#
# @param transactions [Array<Hash>] transactions to categorize (at most 25)
# @param user_categories [Array<Hash>] categories the model may choose from
# @param model [String] model override; blank falls back to @default_model
# @param family [Object, nil] family forwarded to AutoCategorizer for usage recording
# @param json_mode [Object, nil] accepted for signature compatibility; not used here
# @raise [Error] inside the block when more than 25 transactions are given
#   or when user_categories is blank
def auto_categorize(transactions: [], user_categories: [], model: "", family: nil, json_mode: nil)
  with_provider_response do
    raise Error, "Too many transactions to auto-categorize. Max is 25 per request." if transactions.size > 25

    if user_categories.blank?
      family_id = family&.id || "unknown"
      Rails.logger.error("Cannot auto-categorize transactions for family #{family_id}: no categories available")
      raise Error, "No categories available for auto-categorization"
    end

    effective_model = model.presence || @default_model

    trace = create_langfuse_trace(
      name: "anthropic.auto_categorize",
      input: { transactions: transactions, user_categories: user_categories }
    )

    result = AutoCategorizer.new(
      client,
      model: effective_model,
      transactions: transactions,
      user_categories: user_categories,
      langfuse_trace: trace,
      family: family
    ).auto_categorize

    upsert_langfuse_trace(trace: trace, output: result.map(&:to_h))

    result
  end
end
|
||||
|
||||
# Detects the business name and website for a batch of transactions.
#
# The work runs inside with_provider_response and is delegated to
# AutoMerchantDetector; the Langfuse trace is updated with the mapped result.
#
# @param transactions [Array<Hash>] transactions to analyze (at most 25)
# @param user_merchants [Array<Hash>] merchants already known to the user
# @param model [String] model override; blank falls back to @default_model
# @param family [Object, nil] family forwarded to AutoMerchantDetector for usage recording
# @param json_mode [Object, nil] accepted for signature compatibility; not used here
# @raise [Error] inside the block when more than 25 transactions are given
def auto_detect_merchants(transactions: [], user_merchants: [], model: "", family: nil, json_mode: nil)
  with_provider_response do
    raise Error, "Too many transactions to auto-detect merchants. Max is 25 per request." if transactions.size > 25

    effective_model = model.presence || @default_model

    trace = create_langfuse_trace(
      name: "anthropic.auto_detect_merchants",
      input: { transactions: transactions, user_merchants: user_merchants }
    )

    result = AutoMerchantDetector.new(
      client,
      model: effective_model,
      transactions: transactions,
      user_merchants: user_merchants,
      langfuse_trace: trace,
      family: family
    ).auto_detect_merchants

    upsert_langfuse_trace(trace: trace, output: result.map(&:to_h))

    result
  end
end
|
||||
|
||||
# Looks up the website URL for a batch of provider merchants.
#
# The work runs inside with_provider_response and is delegated to
# ProviderMerchantEnhancer; the Langfuse trace is updated with the mapped result.
#
# @param merchants [Array<Hash>] merchants to enhance (at most 25)
# @param model [String] model override; blank falls back to @default_model
# @param family [Object, nil] family forwarded to ProviderMerchantEnhancer for usage recording
# @param json_mode [Object, nil] accepted for signature compatibility; not used here
# @raise [Error] inside the block when more than 25 merchants are given
def enhance_provider_merchants(merchants: [], model: "", family: nil, json_mode: nil)
  with_provider_response do
    raise Error, "Too many merchants to enhance. Max is 25 per request." if merchants.size > 25

    effective_model = model.presence || @default_model

    trace = create_langfuse_trace(
      name: "anthropic.enhance_provider_merchants",
      input: { merchants: merchants }
    )

    result = ProviderMerchantEnhancer.new(
      client,
      model: effective_model,
      merchants: merchants,
      langfuse_trace: trace,
      family: family
    ).enhance_merchants

    upsert_langfuse_trace(trace: trace, output: result.map(&:to_h))

    result
  end
end
|
||||
|
||||
def supports_pdf_processing?(model: @default_model)
|
||||
@@ -345,8 +413,10 @@ class Provider::Anthropic < Provider
|
||||
prompt_tokens: prompt_tokens,
|
||||
completion_tokens: completion_tokens,
|
||||
total_tokens: total_tokens,
|
||||
cache_creation_tokens: usage["cache_creation_input_tokens"],
|
||||
cache_read_tokens: usage["cache_read_input_tokens"],
|
||||
estimated_cost: estimated_cost,
|
||||
metadata: usage.slice("cache_creation_input_tokens", "cache_read_input_tokens").compact
|
||||
metadata: {}
|
||||
)
|
||||
rescue => e
|
||||
Rails.logger.error("Failed to record LLM usage: #{e.message}")
|
||||
|
||||
176
app/models/provider/anthropic/auto_categorizer.rb
Normal file
176
app/models/provider/anthropic/auto_categorizer.rb
Normal file
@@ -0,0 +1,176 @@
|
||||
# Auto-categorizes a batch of transactions with a single forced tool call.
#
# The request declares one tool (TOOL_NAME) whose input_schema describes the
# desired output and sets tool_choice so the model has to call it. The
# tool_use block's input is then mapped into AutoCategorization records.
class Provider::Anthropic::AutoCategorizer
  include Provider::Anthropic::Concerns::UsageRecorder

  TOOL_NAME = "report_categorizations".freeze

  attr_reader :client, :model, :transactions, :user_categories, :langfuse_trace, :family

  # @param client [Object] client responding to `messages.create`
  # @param model [String] model name sent with the request
  # @param transactions [Array<Hash>] transactions; `:id` is read from each
  # @param user_categories [Array<Hash>] categories; `:name` is read from each
  # @param langfuse_trace [Object, nil] optional trace used to open a span
  # @param family [Object, nil] family the usage row is recorded against
  def initialize(client, model:, transactions: [], user_categories: [], langfuse_trace: nil, family: nil)
    @client = client
    @model = model
    @transactions = transactions
    @user_categories = user_categories
    @langfuse_trace = langfuse_trace
    @family = family
  end

  # Sends the request, maps the tool call into AutoCategorization records and
  # records usage. On any StandardError the span is closed with level "ERROR",
  # an error usage row is recorded, and the exception is re-raised.
  #
  # @return [Array<AutoCategorization>]
  def auto_categorize
    span = langfuse_trace&.span(name: "auto_categorize_api_call", input: {
      model: model,
      transactions: transactions,
      user_categories: user_categories
    })

    response = client.messages.create(
      model: model,
      max_tokens: max_tokens,
      system_: instructions,
      messages: [ { role: "user", content: user_message } ],
      tools: [ output_tool ],
      tool_choice: { type: "tool", name: TOOL_NAME, disable_parallel_tool_use: true }
    )

    categorizations = extract_categorizations(response)
    result = build_response(categorizations)

    record_usage(model, response.usage, operation: "auto_categorize", metadata: {
      transaction_count: transactions.size,
      category_count: user_categories.size
    })

    span&.end(output: result.map(&:to_h), usage: usage_hash(response.usage))
    result
  rescue => e
    span&.end(output: { error: e.message }, level: "ERROR")
    record_usage_error(model, operation: "auto_categorize", error: e, metadata: {
      transaction_count: transactions.size,
      category_count: user_categories.size
    })
    raise
  end

  private
    AutoCategorization = Provider::LlmConcept::AutoCategorization

    # Output token budget for the request. Reads ANTHROPIC_MAX_TOKENS and
    # falls back to 4096 when the variable is unset, blank, non-numeric or
    # not positive, so a misconfigured value never becomes `max_tokens: 0`.
    def max_tokens
      configured = ENV.fetch("ANTHROPIC_MAX_TOKENS", nil).to_i
      configured.positive? ? configured : 4096
    end

    # Definition of the single output tool. transaction_id is limited to the
    # input transaction ids and category_name to the user's category names
    # (or null).
    def output_tool
      {
        name: TOOL_NAME,
        description: "Return the categorization decision for each input transaction.",
        input_schema: {
          type: "object",
          properties: {
            categorizations: {
              type: "array",
              description: "One categorization per input transaction.",
              items: {
                type: "object",
                properties: {
                  transaction_id: {
                    type: "string",
                    description: "The internal ID of the original transaction",
                    # The schema type is "string", so coerce ids to strings.
                    enum: transactions.map { |t| t[:id].to_s }
                  },
                  category_name: {
                    type: [ "string", "null" ],
                    description: "Matched category name from the user's categories, or null when uncertain.",
                    enum: [ *user_categories.map { |c| c[:name] }, nil ]
                  }
                },
                required: [ "transaction_id", "category_name" ],
                additionalProperties: false
              }
            }
          },
          required: [ "categorizations" ],
          additionalProperties: false
        }
      }
    end

    # System prompt sent with the request.
    def instructions
      <<~INSTRUCTIONS
        You are an assistant to a consumer personal finance app. You will be provided a list of the user's
        transactions and a list of the user's categories. Your job is to auto-categorize each transaction
        and return the result via the report_categorizations tool.

        Follow ALL the rules below:

        - Return one result per transaction, correlated by transaction_id
        - Use the most specific category possible (subcategory over parent category)
        - Any category may be used regardless of whether the transaction is income or expense
        - Return null for category_name when you are not 60%+ confident, or when the description is
          generic/ambiguous (e.g., "POS DEBIT", "ACH WITHDRAWAL", "CHECK #1234")
        - The `hint` field on a transaction (when present) comes from third-party aggregators and may
          or may not match the user's categories — treat it as a weak signal
      INSTRUCTIONS
    end

    # User message carrying the categories and transactions as JSON.
    def user_message
      <<~MESSAGE
        Here are the user's available categories in JSON:

        ```json
        #{user_categories.to_json}
        ```

        Auto-categorize the following transactions:

        ```json
        #{transactions.to_json}
        ```
      MESSAGE
    end

    # Finds the first tool_use block and returns its "categorizations" array.
    #
    # @raise [Provider::Anthropic::Error] when there is no tool_use block or
    #   its input does not contain a categorizations array
    def extract_categorizations(response)
      tool_use = Array(response.content).find { |block| block_type(block) == :tool_use }
      raise Provider::Anthropic::Error, "Model did not invoke #{TOOL_NAME}" unless tool_use

      input = block_input(tool_use)
      # The input may arrive as a JSON string rather than a Hash.
      input = JSON.parse(input) if input.is_a?(String)
      categorizations = input.is_a?(Hash) ? (input["categorizations"] || input[:categorizations]) : nil

      raise Provider::Anthropic::Error, "Tool call missing categorizations" unless categorizations.is_a?(Array)
      categorizations
    end

    # Maps raw entries (string or symbol keys) into AutoCategorization records.
    def build_response(categorizations)
      categorizations.map do |c|
        category_name = c["category_name"] || c[:category_name]
        AutoCategorization.new(
          transaction_id: c["transaction_id"] || c[:transaction_id],
          category_name: normalize_category(category_name)
        )
      end
    end

    # Returns nil for nil, blank or a literal "null" (any case). Otherwise
    # returns the user's category name that matches case-insensitively, or
    # the stripped value when nothing matches.
    def normalize_category(value)
      return nil if value.nil?
      str = value.to_s.strip
      return nil if str.empty? || str.casecmp("null").zero?

      match = user_categories.find { |c| c[:name].to_s.casecmp(str).zero? }
      match ? match[:name] : str
    end

    # Reads a content block's type as a symbol, from an object or a Hash.
    def block_type(block)
      raw = block.respond_to?(:type) ? block.type : block[:type] || block["type"]
      raw.to_s.to_sym
    end

    # Reads a content block's input, from an object or a Hash.
    def block_input(block)
      block.respond_to?(:input) ? block.input : (block[:input] || block["input"])
    end

    # Token counts passed to the Langfuse span; {} when usage is absent.
    def usage_hash(raw_usage)
      return {} unless raw_usage
      {
        "input_tokens" => raw_usage.input_tokens.to_i,
        "output_tokens" => raw_usage.output_tokens.to_i,
        "total_tokens" => raw_usage.input_tokens.to_i + raw_usage.output_tokens.to_i
      }
    end
end
|
||||
188
app/models/provider/anthropic/auto_merchant_detector.rb
Normal file
188
app/models/provider/anthropic/auto_merchant_detector.rb
Normal file
@@ -0,0 +1,188 @@
|
||||
# Detects the business name and website URL for a batch of transactions with
# a single forced tool call.
#
# The request declares one tool (TOOL_NAME) whose input_schema describes the
# desired output and sets tool_choice so the model has to call it. The
# tool_use block's input is then mapped into AutoDetectedMerchant records.
class Provider::Anthropic::AutoMerchantDetector
  include Provider::Anthropic::Concerns::UsageRecorder

  TOOL_NAME = "report_merchants".freeze

  attr_reader :client, :model, :transactions, :user_merchants, :langfuse_trace, :family

  # @param client [Object] client responding to `messages.create`
  # @param model [String] model name sent with the request
  # @param transactions [Array<Hash>] transactions; `:id` is read from each
  # @param user_merchants [Array<Hash>] known merchants; `:name` is read from each
  # @param langfuse_trace [Object, nil] optional trace used to open a span
  # @param family [Object, nil] family the usage row is recorded against
  def initialize(client, model:, transactions: [], user_merchants: [], langfuse_trace: nil, family: nil)
    @client = client
    @model = model
    @transactions = transactions
    @user_merchants = user_merchants
    @langfuse_trace = langfuse_trace
    @family = family
  end

  # Sends the request, maps the tool call into AutoDetectedMerchant records
  # and records usage. On any StandardError the span is closed with level
  # "ERROR", an error usage row is recorded, and the exception is re-raised.
  #
  # @return [Array<AutoDetectedMerchant>]
  def auto_detect_merchants
    span = langfuse_trace&.span(name: "auto_detect_merchants_api_call", input: {
      model: model,
      transactions: transactions,
      user_merchants: user_merchants
    })

    response = client.messages.create(
      model: model,
      max_tokens: max_tokens,
      system_: instructions,
      messages: [ { role: "user", content: user_message } ],
      tools: [ output_tool ],
      tool_choice: { type: "tool", name: TOOL_NAME, disable_parallel_tool_use: true }
    )

    merchants_data = extract_merchants(response)
    result = build_response(merchants_data)

    record_usage(model, response.usage, operation: "auto_detect_merchants", metadata: {
      transaction_count: transactions.size,
      merchant_count: user_merchants.size
    })

    span&.end(output: result.map(&:to_h), usage: usage_hash(response.usage))
    result
  rescue => e
    span&.end(output: { error: e.message }, level: "ERROR")
    record_usage_error(model, operation: "auto_detect_merchants", error: e, metadata: {
      transaction_count: transactions.size,
      merchant_count: user_merchants.size
    })
    raise
  end

  private
    AutoDetectedMerchant = Provider::LlmConcept::AutoDetectedMerchant

    # Output token budget for the request. Reads ANTHROPIC_MAX_TOKENS and
    # falls back to 4096 when the variable is unset, blank, non-numeric or
    # not positive, so a misconfigured value never becomes `max_tokens: 0`.
    def max_tokens
      configured = ENV.fetch("ANTHROPIC_MAX_TOKENS", nil).to_i
      configured.positive? ? configured : 4096
    end

    # Definition of the single output tool. transaction_id is limited to the
    # input transaction ids; business_name and business_url may be null.
    def output_tool
      {
        name: TOOL_NAME,
        description: "Return the detected business name and website URL for each input transaction.",
        input_schema: {
          type: "object",
          properties: {
            merchants: {
              type: "array",
              description: "One detection result per input transaction.",
              items: {
                type: "object",
                properties: {
                  transaction_id: {
                    type: "string",
                    description: "The internal ID of the original transaction",
                    # The schema type is "string", so coerce ids to strings.
                    enum: transactions.map { |t| t[:id].to_s }
                  },
                  business_name: {
                    type: [ "string", "null" ],
                    description: "Detected business name, or null if uncertain or generic"
                  },
                  business_url: {
                    type: [ "string", "null" ],
                    description: "Business website without the www. subdomain (e.g., \"amazon.com\"), or null if uncertain"
                  }
                },
                required: [ "transaction_id", "business_name", "business_url" ],
                additionalProperties: false
              }
            }
          },
          required: [ "merchants" ],
          additionalProperties: false
        }
      }
    end

    # System prompt sent with the request.
    def instructions
      <<~INSTRUCTIONS
        You are an assistant to a consumer personal finance app. Detect the business name and website URL
        for each transaction and return the result via the report_merchants tool.

        Follow ALL the rules below:

        - One result per transaction, correlated by transaction_id
        - Do NOT include the www. subdomain in business_url ("amazon.com", not "www.amazon.com")
        - User-provided merchants should only be used when the match is unambiguous
        - Favor null over false positives; only return values when 80%+ confident
        - NEVER return a name/URL for generic descriptions ("Paycheck", "Local diner", "ATM", "POS DEBIT")

        Decision order:
        1. Identify from your knowledge of global businesses
        2. Otherwise, match against the user-provided merchants
        3. Otherwise, return null for both fields
      INSTRUCTIONS
    end

    # User message carrying the known merchants and transactions as JSON.
    def user_message
      <<~MESSAGE
        User's known merchants:

        ```json
        #{user_merchants.to_json}
        ```

        Transactions to analyze:

        ```json
        #{transactions.to_json}
        ```
      MESSAGE
    end

    # Finds the first tool_use block and returns its "merchants" array.
    #
    # @raise [Provider::Anthropic::Error] when there is no tool_use block or
    #   its input does not contain a merchants array
    def extract_merchants(response)
      tool_use = Array(response.content).find { |block| block_type(block) == :tool_use }
      raise Provider::Anthropic::Error, "Model did not invoke #{TOOL_NAME}" unless tool_use

      input = block_input(tool_use)
      # The input may arrive as a JSON string rather than a Hash.
      input = JSON.parse(input) if input.is_a?(String)
      merchants = input.is_a?(Hash) ? (input["merchants"] || input[:merchants]) : nil

      raise Provider::Anthropic::Error, "Tool call missing merchants" unless merchants.is_a?(Array)
      merchants
    end

    # Maps raw entries (string or symbol keys) into AutoDetectedMerchant records.
    def build_response(merchants)
      merchants.map do |m|
        AutoDetectedMerchant.new(
          transaction_id: m["transaction_id"] || m[:transaction_id],
          business_name: normalize_merchant_name(m["business_name"] || m[:business_name]),
          business_url: normalize_value(m["business_url"] || m[:business_url])
        )
      end
    end

    # Returns nil for nil, blank or a literal "null" (any case); otherwise
    # the stripped string.
    def normalize_value(value)
      return nil if value.nil?
      str = value.to_s.strip
      return nil if str.empty? || str.casecmp("null").zero?
      str
    end

    # Normalizes the name, then returns the user's merchant name that matches
    # case-insensitively, or the normalized value when nothing matches.
    def normalize_merchant_name(value)
      str = normalize_value(value)
      return nil unless str
      return str if user_merchants.blank?

      match = user_merchants.find { |m| m[:name].to_s.casecmp(str).zero? }
      match ? match[:name] : str
    end

    # Reads a content block's type as a symbol, from an object or a Hash.
    def block_type(block)
      raw = block.respond_to?(:type) ? block.type : block[:type] || block["type"]
      raw.to_s.to_sym
    end

    # Reads a content block's input, from an object or a Hash.
    def block_input(block)
      block.respond_to?(:input) ? block.input : (block[:input] || block["input"])
    end

    # Token counts passed to the Langfuse span; {} when usage is absent.
    def usage_hash(raw_usage)
      return {} unless raw_usage
      {
        "input_tokens" => raw_usage.input_tokens.to_i,
        "output_tokens" => raw_usage.output_tokens.to_i,
        "total_tokens" => raw_usage.input_tokens.to_i + raw_usage.output_tokens.to_i
      }
    end
end
|
||||
75
app/models/provider/anthropic/concerns/usage_recorder.rb
Normal file
75
app/models/provider/anthropic/concerns/usage_recorder.rb
Normal file
@@ -0,0 +1,75 @@
|
||||
module Provider::Anthropic::Concerns::UsageRecorder
|
||||
extend ActiveSupport::Concern
|
||||
|
||||
private
|
||||
|
||||
# Writes one LlmUsage row for a successful Anthropic call.
#
# Does nothing when no family is attached or raw_usage is missing. Any
# failure while recording is logged and swallowed so that ledger problems
# never break the caller.
#
# NOTE(review): the cache token counts are stored on the row but are not
# passed to LlmUsage.calculate_cost, so estimated_cost is computed from
# input/output tokens only — confirm this is intended.
def record_usage(model_name, raw_usage, operation:, metadata: {})
  return if !family || !raw_usage

  prompt_count = raw_usage.input_tokens.to_i
  completion_count = raw_usage.output_tokens.to_i

  # The cache counters are read only when the usage object exposes them.
  cache_written = raw_usage.cache_creation_input_tokens if raw_usage.respond_to?(:cache_creation_input_tokens)
  cache_hit = raw_usage.cache_read_input_tokens if raw_usage.respond_to?(:cache_read_input_tokens)

  cost = LlmUsage.calculate_cost(
    model: model_name,
    prompt_tokens: prompt_count,
    completion_tokens: completion_count
  )

  family.llm_usages.create!(
    provider: "anthropic",
    model: model_name,
    operation: operation,
    prompt_tokens: prompt_count,
    completion_tokens: completion_count,
    total_tokens: prompt_count + completion_count,
    cache_creation_tokens: cache_written,
    cache_read_tokens: cache_hit,
    estimated_cost: cost,
    metadata: metadata
  )

  Rails.logger.info("LLM usage recorded - Provider: anthropic, Operation: #{operation}, Cost: #{cost.inspect}")
rescue => e
  Rails.logger.error("Failed to record LLM usage: #{e.message}")
end
|
||||
|
||||
# Writes one zero-token LlmUsage row describing a failed Anthropic call.
#
# The error message and HTTP status code (when one can be extracted) are
# merged into the row's metadata. Does nothing when no family is attached or
# error is missing; failures while recording are logged and swallowed.
def record_usage_error(model_name, operation:, error:, metadata: {})
  return if !family || !error

  status = extract_http_status_code(error)
  failure_details = { error: safe_error_message(error), http_status_code: status }

  family.llm_usages.create!(
    provider: "anthropic",
    model: model_name,
    operation: operation,
    prompt_tokens: 0,
    completion_tokens: 0,
    total_tokens: 0,
    estimated_cost: nil,
    metadata: metadata.merge(failure_details)
  )
rescue => e
  Rails.logger.error("Failed to record LLM usage error: #{e.message}")
end
|
||||
|
||||
# Best-effort extraction of an HTTP status code from an error.
#
# Prefers the error's own `status` or `http_status` reader. Otherwise it
# scans the message for a standalone three-digit number in the 100-599
# range. Digits that are part of a longer number (e.g. "30000 ms", "4096")
# are ignored so they are not mistaken for a status code.
#
# @param error [Exception, Object]
# @return [Integer, Object, nil] the status, or nil when none is found
def extract_http_status_code(error)
  if error.respond_to?(:status)
    error.status
  elsif error.respond_to?(:http_status)
    error.http_status
  else
    code = safe_error_message(error).to_s[/(?<!\d)([1-5]\d{2})(?!\d)/, 1]
    code&.to_i
  end
end

# Returns the error's message, or a placeholder string when reading the
# message itself raises. Returns nil for a nil error.
def safe_error_message(error)
  error&.message
rescue => e
  "(message unavailable: #{e.class})"
end
|
||||
end
|
||||
154
app/models/provider/anthropic/provider_merchant_enhancer.rb
Normal file
154
app/models/provider/anthropic/provider_merchant_enhancer.rb
Normal file
@@ -0,0 +1,154 @@
|
||||
# Looks up the website URL for a batch of merchants with a single forced
# tool call.
#
# The request declares one tool (TOOL_NAME) whose input_schema describes the
# desired output and sets tool_choice so the model has to call it. The
# tool_use block's input is then mapped into EnhancedMerchant records.
class Provider::Anthropic::ProviderMerchantEnhancer
  include Provider::Anthropic::Concerns::UsageRecorder

  TOOL_NAME = "report_enhancements".freeze

  attr_reader :client, :model, :merchants, :langfuse_trace, :family

  # @param client [Object] client responding to `messages.create`
  # @param model [String] model name sent with the request
  # @param merchants [Array<Hash>] merchants; `:id` is read from each
  # @param langfuse_trace [Object, nil] optional trace used to open a span
  # @param family [Object, nil] family the usage row is recorded against
  def initialize(client, model:, merchants: [], langfuse_trace: nil, family: nil)
    @client = client
    @model = model
    @merchants = merchants
    @langfuse_trace = langfuse_trace
    @family = family
  end

  # Sends the request, maps the tool call into EnhancedMerchant records and
  # records usage. On any StandardError the span is closed with level "ERROR",
  # an error usage row is recorded, and the exception is re-raised.
  #
  # @return [Array<EnhancedMerchant>]
  def enhance_merchants
    span = langfuse_trace&.span(name: "enhance_provider_merchants_api_call", input: {
      model: model,
      merchants: merchants
    })

    response = client.messages.create(
      model: model,
      max_tokens: max_tokens,
      system_: instructions,
      messages: [ { role: "user", content: user_message } ],
      tools: [ output_tool ],
      tool_choice: { type: "tool", name: TOOL_NAME, disable_parallel_tool_use: true }
    )

    enhanced = extract_enhancements(response)
    result = build_response(enhanced)

    record_usage(model, response.usage, operation: "enhance_provider_merchants", metadata: { merchant_count: merchants.size })

    span&.end(output: result.map(&:to_h), usage: usage_hash(response.usage))
    result
  rescue => e
    span&.end(output: { error: e.message }, level: "ERROR")
    record_usage_error(model, operation: "enhance_provider_merchants", error: e, metadata: { merchant_count: merchants.size })
    raise
  end

  private
    EnhancedMerchant = Provider::LlmConcept::EnhancedMerchant

    # Output token budget for the request. Reads ANTHROPIC_MAX_TOKENS and
    # falls back to 4096 when the variable is unset, blank, non-numeric or
    # not positive, so a misconfigured value never becomes `max_tokens: 0`.
    def max_tokens
      configured = ENV.fetch("ANTHROPIC_MAX_TOKENS", nil).to_i
      configured.positive? ? configured : 4096
    end

    # Definition of the single output tool. merchant_id is limited to the
    # input merchant ids (as strings); business_url may be null.
    def output_tool
      {
        name: TOOL_NAME,
        description: "Return the business website URL for each input merchant.",
        input_schema: {
          type: "object",
          properties: {
            merchants: {
              type: "array",
              description: "One result per input merchant.",
              items: {
                type: "object",
                properties: {
                  merchant_id: {
                    type: "string",
                    description: "The merchant's internal ID",
                    enum: merchants.map { |m| m[:id].to_s }
                  },
                  business_url: {
                    type: [ "string", "null" ],
                    description: "Business website without the www. subdomain, or null if uncertain or local"
                  }
                },
                required: [ "merchant_id", "business_url" ],
                additionalProperties: false
              }
            }
          },
          required: [ "merchants" ],
          additionalProperties: false
        }
      }
    end

    # System prompt sent with the request.
    def instructions
      <<~INSTRUCTIONS
        You are an assistant to a consumer personal finance app. Given a list of merchant names, identify
        the business website URL for each and return the result via the report_enhancements tool.

        Follow ALL the rules below:

        - One result per merchant, correlated by merchant_id
        - Do NOT include the www. subdomain ("walmart.com", not "www.walmart.com")
        - Favor null over false positives; only return a URL when 80%+ confident
        - NEVER return a URL for generic or local-only merchants ("Local diner", "Gas station", "ATM withdrawal")
      INSTRUCTIONS
    end

    # User message carrying the merchants as JSON.
    def user_message
      <<~MESSAGE
        Enhance the following merchants by identifying each one's website URL:

        ```json
        #{merchants.to_json}
        ```
      MESSAGE
    end

    # Finds the first tool_use block and returns its "merchants" array.
    #
    # @raise [Provider::Anthropic::Error] when there is no tool_use block or
    #   its input does not contain a merchants array
    def extract_enhancements(response)
      tool_use = Array(response.content).find { |block| block_type(block) == :tool_use }
      raise Provider::Anthropic::Error, "Model did not invoke #{TOOL_NAME}" unless tool_use

      input = block_input(tool_use)
      # The input may arrive as a JSON string rather than a Hash.
      input = JSON.parse(input) if input.is_a?(String)
      enhanced = input.is_a?(Hash) ? (input["merchants"] || input[:merchants]) : nil

      raise Provider::Anthropic::Error, "Tool call missing merchants" unless enhanced.is_a?(Array)
      enhanced
    end

    # Maps raw entries (string or symbol keys) into EnhancedMerchant records.
    def build_response(enhanced)
      enhanced.map do |m|
        EnhancedMerchant.new(
          merchant_id: m["merchant_id"] || m[:merchant_id],
          business_url: normalize_value(m["business_url"] || m[:business_url])
        )
      end
    end

    # Returns nil for nil, blank or a literal "null" (any case); otherwise
    # the stripped string.
    def normalize_value(value)
      return nil if value.nil?
      str = value.to_s.strip
      return nil if str.empty? || str.casecmp("null").zero?
      str
    end

    # Reads a content block's type as a symbol, from an object or a Hash.
    def block_type(block)
      raw = block.respond_to?(:type) ? block.type : block[:type] || block["type"]
      raw.to_s.to_sym
    end

    # Reads a content block's input, from an object or a Hash.
    def block_input(block)
      block.respond_to?(:input) ? block.input : (block[:input] || block["input"])
    end

    # Token counts passed to the Langfuse span; {} when usage is absent.
    def usage_hash(raw_usage)
      return {} unless raw_usage
      {
        "input_tokens" => raw_usage.input_tokens.to_i,
        "output_tokens" => raw_usage.output_tokens.to_i,
        "total_tokens" => raw_usage.input_tokens.to_i + raw_usage.output_tokens.to_i
      }
    end
end
|
||||
@@ -0,0 +1,9 @@
|
||||
# Adds two nullable integer columns to llm_usages for Anthropic's prompt-cache
# token counters.
class AddAnthropicCacheTokensToLlmUsages < ActiveRecord::Migration[7.2]
  # cache_creation_tokens holds Anthropic's cache_creation_input_tokens
  # (charged at ~1.25x input rate for 5-min TTL); cache_read_tokens holds
  # cache_read_input_tokens (charged at 0.1x input rate).
  # OpenAI usage rows leave both null.
  CACHE_TOKEN_COLUMNS = %i[cache_creation_tokens cache_read_tokens].freeze

  def change
    CACHE_TOKEN_COLUMNS.each do |column_name|
      add_column :llm_usages, column_name, :integer
    end
  end
end
|
||||
6
db/schema.rb
generated
6
db/schema.rb
generated
@@ -10,7 +10,7 @@
|
||||
#
|
||||
# It's strongly recommended that you check this file into your version control system.
|
||||
|
||||
ActiveRecord::Schema[7.2].define(version: 2026_05_19_100000) do
|
||||
ActiveRecord::Schema[7.2].define(version: 2026_05_25_120000) do
|
||||
# These are extensions that must be enabled in order to support this database
|
||||
enable_extension "pgcrypto"
|
||||
enable_extension "plpgsql"
|
||||
@@ -498,7 +498,7 @@ ActiveRecord::Schema[7.2].define(version: 2026_05_19_100000) do
|
||||
t.index ["provider_key"], name: "index_debug_log_entries_on_provider_key"
|
||||
t.index ["source"], name: "index_debug_log_entries_on_source"
|
||||
t.index ["user_id"], name: "index_debug_log_entries_on_user_id"
|
||||
t.check_constraint "level::text = ANY (ARRAY['debug'::character varying, 'info'::character varying, 'warn'::character varying, 'error'::character varying]::text[])", name: "chk_debug_log_entries_level"
|
||||
t.check_constraint "level::text = ANY (ARRAY['debug'::character varying::text, 'info'::character varying::text, 'warn'::character varying::text, 'error'::character varying::text])", name: "chk_debug_log_entries_level"
|
||||
end
|
||||
|
||||
create_table "depositories", id: :uuid, default: -> { "gen_random_uuid()" }, force: :cascade do |t|
|
||||
@@ -1063,6 +1063,8 @@ ActiveRecord::Schema[7.2].define(version: 2026_05_19_100000) do
|
||||
t.jsonb "metadata", default: {}
|
||||
t.datetime "created_at", null: false
|
||||
t.datetime "updated_at", null: false
|
||||
t.integer "cache_creation_tokens"
|
||||
t.integer "cache_read_tokens"
|
||||
t.index ["family_id", "created_at"], name: "index_llm_usages_on_family_id_and_created_at"
|
||||
t.index ["family_id", "operation"], name: "index_llm_usages_on_family_id_and_operation"
|
||||
t.index ["family_id"], name: "index_llm_usages_on_family_id"
|
||||
|
||||
35
test/models/llm_usage_test.rb
Normal file
35
test/models/llm_usage_test.rb
Normal file
@@ -0,0 +1,35 @@
|
||||
require "test_helper"
|
||||
|
||||
# Covers provider inference and cost calculation on LlmUsage for the Claude
# model family, alongside a regression check for gpt models.
class LlmUsageTest < ActiveSupport::TestCase
  test "infer_provider returns anthropic for claude models" do
    %w[claude-sonnet-4-6 claude-opus-4-7 claude-haiku-4-5].each do |model_name|
      assert_equal "anthropic", LlmUsage.infer_provider(model_name)
    end
  end

  test "infer_provider still returns openai for gpt models" do
    %w[gpt-4.1 gpt-5].each do |model_name|
      assert_equal "openai", LlmUsage.infer_provider(model_name)
    end
  end

  test "calculate_cost returns Anthropic pricing for Claude models" do
    # 1M input at $3/MTok plus 100K output at $15/MTok: $3.00 + $1.50 = $4.50
    sonnet_cost = LlmUsage.calculate_cost(
      model: "claude-sonnet-4-6",
      prompt_tokens: 1_000_000,
      completion_tokens: 100_000
    )

    assert_in_delta 4.5, sonnet_cost, 0.0001
  end

  test "calculate_cost uses higher pricing for Opus" do
    # 1M input at $15/MTok, no output: $15.00
    opus_cost = LlmUsage.calculate_cost(
      model: "claude-opus-4-7",
      prompt_tokens: 1_000_000,
      completion_tokens: 0
    )

    assert_in_delta 15.0, opus_cost, 0.0001
  end

  test "calculate_cost uses lower pricing for Haiku" do
    # $1 for 1M input plus $5 for 1M output: $6.00
    haiku_cost = LlmUsage.calculate_cost(
      model: "claude-haiku-4-5",
      prompt_tokens: 1_000_000,
      completion_tokens: 1_000_000
    )

    assert_in_delta 6.0, haiku_cost, 0.0001
  end
end
|
||||
124
test/models/provider/anthropic/auto_categorizer_test.rb
Normal file
124
test/models/provider/anthropic/auto_categorizer_test.rb
Normal file
@@ -0,0 +1,124 @@
|
||||
require "test_helper"
|
||||
|
||||
# Tests Provider::Anthropic::AutoCategorizer against a mocked client whose
# `messages.create` returns a canned response built from OpenStruct objects.
class Provider::Anthropic::AutoCategorizerTest < ActiveSupport::TestCase
  setup do
    @transactions = [
      { id: "txn_1", name: "McDonalds", amount: 20, classification: "expense" },
      { id: "txn_2", name: "Netflix", amount: 15, classification: "expense" }
    ]
    @user_categories = [
      { id: "cat_food", name: "Fast Food", classification: "expense" },
      { id: "cat_subs", name: "Subscriptions", classification: "expense" }
    ]
  end

  test "issues a forced tool call and maps the response into AutoCategorization records" do
    fake_response = build_response(content: [
      tool_use_block(
        id: "toolu_1",
        name: "report_categorizations",
        input: {
          "categorizations" => [
            { "transaction_id" => "txn_1", "category_name" => "Fast Food" },
            { "transaction_id" => "txn_2", "category_name" => "Subscriptions" }
          ]
        }
      )
    ])
    client = stub_client(fake_response, expect_request: ->(params) {
      assert_equal "claude-haiku-4-5", params[:model]
      # Parentheses keep Ruby from parsing the hash literal as a block, so no
      # `[ ... ].first` wrapper is needed around the expected value.
      assert_equal(
        { type: "tool", name: "report_categorizations", disable_parallel_tool_use: true },
        params[:tool_choice]
      )
      assert_equal 1, params[:tools].size
      assert_equal "report_categorizations", params[:tools].first[:name]
    })

    result = categorize_with(client)

    assert_equal 2, result.size
    assert_equal "Fast Food", result.find { |r| r.transaction_id == "txn_1" }.category_name
    assert_equal "Subscriptions", result.find { |r| r.transaction_id == "txn_2" }.category_name
  end

  test "normalizes null category names to nil" do
    # Covers both a real nil and the literal string "null" in the tool input.
    fake_response = build_response(content: [
      tool_use_block(
        id: "toolu_2",
        name: "report_categorizations",
        input: {
          "categorizations" => [
            { "transaction_id" => "txn_1", "category_name" => nil },
            { "transaction_id" => "txn_2", "category_name" => "null" }
          ]
        }
      )
    ])
    client = stub_client(fake_response)

    result = categorize_with(client)

    assert_nil result.find { |r| r.transaction_id == "txn_1" }.category_name
    assert_nil result.find { |r| r.transaction_id == "txn_2" }.category_name
  end

  test "raises when no tool_use block is present in the response" do
    fake_response = build_response(content: [ text_block("No tool use") ])
    client = stub_client(fake_response)

    err = assert_raises(Provider::Anthropic::Error) do
      categorize_with(client)
    end

    assert_match(/did not invoke report_categorizations/i, err.message)
  end

  private
    # Runs the categorizer with the shared fixtures from `setup` and the
    # given client double; returns whatever `auto_categorize` returns.
    def categorize_with(client)
      Provider::Anthropic::AutoCategorizer.new(
        client,
        model: "claude-haiku-4-5",
        transactions: @transactions,
        user_categories: @user_categories
      ).auto_categorize
    end

    # Builds a client double whose `messages.create` returns `response`.
    # When `expect_request` is given, it is called with the parameters passed
    # to `create` so the test can assert on the outgoing request, and the
    # call to `create` becomes a required expectation instead of a plain stub.
    def stub_client(response, expect_request: nil)
      messages = mock
      if expect_request
        messages.expects(:create).with do |params|
          expect_request.call(params)
          true
        end.returns(response)
      else
        messages.stubs(:create).returns(response)
      end
      client = mock
      client.stubs(:messages).returns(messages)
      client
    end

    # Canned response object exposing id, model, content and usage, with
    # usage carrying input_tokens and output_tokens.
    def build_response(content:, usage: { input_tokens: 50, output_tokens: 25 })
      OpenStruct.new(
        id: "msg_test",
        model: "claude-haiku-4-5",
        content: content,
        usage: OpenStruct.new(
          input_tokens: usage[:input_tokens],
          output_tokens: usage[:output_tokens]
        )
      )
    end

    # Content block of type :text carrying `text`.
    def text_block(text)
      OpenStruct.new(type: :text, text: text)
    end

    # Content block of type :tool_use with the given id, tool name and input.
    def tool_use_block(id:, name:, input:)
      OpenStruct.new(type: :tool_use, id: id, name: name, input: input)
    end
end
|
||||
115
test/models/provider/anthropic/auto_merchant_detector_test.rb
Normal file
115
test/models/provider/anthropic/auto_merchant_detector_test.rb
Normal file
@@ -0,0 +1,115 @@
|
||||
require "test_helper"
|
||||
|
||||
# Tests Provider::Anthropic::AutoMerchantDetector against a mocked client
# that hands back canned OpenStruct responses.
class Provider::Anthropic::AutoMerchantDetectorTest < ActiveSupport::TestCase
  setup do
    @transactions = [
      { id: "txn_1", name: "AMZN purchases", classification: "expense" },
      { id: "txn_2", name: "Local diner", classification: "expense" }
    ]
    @user_merchants = [ { id: "m1", name: "Shooters" } ]
  end

  test "issues a forced tool call and maps merchants" do
    reported = [
      { "transaction_id" => "txn_1", "business_name" => "Amazon", "business_url" => "amazon.com" },
      { "transaction_id" => "txn_2", "business_name" => nil, "business_url" => nil }
    ]
    fake_response = build_response(
      content: [ tool_use_block(id: "toolu_1", name: "report_merchants", input: { "merchants" => reported }) ]
    )
    request_checks = lambda do |params|
      assert_equal "claude-haiku-4-5", params[:model]
      assert_equal "report_merchants", params[:tool_choice][:name]
      assert params[:tool_choice][:disable_parallel_tool_use]
    end
    client = stub_client(fake_response, expect_request: request_checks)

    detections = detect_merchants(client, transactions: @transactions)
    amazon = detections.find { |d| d.transaction_id == "txn_1" }
    diner = detections.find { |d| d.transaction_id == "txn_2" }

    assert_equal "Amazon", amazon.business_name
    assert_equal "amazon.com", amazon.business_url
    assert_nil diner.business_name
    assert_nil diner.business_url
  end

  test "normalizes case-insensitive matches against user_merchants" do
    # The response reports "shooters" in lowercase; the user merchant is "Shooters".
    reported = [
      { "transaction_id" => "txn_1", "business_name" => "shooters", "business_url" => nil }
    ]
    fake_response = build_response(
      content: [ tool_use_block(id: "toolu_1", name: "report_merchants", input: { "merchants" => reported }) ]
    )
    client = stub_client(fake_response)

    detections = detect_merchants(client, transactions: [ @transactions.first ])

    assert_equal "Shooters", detections.first.business_name
  end

  test "raises when model returns no tool_use" do
    fake_response = build_response(content: [ text_block("I cannot help") ])
    client = stub_client(fake_response)

    err = assert_raises(Provider::Anthropic::Error) do
      detect_merchants(client, transactions: @transactions)
    end

    assert_match(/did not invoke report_merchants/i, err.message)
  end

  private
    # Runs the detector for `transactions` with the shared user merchants.
    def detect_merchants(client, transactions:)
      detector = Provider::Anthropic::AutoMerchantDetector.new(
        client,
        model: "claude-haiku-4-5",
        transactions: transactions,
        user_merchants: @user_merchants
      )
      detector.auto_detect_merchants
    end

    # Client double whose `messages.create` returns `response`. With
    # `expect_request`, the create call is a required expectation and the
    # callable receives the request parameters for inspection.
    def stub_client(response, expect_request: nil)
      messages = mock

      if expect_request.nil?
        messages.stubs(:create).returns(response)
      else
        expectation = messages.expects(:create).with do |params|
          expect_request.call(params)
          true
        end
        expectation.returns(response)
      end

      mock.tap { |client| client.stubs(:messages).returns(messages) }
    end

    # Canned response object with id, model, content and token usage.
    def build_response(content:, usage: { input_tokens: 100, output_tokens: 40 })
      token_usage = OpenStruct.new(input_tokens: usage[:input_tokens], output_tokens: usage[:output_tokens])

      OpenStruct.new(id: "msg_test", model: "claude-haiku-4-5", content: content, usage: token_usage)
    end

    # Content block of type :text carrying `text`.
    def text_block(text)
      OpenStruct.new(type: :text, text: text)
    end

    # Content block of type :tool_use with the given id, tool name and input.
    def tool_use_block(id:, name:, input:)
      OpenStruct.new(type: :tool_use, id: id, name: name, input: input)
    end
end
|
||||
@@ -0,0 +1,81 @@
|
||||
require "test_helper"
|
||||
|
||||
# Tests Provider::Anthropic::ProviderMerchantEnhancer against a mocked client
# that hands back canned OpenStruct responses.
class Provider::Anthropic::ProviderMerchantEnhancerTest < ActiveSupport::TestCase
  setup do
    @merchants = [
      { id: "m1", name: "Walmart" },
      { id: "m2", name: "Local Diner" }
    ]
  end

  test "issues a forced tool call and maps enhancements" do
    reported = [
      { "merchant_id" => "m1", "business_url" => "walmart.com" },
      { "merchant_id" => "m2", "business_url" => nil }
    ]
    fake_response = build_response(
      content: [ tool_use_block(id: "toolu_1", name: "report_enhancements", input: { "merchants" => reported }) ]
    )
    request_checks = lambda do |params|
      assert_equal "report_enhancements", params[:tool_choice][:name]
    end
    client = stub_client(fake_response, expect_request: request_checks)

    enhancements = enhance(client)

    assert_equal "walmart.com", enhancements.find { |e| e.merchant_id == "m1" }.business_url
    assert_nil enhancements.find { |e| e.merchant_id == "m2" }.business_url
  end

  test "raises when model returns no tool_use" do
    text_only = OpenStruct.new(type: :text, text: "Nope")
    client = stub_client(build_response(content: [ text_only ]))

    err = assert_raises(Provider::Anthropic::Error) do
      enhance(client)
    end

    assert_match(/did not invoke report_enhancements/i, err.message)
  end

  private
    # Runs the enhancer over the shared merchants with the given client double.
    def enhance(client)
      enhancer = Provider::Anthropic::ProviderMerchantEnhancer.new(
        client,
        model: "claude-haiku-4-5",
        merchants: @merchants
      )
      enhancer.enhance_merchants
    end

    # Client double whose `messages.create` returns `response`. With
    # `expect_request`, the create call is a required expectation and the
    # callable receives the request parameters for inspection.
    def stub_client(response, expect_request: nil)
      messages = mock

      if expect_request.nil?
        messages.stubs(:create).returns(response)
      else
        expectation = messages.expects(:create).with do |params|
          expect_request.call(params)
          true
        end
        expectation.returns(response)
      end

      mock.tap { |client| client.stubs(:messages).returns(messages) }
    end

    # Canned response object with id, model, content and token usage.
    def build_response(content:, usage: { input_tokens: 60, output_tokens: 20 })
      token_usage = OpenStruct.new(input_tokens: usage[:input_tokens], output_tokens: usage[:output_tokens])

      OpenStruct.new(id: "msg_test", model: "claude-haiku-4-5", content: content, usage: token_usage)
    end

    # Content block of type :tool_use with the given id, tool name and input.
    def tool_use_block(id:, name:, input:)
      OpenStruct.new(type: :tool_use, id: id, name: name, input: input)
    end
end
|
||||
Reference in New Issue
Block a user