mirror of
https://github.com/we-promise/sure.git
synced 2026-05-30 07:49:01 +00:00
Implements auto_categorize, auto_detect_merchants, and
enhance_provider_merchants on Provider::Anthropic via forced tool calls,
plus the cost-ledger plumbing they need.
- Provider::Anthropic::AutoCategorizer, AutoMerchantDetector,
ProviderMerchantEnhancer each define a single output tool whose
input_schema mirrors the desired output, then force the model to call
it via tool_choice: { type: "tool", name: ..., disable_parallel_tool_use: true }.
Anthropic guarantees the tool_use.input matches the schema, so there
is no JSON parsing fragility, no <think> tag stripping, and no
json_object/json_schema fallback ladders.
- Concerns::UsageRecorder mirrors the OpenAI sibling but persists
cache_creation_input_tokens / cache_read_input_tokens to dedicated
columns instead of metadata.
- Migration adds cache_creation_tokens, cache_read_tokens (nullable
integers) to llm_usages. OpenAI rows leave them null.
- LlmUsage::PRICING gains Claude 4.x rows (opus-4-7 $15/$75, sonnet-4-6
$3/$15, haiku-4-5 $1/$5 per MTok). infer_provider returns "anthropic"
for claude-* via the existing exact/prefix lookup.
- Provider::Anthropic#chat_response now persists cache columns directly
rather than stashing them in metadata.
- 25-transaction batch cap mirrors the OpenAI provider so the cost
ledger sees the same shape regardless of which provider ran a batch.
Tests cover the forced-tool-call path, null/None normalization,
case-insensitive merchant matching, the missing-tool_use error path,
and Anthropic-specific pricing + provider inference on LlmUsage.
Stacked on #1983 (PR 1/5). 3/5 PDF + vision next.
182 lines
6.9 KiB
Ruby
182 lines
6.9 KiB
Ruby
# Ledger row for a single LLM API call made on behalf of a family: which
# provider/model ran which operation, the token counts, and an estimated
# dollar cost derived from the static PRICING table below.
class LlmUsage < ApplicationRecord
  belongs_to :family

  validates :provider, :model, :operation, presence: true
  validates :prompt_tokens, :completion_tokens, :total_tokens, presence: true, numericality: { greater_than_or_equal_to: 0 }
  validates :estimated_cost, numericality: { greater_than_or_equal_to: 0 }, allow_nil: true

  scope :for_family, ->(family) { where(family: family) }
  scope :for_operation, ->(operation) { where(operation: operation) }
  scope :recent, -> { order(created_at: :desc) }
  scope :for_date_range, ->(start_date, end_date) { where(created_at: start_date..end_date) }

  # Provider assumed when a model name matches no PRICING entry
  # (kept for backward compatibility with custom/self-hosted models).
  DEFAULT_PROVIDER = "openai"

  # Pricing in USD per 1M tokens, keyed by provider and then by model name.
  # Model keys double as prefixes: a dated snapshot such as
  # "gpt-4.1-2024-08-06" is priced by its longest matching key (see find_pricing).
  PRICING = {
    # OpenAI (as of Oct 2025)
    # Source: https://platform.openai.com/docs/pricing
    "openai" => {
      # GPT-4.1 and similar models
      "gpt-4.1" => { prompt: 2.00, completion: 8.00 },
      "gpt-4.1-mini" => { prompt: 0.40, completion: 1.60 },
      # NOTE(review): identical to gpt-4.1-mini, which looks like a copy/paste;
      # confirm the nano rate against the pricing page.
      "gpt-4.1-nano" => { prompt: 0.40, completion: 1.60 },
      # 4o
      "gpt-4o" => { prompt: 2.50, completion: 10.00 },
      "gpt-4o-mini" => { prompt: 0.15, completion: 0.60 },
      # GPT-5 models (estimated pricing)
      "gpt-5" => { prompt: 1.25, completion: 10.00 },
      "gpt-5-mini" => { prompt: 0.25, completion: 2.00 },
      "gpt-5-nano" => { prompt: 0.05, completion: 0.40 },
      "gpt-5-pro" => { prompt: 15.00, completion: 120.00 },
      # o1 models
      "o1-mini" => { prompt: 1.10, completion: 4.40 },
      "o1" => { prompt: 15.00, completion: 60.00 },
      # o3 models (estimated pricing)
      "o3" => { prompt: 2.00, completion: 8.00 },
      "o3-mini" => { prompt: 1.10, completion: 4.40 },
      "o3-pro" => { prompt: 20.00, completion: 80.00 }
    },
    "google" => {
      "gemini-2.5-pro" => { prompt: 1.25, completion: 10.00 },
      "gemini-2.5-flash" => { prompt: 0.3, completion: 2.50 }
    },
    # Anthropic (Claude 4.x family, as of May 2026)
    # Source: https://www.anthropic.com/pricing
    "anthropic" => {
      "claude-opus-4-7" => { prompt: 15.00, completion: 75.00 },
      "claude-opus-4-6" => { prompt: 15.00, completion: 75.00 },
      "claude-sonnet-4-6" => { prompt: 3.00, completion: 15.00 },
      "claude-sonnet-4-5" => { prompt: 3.00, completion: 15.00 },
      "claude-haiku-4-5" => { prompt: 1.00, completion: 5.00 }
    }
  }.freeze

  # Calculate the estimated cost (USD, rounded to 6 decimals) of a call.
  #
  # The provider is inferred from the model name via the pricing map.
  # Returns nil if pricing is not available for the model
  # (e.g., custom/self-hosted providers).
  def self.calculate_cost(model:, prompt_tokens:, completion_tokens:)
    provider = infer_provider(model)
    pricing = find_pricing(provider, model)

    unless pricing
      Rails.logger.info("No pricing found for model: #{model} (inferred provider: #{provider})")
      return nil
    end

    # Pricing is per 1M tokens, so divide by 1_000_000
    prompt_cost = (prompt_tokens * pricing[:prompt]) / 1_000_000.0
    completion_cost = (completion_tokens * pricing[:completion]) / 1_000_000.0

    cost = (prompt_cost + completion_cost).round(6)
    Rails.logger.info("Calculated cost for #{provider}/#{model}: $#{cost} (#{prompt_tokens} prompt tokens, #{completion_tokens} completion tokens)")
    cost
  end

  # Find the pricing entry for a model under the given provider.
  #
  # An exact key match wins. Otherwise the LONGEST key that the model name
  # starts with is used, so "gpt-4o-mini-2024-07-18" resolves to "gpt-4o-mini"
  # rather than to the shorter "gpt-4o" that happens to be listed first.
  #
  # Returns a hash with :prompt and :completion rates, or nil when the
  # provider is unknown, the model is blank, or nothing matches.
  def self.find_pricing(provider, model)
    provider_pricing = PRICING[provider]
    return nil if provider_pricing.nil? || model.blank?

    model_name = model.to_s
    exact = provider_pricing[model_name]
    return exact if exact

    best_prefix = provider_pricing.keys
                                  .select { |prefix| model_name.start_with?(prefix) }
                                  .max_by(&:length)
    best_prefix && provider_pricing[best_prefix]
  end

  # Infer the provider from a model name: the first provider (in PRICING
  # order) that has an exact or prefix pricing match for the model.
  # Falls back to DEFAULT_PROVIDER for blank or unknown models.
  def self.infer_provider(model)
    return DEFAULT_PROVIDER if model.blank?

    # Delegating to find_pricing keeps the matching rules in one place.
    PRICING.each_key.find { |provider_name| find_pricing(provider_name, model) } || DEFAULT_PROVIDER
  end

  # Aggregate usage statistics for a family, optionally limited to a date
  # range (the range is applied only when both bounds are given).
  #
  # Cost figures ignore rows whose estimated_cost is nil; token and request
  # totals include every row in scope.
  def self.statistics_for_family(family, start_date: nil, end_date: nil)
    scope = for_family(family)
    scope = scope.for_date_range(start_date, end_date) if start_date && end_date

    # Exclude records with nil cost from cost calculations
    scope_with_cost = scope.where.not(estimated_cost: nil)

    requests_with_cost = scope_with_cost.count
    raw_total_cost = scope_with_cost.sum(:estimated_cost).to_f
    total_cost = raw_total_cost.round(2)
    # Average from the unrounded sum: dividing the 2-decimal total would
    # collapse many sub-cent requests to an average of 0.0.
    avg_cost = requests_with_cost.positive? ? (raw_total_cost / requests_with_cost).round(4) : 0.0

    {
      total_requests: scope.count,
      requests_with_cost: requests_with_cost,
      total_prompt_tokens: scope.sum(:prompt_tokens),
      total_completion_tokens: scope.sum(:completion_tokens),
      total_tokens: scope.sum(:total_tokens),
      total_cost: total_cost,
      avg_cost: avg_cost,
      by_operation: scope_with_cost.group(:operation).sum(:estimated_cost).transform_values { |v| v.to_f.round(2) },
      by_model: scope_with_cost.group(:model).sum(:estimated_cost).transform_values { |v| v.to_f.round(2) }
    }
  end

  # Format the estimated cost as a dollar string rounded to 4 decimals,
  # or "N/A" when no cost was recorded.
  def formatted_cost
    return "N/A" if estimated_cost.nil?

    "$#{estimated_cost.round(4)}"
  end

  # Whether this usage record represents a failed API call, i.e. its
  # metadata carries a non-blank "error" entry.
  def failed?
    metadata.present? && metadata["error"].present?
  end

  # HTTP status code stored in metadata, or nil if absent.
  def http_status_code
    metadata&.dig("http_status_code")
  end

  # Error message stored in metadata, or nil if absent.
  def error_message
    metadata&.dig("error")
  end

  # Estimate the cost of auto-categorizing a batch of transactions.
  #
  # Based on typical token usage patterns:
  # - ~150 prompt tokens for the system message and instructions
  # - ~100 prompt tokens per transaction
  # - ~50 prompt tokens per category
  # - ~50 completion tokens per transaction
  #
  # Returns 0.0 when there is nothing to categorize, and nil if pricing is
  # not available for the model.
  def self.estimate_auto_categorize_cost(transaction_count:, category_count:, model: "gpt-4.1")
    # A non-positive count means no work; never report a negative estimate.
    return 0.0 unless transaction_count.to_i.positive?

    base_prompt_tokens = 150 # System message and instructions
    transaction_tokens = transaction_count * 100
    category_tokens = category_count * 50
    estimated_prompt_tokens = base_prompt_tokens + transaction_tokens + category_tokens

    # Completion tokens: roughly one category name per transaction
    estimated_completion_tokens = transaction_count * 50

    # calculate_cost infers the provider from the model and returns nil
    # if pricing is not available.
    calculate_cost(
      model: model,
      prompt_tokens: estimated_prompt_tokens,
      completion_tokens: estimated_completion_tokens
    )
  end
end
|