Files
sure/app/models/llm_usage.rb
Juan José Mata f491916411 Track failed LLM API calls in llm_usages table (#360)
* Track failed LLM API calls in llm_usages table

This commit adds comprehensive error tracking for failed LLM API calls:

- Updated LlmUsage model with helper methods to identify failed calls
  and retrieve error details (failed?, http_status_code, error_message)

- Modified Provider::Openai to record failed API calls with error metadata
  including HTTP status codes and error messages in both native and
  generic chat response methods

- Enhanced UsageRecorder concern with record_usage_error method to support
  error tracking for auto-categorization and auto-merchant detection

- Updated LLM usage UI to display failed calls with:
  - Red background highlighting for failed rows
  - Error indicator icon with "Failed" label
  - Interactive tooltip on hover showing error message and HTTP status code

Failed calls are now tracked with zero tokens and null cost, storing
error details in the metadata JSONB column for visibility and debugging.

* Dark mode fixes

---------

Co-authored-by: Claude <noreply@anthropic.com>
2025-11-22 02:15:20 +01:00

173 lines
6.5 KiB
Ruby

# Records one LLM API call made on behalf of a family: the provider, model and
# operation involved, the tokens consumed, and an estimated dollar cost.
# A record whose metadata carries an "error" entry represents a failed call.
class LlmUsage < ApplicationRecord
  belongs_to :family

  validates :provider, :model, :operation, presence: true
  validates :prompt_tokens, :completion_tokens, :total_tokens,
            presence: true, numericality: { greater_than_or_equal_to: 0 }
  # A nil cost is allowed: it means no pricing was available for the model.
  validates :estimated_cost, numericality: { greater_than_or_equal_to: 0 }, allow_nil: true

  scope :for_family, ->(family) { where(family: family) }
  scope :for_operation, ->(operation) { where(operation: operation) }
  scope :recent, -> { order(created_at: :desc) }
  scope :for_date_range, ->(start_date, end_date) { where(created_at: start_date..end_date) }

  # Provider reported when a model matches no pricing entry
  # (custom/self-hosted models; kept for backward compatibility).
  DEFAULT_PROVIDER = "openai"

  # Every rate in PRICING is expressed in dollars per this many tokens.
  TOKENS_PER_MILLION = 1_000_000.0

  # Pricing per 1M tokens (as of Oct 2025), keyed by provider and then by
  # model name. A key also acts as a prefix for dated/suffixed model names;
  # see find_pricing.
  # Source: https://platform.openai.com/docs/pricing
  PRICING = {
    "openai" => {
      # GPT-4.1 and similar models
      "gpt-4.1" => { prompt: 2.00, completion: 8.00 },
      "gpt-4.1-mini" => { prompt: 0.40, completion: 1.60 },
      "gpt-4.1-nano" => { prompt: 0.10, completion: 0.40 },
      # 4o
      "gpt-4o" => { prompt: 2.50, completion: 10.00 },
      "gpt-4o-mini" => { prompt: 0.15, completion: 0.60 },
      # GPT-5 models (estimated pricing)
      "gpt-5" => { prompt: 1.25, completion: 10.00 },
      "gpt-5-mini" => { prompt: 0.25, completion: 2.00 },
      "gpt-5-nano" => { prompt: 0.05, completion: 0.40 },
      "gpt-5-pro" => { prompt: 15.00, completion: 120.00 },
      # o1 models
      "o1-mini" => { prompt: 1.10, completion: 4.40 },
      "o1" => { prompt: 15.00, completion: 60.00 },
      # o3 models (estimated pricing)
      "o3" => { prompt: 2.00, completion: 8.00 },
      "o3-mini" => { prompt: 1.10, completion: 4.40 },
      "o3-pro" => { prompt: 20.00, completion: 80.00 }
    },
    "google" => {
      "gemini-2.5-pro" => { prompt: 1.25, completion: 10.00 },
      "gemini-2.5-flash" => { prompt: 0.3, completion: 2.50 }
    }
  }.freeze

  # Calculate the dollar cost of a call from its model and token usage.
  # The provider is inferred from the model via the pricing map.
  #
  # Returns the cost rounded to 6 decimal places, or nil when no pricing is
  # available for the model (e.g. custom/self-hosted providers, blank model).
  def self.calculate_cost(model:, prompt_tokens:, completion_tokens:)
    provider = infer_provider(model)
    pricing = find_pricing(provider, model)

    unless pricing
      Rails.logger.info("No pricing found for model: #{model} (inferred provider: #{provider})")
      return nil
    end

    prompt_cost = (prompt_tokens * pricing[:prompt]) / TOKENS_PER_MILLION
    completion_cost = (completion_tokens * pricing[:completion]) / TOKENS_PER_MILLION
    cost = (prompt_cost + completion_cost).round(6)

    Rails.logger.info("Calculated cost for #{provider}/#{model}: $#{cost} (#{prompt_tokens} prompt tokens, #{completion_tokens} completion tokens)")
    cost
  end

  # Find the pricing entry for a model under the given provider.
  #
  # An exact model name wins. Otherwise the LONGEST pricing key that prefixes
  # the model name is used, so "gpt-4.1-mini-2025-04-14" resolves to
  # "gpt-4.1-mini" rather than to the shorter "gpt-4.1".
  #
  # Returns a { prompt:, completion: } hash, or nil when the provider is
  # unknown, the model is blank, or nothing matches.
  def self.find_pricing(provider, model)
    provider_pricing = PRICING[provider]
    return nil unless provider_pricing

    # to_s turns a nil model into "", which matches no key and no prefix.
    model_name = model.to_s
    return provider_pricing[model_name] if provider_pricing.key?(model_name)

    best_prefix = provider_pricing.keys
                                  .select { |prefix| model_name.start_with?(prefix) }
                                  .max_by(&:length)
    best_prefix && provider_pricing[best_prefix]
  end

  # Infer the provider from a model name: the first provider in PRICING that
  # has an exact or prefix pricing match for it. Falls back to
  # DEFAULT_PROVIDER when the model is blank or unpriced.
  def self.infer_provider(model)
    PRICING.each_key.find { |provider_name| find_pricing(provider_name, model) } || DEFAULT_PROVIDER
  end

  # Aggregate usage statistics for a family.
  #
  # The date filter is applied only when BOTH start_date and end_date are
  # given; a lone bound is ignored. Token totals and total_requests cover
  # every record in range, while all cost figures exclude records whose
  # estimated_cost is nil.
  def self.statistics_for_family(family, start_date: nil, end_date: nil)
    scope = for_family(family)
    scope = scope.for_date_range(start_date, end_date) if start_date && end_date

    scope_with_cost = scope.where.not(estimated_cost: nil)
    requests_with_cost = scope_with_cost.count
    total_cost = scope_with_cost.sum(:estimated_cost).to_f.round(2)
    avg_cost = requests_with_cost.positive? ? (total_cost / requests_with_cost).round(4) : 0.0

    # Summed cost per distinct value of the given column, rounded to cents.
    cost_grouped_by = lambda do |column|
      scope_with_cost.group(column).sum(:estimated_cost).transform_values { |sum| sum.to_f.round(2) }
    end

    {
      total_requests: scope.count,
      requests_with_cost: requests_with_cost,
      total_prompt_tokens: scope.sum(:prompt_tokens),
      total_completion_tokens: scope.sum(:completion_tokens),
      total_tokens: scope.sum(:total_tokens),
      total_cost: total_cost,
      avg_cost: avg_cost,
      by_operation: cost_grouped_by.call(:operation),
      by_model: cost_grouped_by.call(:model)
    }
  end

  # Format the estimated cost as a dollar amount with 4 decimal places,
  # or "N/A" when no cost was recorded.
  def formatted_cost
    return "N/A" if estimated_cost.nil?

    "$#{estimated_cost.round(4)}"
  end

  # Whether this usage record represents a failed API call, i.e. its
  # metadata carries a non-blank "error" entry.
  def failed?
    metadata.present? && metadata["error"].present?
  end

  # The HTTP status code stored in metadata, or nil when absent.
  def http_status_code
    metadata&.dig("http_status_code")
  end

  # The error message stored in metadata, or nil when absent.
  def error_message
    metadata&.dig("error")
  end

  # Estimate the cost of auto-categorizing a batch of transactions.
  # Based on typical token usage patterns:
  # - ~150 tokens for the system message and instructions
  # - ~100 prompt tokens per transaction
  # - ~50 prompt tokens per category
  # - ~50 completion tokens per transaction
  #
  # Returns 0.0 for an empty batch, or nil if pricing is not available for
  # the model.
  def self.estimate_auto_categorize_cost(transaction_count:, category_count:, model: "gpt-4.1")
    return 0.0 if transaction_count.zero?

    base_prompt_tokens = 150 # System message and instructions
    estimated_prompt_tokens = base_prompt_tokens + (transaction_count * 100) + (category_count * 50)
    # Completion tokens: roughly one category name per transaction
    estimated_completion_tokens = transaction_count * 50

    # calculate_cost infers the provider from the model.
    calculate_cost(
      model: model,
      prompt_tokens: estimated_prompt_tokens,
      completion_tokens: estimated_completion_tokens
    )
  end
end