mirror of
https://github.com/we-promise/sure.git
synced 2026-04-19 20:14:08 +00:00
LLM cost estimation (#223)
* Password reset back button also after confirmation Signed-off-by: Juan José Mata <juanjo.mata@gmail.com> * Implement a filter for category (#215) - Also implement an is empty/is null condition. * Implement an LLM cost estimation page Track costs across all the cost categories: auto categorization, auto merchant detection and chat. Show warning with estimated cost when running a rule that contains AI. * Update pricing * Add google pricing and fix inferred model everywhere. * Update app/models/llm_usage.rb Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> Signed-off-by: soky srm <sokysrm@gmail.com> * FIX address review * Linter * Address review - Lowered log level - extracted the duplicated record_usage method into a shared concern * Update app/controllers/settings/llm_usages_controller.rb Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> Signed-off-by: soky srm <sokysrm@gmail.com> * Moved attr_reader out of private --------- Signed-off-by: Juan José Mata <juanjo.mata@gmail.com> Signed-off-by: soky srm <sokysrm@gmail.com> Co-authored-by: Juan José Mata <juanjo.mata@gmail.com> Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
This commit is contained in:
@@ -8,6 +8,13 @@ class Provider::Openai < Provider
|
||||
DEFAULT_OPENAI_MODEL_PREFIXES = %w[gpt-4 gpt-5 o1 o3]
|
||||
DEFAULT_MODEL = "gpt-4.1"
|
||||
|
||||
# Returns the effective model that would be used by the provider
|
||||
# Uses the same logic as Provider::Registry and the initializer
|
||||
# Resolve which OpenAI model the provider will actually use.
# Precedence: OPENAI_MODEL env var, then the persisted Setting,
# then the class-level DEFAULT_MODEL fallback (blank values are skipped
# via ActiveSupport's #presence). Mirrors Provider::Registry / initializer logic.
def self.effective_model
  ENV.fetch("OPENAI_MODEL", Setting.openai_model).presence || DEFAULT_MODEL
end
|
||||
|
||||
def initialize(access_token, uri_base: nil, model: nil)
|
||||
client_options = { access_token: access_token }
|
||||
client_options[:uri_base] = uri_base if uri_base.present?
|
||||
@@ -32,7 +39,7 @@ class Provider::Openai < Provider
|
||||
@uri_base.present?
|
||||
end
|
||||
|
||||
def auto_categorize(transactions: [], user_categories: [], model: "")
|
||||
def auto_categorize(transactions: [], user_categories: [], model: "", family: nil)
|
||||
with_provider_response do
|
||||
raise Error, "Too many transactions to auto-categorize. Max is 25 per request." if transactions.size > 25
|
||||
|
||||
@@ -49,7 +56,8 @@ class Provider::Openai < Provider
|
||||
transactions: transactions,
|
||||
user_categories: user_categories,
|
||||
custom_provider: custom_provider?,
|
||||
langfuse_trace: trace
|
||||
langfuse_trace: trace,
|
||||
family: family
|
||||
).auto_categorize
|
||||
|
||||
trace&.update(output: result.map(&:to_h))
|
||||
@@ -58,7 +66,7 @@ class Provider::Openai < Provider
|
||||
end
|
||||
end
|
||||
|
||||
def auto_detect_merchants(transactions: [], user_merchants: [], model: "")
|
||||
def auto_detect_merchants(transactions: [], user_merchants: [], model: "", family: nil)
|
||||
with_provider_response do
|
||||
raise Error, "Too many transactions to auto-detect merchants. Max is 25 per request." if transactions.size > 25
|
||||
|
||||
@@ -75,7 +83,8 @@ class Provider::Openai < Provider
|
||||
transactions: transactions,
|
||||
user_merchants: user_merchants,
|
||||
custom_provider: custom_provider?,
|
||||
langfuse_trace: trace
|
||||
langfuse_trace: trace,
|
||||
family: family
|
||||
).auto_detect_merchants
|
||||
|
||||
trace&.update(output: result.map(&:to_h))
|
||||
@@ -93,7 +102,8 @@ class Provider::Openai < Provider
|
||||
streamer: nil,
|
||||
previous_response_id: nil,
|
||||
session_id: nil,
|
||||
user_identifier: nil
|
||||
user_identifier: nil,
|
||||
family: nil
|
||||
)
|
||||
if custom_provider?
|
||||
generic_chat_response(
|
||||
@@ -104,7 +114,8 @@ class Provider::Openai < Provider
|
||||
function_results: function_results,
|
||||
streamer: streamer,
|
||||
session_id: session_id,
|
||||
user_identifier: user_identifier
|
||||
user_identifier: user_identifier,
|
||||
family: family
|
||||
)
|
||||
else
|
||||
native_chat_response(
|
||||
@@ -116,7 +127,8 @@ class Provider::Openai < Provider
|
||||
streamer: streamer,
|
||||
previous_response_id: previous_response_id,
|
||||
session_id: session_id,
|
||||
user_identifier: user_identifier
|
||||
user_identifier: user_identifier,
|
||||
family: family
|
||||
)
|
||||
end
|
||||
end
|
||||
@@ -133,7 +145,8 @@ class Provider::Openai < Provider
|
||||
streamer: nil,
|
||||
previous_response_id: nil,
|
||||
session_id: nil,
|
||||
user_identifier: nil
|
||||
user_identifier: nil,
|
||||
family: nil
|
||||
)
|
||||
with_provider_response do
|
||||
chat_config = ChatConfig.new(
|
||||
@@ -175,6 +188,7 @@ class Provider::Openai < Provider
|
||||
response_chunk = collected_chunks.find { |chunk| chunk.type == "response" }
|
||||
response = response_chunk.data
|
||||
usage = response_chunk.usage
|
||||
Rails.logger.debug("Stream response usage: #{usage.inspect}")
|
||||
log_langfuse_generation(
|
||||
name: "chat_response",
|
||||
model: model,
|
||||
@@ -184,9 +198,11 @@ class Provider::Openai < Provider
|
||||
session_id: session_id,
|
||||
user_identifier: user_identifier
|
||||
)
|
||||
record_llm_usage(family: family, model: model, operation: "chat", usage: usage)
|
||||
response
|
||||
else
|
||||
parsed = ChatParser.new(raw_response).parsed
|
||||
Rails.logger.debug("Non-stream raw_response['usage']: #{raw_response['usage'].inspect}")
|
||||
log_langfuse_generation(
|
||||
name: "chat_response",
|
||||
model: model,
|
||||
@@ -196,6 +212,7 @@ class Provider::Openai < Provider
|
||||
session_id: session_id,
|
||||
user_identifier: user_identifier
|
||||
)
|
||||
record_llm_usage(family: family, model: model, operation: "chat", usage: raw_response["usage"])
|
||||
parsed
|
||||
end
|
||||
rescue => e
|
||||
@@ -220,7 +237,8 @@ class Provider::Openai < Provider
|
||||
function_results: [],
|
||||
streamer: nil,
|
||||
session_id: nil,
|
||||
user_identifier: nil
|
||||
user_identifier: nil,
|
||||
family: nil
|
||||
)
|
||||
with_provider_response do
|
||||
messages = build_generic_messages(
|
||||
@@ -253,6 +271,8 @@ class Provider::Openai < Provider
|
||||
user_identifier: user_identifier
|
||||
)
|
||||
|
||||
record_llm_usage(family: family, model: model, operation: "chat", usage: raw_response["usage"])
|
||||
|
||||
# If a streamer was provided, manually call it with the parsed response
|
||||
# to maintain the same contract as the streaming version
|
||||
if streamer.present?
|
||||
@@ -408,4 +428,46 @@ class Provider::Openai < Provider
|
||||
rescue => e
|
||||
Rails.logger.warn("Langfuse logging failed: #{e.message}")
|
||||
end
|
||||
|
||||
# Persist token usage and an estimated cost for a single LLM call against
# the given family. Best-effort by design: any failure is logged and
# swallowed so usage tracking can never break the primary request.
#
# family:    owner of the usage record; no-op when nil
# model:     model identifier string used for the request
# operation: logical operation name (e.g. "chat")
# usage:     provider usage hash; no-op when nil
def record_llm_usage(family:, model:, operation:, usage:)
  return unless family && usage

  Rails.logger.info("Recording LLM usage - Raw usage data: #{usage.inspect}")

  # Handle both old and new OpenAI API response formats
  # Old format: prompt_tokens, completion_tokens, total_tokens
  # New format: input_tokens, output_tokens, total_tokens
  prompt_tokens = usage["prompt_tokens"] || usage["input_tokens"] || 0
  completion_tokens = usage["completion_tokens"] || usage["output_tokens"] || 0
  # FIX: fall back to the sum of the two counts when the provider omits
  # total_tokens, instead of recording 0 alongside non-zero components.
  total_tokens = usage["total_tokens"] || (prompt_tokens + completion_tokens)

  Rails.logger.info("Extracted tokens - prompt: #{prompt_tokens}, completion: #{completion_tokens}, total: #{total_tokens}")

  # Returns nil for models without known pricing (custom/self-hosted).
  estimated_cost = LlmUsage.calculate_cost(
    model: model,
    prompt_tokens: prompt_tokens,
    completion_tokens: completion_tokens
  )

  # Log when we can't estimate the cost (e.g., custom/self-hosted models)
  if estimated_cost.nil?
    Rails.logger.info("Recording LLM usage without cost estimate for unknown model: #{model} (custom provider: #{custom_provider?})")
  end

  inferred_provider = LlmUsage.infer_provider(model)
  family.llm_usages.create!(
    provider: inferred_provider,
    model: model,
    operation: operation,
    prompt_tokens: prompt_tokens,
    completion_tokens: completion_tokens,
    total_tokens: total_tokens,
    estimated_cost: estimated_cost,
    metadata: {}
  )

  Rails.logger.info("LLM usage recorded successfully - Cost: #{estimated_cost.inspect}")
rescue => e
  # Never propagate: usage accounting must not fail the caller's request.
  Rails.logger.error("Failed to record LLM usage: #{e.message}")
end
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user