LLM cost estimation (#223)

* Password reset back button also after confirmation

Signed-off-by: Juan José Mata <juanjo.mata@gmail.com>

* Implement a filter for category (#215)

- Also implement an is empty/is null condition.

* Implement an LLM cost estimation page

Track costs across all the cost categories: auto categorization, auto merchant detection and chat.
Show warning with estimated cost when running a rule that contains AI.

* Update pricing

* Add google pricing

and fix inferred model everywhere.

* Update app/models/llm_usage.rb

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
Signed-off-by: soky srm <sokysrm@gmail.com>

* Fix: address review feedback

* Linter

* Address review

- Lowered log level
- extracted the duplicated record_usage method into a shared concern

* Update app/controllers/settings/llm_usages_controller.rb

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
Signed-off-by: soky srm <sokysrm@gmail.com>

* Moved attr_reader out of private

---------

Signed-off-by: Juan José Mata <juanjo.mata@gmail.com>
Signed-off-by: soky srm <sokysrm@gmail.com>
Co-authored-by: Juan José Mata <juanjo.mata@gmail.com>
Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
This commit is contained in:
soky srm
2025-10-24 00:08:59 +02:00
committed by GitHub
parent 4999409082
commit bb364fab38
19 changed files with 651 additions and 21 deletions

View File

@@ -8,6 +8,13 @@ class Provider::Openai < Provider
DEFAULT_OPENAI_MODEL_PREFIXES = %w[gpt-4 gpt-5 o1 o3]
DEFAULT_MODEL = "gpt-4.1"
# Returns the model name the provider will effectively use: the configured
# value (OPENAI_MODEL env var, falling back to Setting.openai_model) when
# present, otherwise the library default.
# Mirrors the resolution logic in Provider::Registry and the initializer.
def self.effective_model
  chosen = ENV.fetch("OPENAI_MODEL", Setting.openai_model)
  chosen.presence || DEFAULT_MODEL
end
def initialize(access_token, uri_base: nil, model: nil)
client_options = { access_token: access_token }
client_options[:uri_base] = uri_base if uri_base.present?
@@ -32,7 +39,7 @@ class Provider::Openai < Provider
@uri_base.present?
end
def auto_categorize(transactions: [], user_categories: [], model: "")
def auto_categorize(transactions: [], user_categories: [], model: "", family: nil)
with_provider_response do
raise Error, "Too many transactions to auto-categorize. Max is 25 per request." if transactions.size > 25
@@ -49,7 +56,8 @@ class Provider::Openai < Provider
transactions: transactions,
user_categories: user_categories,
custom_provider: custom_provider?,
langfuse_trace: trace
langfuse_trace: trace,
family: family
).auto_categorize
trace&.update(output: result.map(&:to_h))
@@ -58,7 +66,7 @@ class Provider::Openai < Provider
end
end
def auto_detect_merchants(transactions: [], user_merchants: [], model: "")
def auto_detect_merchants(transactions: [], user_merchants: [], model: "", family: nil)
with_provider_response do
raise Error, "Too many transactions to auto-detect merchants. Max is 25 per request." if transactions.size > 25
@@ -75,7 +83,8 @@ class Provider::Openai < Provider
transactions: transactions,
user_merchants: user_merchants,
custom_provider: custom_provider?,
langfuse_trace: trace
langfuse_trace: trace,
family: family
).auto_detect_merchants
trace&.update(output: result.map(&:to_h))
@@ -93,7 +102,8 @@ class Provider::Openai < Provider
streamer: nil,
previous_response_id: nil,
session_id: nil,
user_identifier: nil
user_identifier: nil,
family: nil
)
if custom_provider?
generic_chat_response(
@@ -104,7 +114,8 @@ class Provider::Openai < Provider
function_results: function_results,
streamer: streamer,
session_id: session_id,
user_identifier: user_identifier
user_identifier: user_identifier,
family: family
)
else
native_chat_response(
@@ -116,7 +127,8 @@ class Provider::Openai < Provider
streamer: streamer,
previous_response_id: previous_response_id,
session_id: session_id,
user_identifier: user_identifier
user_identifier: user_identifier,
family: family
)
end
end
@@ -133,7 +145,8 @@ class Provider::Openai < Provider
streamer: nil,
previous_response_id: nil,
session_id: nil,
user_identifier: nil
user_identifier: nil,
family: nil
)
with_provider_response do
chat_config = ChatConfig.new(
@@ -175,6 +188,7 @@ class Provider::Openai < Provider
response_chunk = collected_chunks.find { |chunk| chunk.type == "response" }
response = response_chunk.data
usage = response_chunk.usage
Rails.logger.debug("Stream response usage: #{usage.inspect}")
log_langfuse_generation(
name: "chat_response",
model: model,
@@ -184,9 +198,11 @@ class Provider::Openai < Provider
session_id: session_id,
user_identifier: user_identifier
)
record_llm_usage(family: family, model: model, operation: "chat", usage: usage)
response
else
parsed = ChatParser.new(raw_response).parsed
Rails.logger.debug("Non-stream raw_response['usage']: #{raw_response['usage'].inspect}")
log_langfuse_generation(
name: "chat_response",
model: model,
@@ -196,6 +212,7 @@ class Provider::Openai < Provider
session_id: session_id,
user_identifier: user_identifier
)
record_llm_usage(family: family, model: model, operation: "chat", usage: raw_response["usage"])
parsed
end
rescue => e
@@ -220,7 +237,8 @@ class Provider::Openai < Provider
function_results: [],
streamer: nil,
session_id: nil,
user_identifier: nil
user_identifier: nil,
family: nil
)
with_provider_response do
messages = build_generic_messages(
@@ -253,6 +271,8 @@ class Provider::Openai < Provider
user_identifier: user_identifier
)
record_llm_usage(family: family, model: model, operation: "chat", usage: raw_response["usage"])
# If a streamer was provided, manually call it with the parsed response
# to maintain the same contract as the streaming version
if streamer.present?
@@ -408,4 +428,46 @@ class Provider::Openai < Provider
rescue => e
Rails.logger.warn("Langfuse logging failed: #{e.message}")
end
# Persists one LLM usage row on the given family, estimating cost from the
# token counts when pricing for the model is known.
#
# family:    owner of the usage record; nothing is recorded when nil
# model:     model identifier string used for the request
# operation: label for the kind of call (e.g. "chat")
# usage:     token-usage hash from the OpenAI API response; skipped when nil
#
# Any failure is logged and swallowed so that usage tracking can never break
# the primary request flow.
def record_llm_usage(family:, model:, operation:, usage:)
  return if !family || !usage

  Rails.logger.info("Recording LLM usage - Raw usage data: #{usage.inspect}")

  # OpenAI reports tokens under two naming schemes:
  #   legacy: prompt_tokens / completion_tokens / total_tokens
  #   newer:  input_tokens  / output_tokens     / total_tokens
  input_count = usage["prompt_tokens"] || usage["input_tokens"] || 0
  output_count = usage["completion_tokens"] || usage["output_tokens"] || 0
  total_count = usage["total_tokens"] || 0

  Rails.logger.info("Extracted tokens - prompt: #{input_count}, completion: #{output_count}, total: #{total_count}")

  cost = LlmUsage.calculate_cost(
    model: model,
    prompt_tokens: input_count,
    completion_tokens: output_count
  )

  # calculate_cost yields nil for models without pricing data
  # (e.g. custom/self-hosted models); record the row anyway.
  if cost.nil?
    Rails.logger.info("Recording LLM usage without cost estimate for unknown model: #{model} (custom provider: #{custom_provider?})")
  end

  family.llm_usages.create!(
    provider: LlmUsage.infer_provider(model),
    model: model,
    operation: operation,
    prompt_tokens: input_count,
    completion_tokens: output_count,
    total_tokens: total_count,
    estimated_cost: cost,
    metadata: {}
  )

  Rails.logger.info("LLM usage recorded successfully - Cost: #{cost.inspect}")
rescue => e
  Rails.logger.error("Failed to record LLM usage: #{e.message}")
end
end