LLM cost estimation (#223)

* Password reset back button also after confirmation

Signed-off-by: Juan José Mata <juanjo.mata@gmail.com>

* Implement a filter for category (#215)

- Also implement an is empty/is null condition.

* Implement an LLM cost estimation page

Track costs across all the cost categories: auto categorization, auto merchant detection and chat.
Show warning with estimated cost when running a rule that contains AI.

* Update pricing

* Add google pricing

and fix inferred model everywhere.

* Update app/models/llm_usage.rb

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
Signed-off-by: soky srm <sokysrm@gmail.com>

* Fix: address review feedback

* Linter

* Address review

- Lowered log level
- extracted the duplicated record_usage method into a shared concern

* Update app/controllers/settings/llm_usages_controller.rb

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
Signed-off-by: soky srm <sokysrm@gmail.com>

* Moved attr_reader out of private

---------

Signed-off-by: Juan José Mata <juanjo.mata@gmail.com>
Signed-off-by: soky srm <sokysrm@gmail.com>
Co-authored-by: Juan José Mata <juanjo.mata@gmail.com>
Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
This commit is contained in:
soky srm
2025-10-24 00:08:59 +02:00
committed by GitHub
parent 4999409082
commit bb364fab38
19 changed files with 651 additions and 21 deletions

View File

@@ -8,6 +8,13 @@ class Provider::Openai < Provider
DEFAULT_OPENAI_MODEL_PREFIXES = %w[gpt-4 gpt-5 o1 o3]
DEFAULT_MODEL = "gpt-4.1"
# Returns the model name the provider will effectively use: the configured
# value (OPENAI_MODEL env var, falling back to Setting.openai_model) when
# present, otherwise the library default.
# Mirrors the resolution logic in Provider::Registry and the initializer.
def self.effective_model
  chosen = ENV.fetch("OPENAI_MODEL", Setting.openai_model)
  chosen.presence || DEFAULT_MODEL
end
def initialize(access_token, uri_base: nil, model: nil)
client_options = { access_token: access_token }
client_options[:uri_base] = uri_base if uri_base.present?
@@ -32,7 +39,7 @@ class Provider::Openai < Provider
@uri_base.present?
end
def auto_categorize(transactions: [], user_categories: [], model: "")
def auto_categorize(transactions: [], user_categories: [], model: "", family: nil)
with_provider_response do
raise Error, "Too many transactions to auto-categorize. Max is 25 per request." if transactions.size > 25
@@ -49,7 +56,8 @@ class Provider::Openai < Provider
transactions: transactions,
user_categories: user_categories,
custom_provider: custom_provider?,
langfuse_trace: trace
langfuse_trace: trace,
family: family
).auto_categorize
trace&.update(output: result.map(&:to_h))
@@ -58,7 +66,7 @@ class Provider::Openai < Provider
end
end
def auto_detect_merchants(transactions: [], user_merchants: [], model: "")
def auto_detect_merchants(transactions: [], user_merchants: [], model: "", family: nil)
with_provider_response do
raise Error, "Too many transactions to auto-detect merchants. Max is 25 per request." if transactions.size > 25
@@ -75,7 +83,8 @@ class Provider::Openai < Provider
transactions: transactions,
user_merchants: user_merchants,
custom_provider: custom_provider?,
langfuse_trace: trace
langfuse_trace: trace,
family: family
).auto_detect_merchants
trace&.update(output: result.map(&:to_h))
@@ -93,7 +102,8 @@ class Provider::Openai < Provider
streamer: nil,
previous_response_id: nil,
session_id: nil,
user_identifier: nil
user_identifier: nil,
family: nil
)
if custom_provider?
generic_chat_response(
@@ -104,7 +114,8 @@ class Provider::Openai < Provider
function_results: function_results,
streamer: streamer,
session_id: session_id,
user_identifier: user_identifier
user_identifier: user_identifier,
family: family
)
else
native_chat_response(
@@ -116,7 +127,8 @@ class Provider::Openai < Provider
streamer: streamer,
previous_response_id: previous_response_id,
session_id: session_id,
user_identifier: user_identifier
user_identifier: user_identifier,
family: family
)
end
end
@@ -133,7 +145,8 @@ class Provider::Openai < Provider
streamer: nil,
previous_response_id: nil,
session_id: nil,
user_identifier: nil
user_identifier: nil,
family: nil
)
with_provider_response do
chat_config = ChatConfig.new(
@@ -175,6 +188,7 @@ class Provider::Openai < Provider
response_chunk = collected_chunks.find { |chunk| chunk.type == "response" }
response = response_chunk.data
usage = response_chunk.usage
Rails.logger.debug("Stream response usage: #{usage.inspect}")
log_langfuse_generation(
name: "chat_response",
model: model,
@@ -184,9 +198,11 @@ class Provider::Openai < Provider
session_id: session_id,
user_identifier: user_identifier
)
record_llm_usage(family: family, model: model, operation: "chat", usage: usage)
response
else
parsed = ChatParser.new(raw_response).parsed
Rails.logger.debug("Non-stream raw_response['usage']: #{raw_response['usage'].inspect}")
log_langfuse_generation(
name: "chat_response",
model: model,
@@ -196,6 +212,7 @@ class Provider::Openai < Provider
session_id: session_id,
user_identifier: user_identifier
)
record_llm_usage(family: family, model: model, operation: "chat", usage: raw_response["usage"])
parsed
end
rescue => e
@@ -220,7 +237,8 @@ class Provider::Openai < Provider
function_results: [],
streamer: nil,
session_id: nil,
user_identifier: nil
user_identifier: nil,
family: nil
)
with_provider_response do
messages = build_generic_messages(
@@ -253,6 +271,8 @@ class Provider::Openai < Provider
user_identifier: user_identifier
)
record_llm_usage(family: family, model: model, operation: "chat", usage: raw_response["usage"])
# If a streamer was provided, manually call it with the parsed response
# to maintain the same contract as the streaming version
if streamer.present?
@@ -408,4 +428,46 @@ class Provider::Openai < Provider
rescue => e
Rails.logger.warn("Langfuse logging failed: #{e.message}")
end
# Persists one LLM usage row on the given family, estimating cost from the
# token counts when pricing for the model is known.
#
# family:    owner of the usage record; nothing is recorded when nil
# model:     model identifier string used for the request
# operation: label for the kind of call (e.g. "chat")
# usage:     token-usage hash from the OpenAI API response; skipped when nil
#
# Any failure is logged and swallowed so that usage tracking can never break
# the primary request flow.
def record_llm_usage(family:, model:, operation:, usage:)
  return if !family || !usage

  Rails.logger.info("Recording LLM usage - Raw usage data: #{usage.inspect}")

  # OpenAI reports tokens under two naming schemes:
  #   legacy: prompt_tokens / completion_tokens / total_tokens
  #   newer:  input_tokens  / output_tokens     / total_tokens
  input_count = usage["prompt_tokens"] || usage["input_tokens"] || 0
  output_count = usage["completion_tokens"] || usage["output_tokens"] || 0
  total_count = usage["total_tokens"] || 0

  Rails.logger.info("Extracted tokens - prompt: #{input_count}, completion: #{output_count}, total: #{total_count}")

  cost = LlmUsage.calculate_cost(
    model: model,
    prompt_tokens: input_count,
    completion_tokens: output_count
  )

  # calculate_cost yields nil for models without pricing data
  # (e.g. custom/self-hosted models); record the row anyway.
  if cost.nil?
    Rails.logger.info("Recording LLM usage without cost estimate for unknown model: #{model} (custom provider: #{custom_provider?})")
  end

  family.llm_usages.create!(
    provider: LlmUsage.infer_provider(model),
    model: model,
    operation: operation,
    prompt_tokens: input_count,
    completion_tokens: output_count,
    total_tokens: total_count,
    estimated_cost: cost,
    metadata: {}
  )

  Rails.logger.info("LLM usage recorded successfully - Cost: #{cost.inspect}")
rescue => e
  Rails.logger.error("Failed to record LLM usage: #{e.message}")
end
end