First cut of additional AI_DEBUG

2026-04-25 23:14:10 +00:00 · 2025-10-25 20:21:13 +00:00
parent b24b10262e
commit 96e38eab2c
8 changed files with 125 additions and 7 deletions
--- a/app/models/assistant.rb
+++ b/app/models/assistant.rb
@@ -55,6 +55,37 @@ class Assistant
    responder.on(:response) do |data|
      update_thinking("Analyzing your data...")

+      # Persist the provider's response identifier on the assistant message so
+      # future renders reflect the exact metadata used for this conversation
+      assistant_message.update!(provider_id: data[:id]) if data[:id].present?
+
+      # Persist the endpoint used for this provider (if applicable)
+      if assistant_message.endpoint.blank? && llm_provider.respond_to?(:endpoint_base)
+        assistant_message.update!(endpoint: llm_provider.endpoint_base)
+      end
+
+      # Persist usage metrics and estimated cost when provided by the LLM provider
+      if data[:usage].present?
+        usage = data[:usage]
+
+        prompt_tokens = usage["prompt_tokens"] || usage["input_tokens"] || 0
+        completion_tokens = usage["completion_tokens"] || usage["output_tokens"] || 0
+        total_tokens = usage["total_tokens"] || (prompt_tokens + completion_tokens)
+
+        estimated_cost = LlmUsage.calculate_cost(
+          model: message.ai_model,
+          prompt_tokens: prompt_tokens,
+          completion_tokens: completion_tokens
+        )
+
+        assistant_message.update!(
+          prompt_tokens: prompt_tokens,
+          completion_tokens: completion_tokens,
+          total_tokens: total_tokens,
+          estimated_cost: estimated_cost
+        )
+      end
+
      if data[:function_tool_calls].present?
        assistant_message.tool_calls = data[:function_tool_calls]
        latest_response_id = data[:id]
--- a/app/models/assistant/responder.rb
+++ b/app/models/assistant/responder.rb
@@ -24,9 +24,9 @@ class Assistant::Responder
        response_handled = true

        if response.function_requests.any?
-          handle_follow_up_response(response)
+          handle_follow_up_response(response, usage: chunk.usage)
        else
-          emit(:response, { id: response.id })
+          emit(:response, { id: response.id, usage: chunk.usage })
        end
      end
    end
@@ -46,14 +46,14 @@ class Assistant::Responder
  private
    attr_reader :message, :instructions, :function_tool_caller, :llm

-    def handle_follow_up_response(response)
+    def handle_follow_up_response(response, usage: nil)
      streamer = proc do |chunk|
        case chunk.type
        when "output_text"
          emit(:output_text, chunk.data)
        when "response"
          # We do not currently support function executions for a follow-up response (avoid recursive LLM calls that could lead to high spend)
-          emit(:response, { id: chunk.data.id })
+          emit(:response, { id: chunk.data.id, usage: chunk.usage })
        end
      end

@@ -61,6 +61,7 @@ class Assistant::Responder

      emit(:response, {
        id: response.id,
+        usage: usage,
        function_tool_calls: function_tool_calls
      })

@@ -70,7 +71,7 @@ class Assistant::Responder
        function_results: function_tool_calls.map(&:to_result),
        previous_response_id: response.id
      )
-    end
+    end

    def get_llm_response(streamer:, function_results: [], previous_response_id: nil)
      response = llm.chat_response(
--- a/app/models/provider/openai.rb
+++ b/app/models/provider/openai.rb
@@ -39,6 +39,12 @@ class Provider::Openai < Provider
    custom_provider? ? "Custom OpenAI-compatible (#{@uri_base})" : "OpenAI"
  end

+  # Returns the custom endpoint base (@uri_base) configured for this provider, or nil
+  # when it is blank -- presumably the SDK's default OpenAI endpoint applies then (confirm).
+  def endpoint_base
+    @uri_base.presence
+  end
+
  def supported_models_description
    if custom_provider?
      @default_model.present? ? "configured model: #{@default_model}" : "any model"