From 96e38eab2c42e5a5d28daed2b7e4923eaaf35968 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Juan=20Jos=C3=A9=20Mata?=
Date: Sat, 25 Oct 2025 20:21:13 +0000
Subject: [PATCH] First cut of additional AI_DEBUG

---
Review notes (this section is ignored when the patch is applied):
- app/models/assistant.rb: the :response handler now gathers provider_id,
  endpoint and usage into one attribute hash and persists it with a single
  update! instead of up to three separate writes per event.
- app/models/assistant.rb: a tool-calling turn emits :response twice (once for
  the tool-call request, once for the follow-up answer). Token counts and cost
  are now added to the values already stored on the assistant message instead
  of being overwritten by the second event.
- app/models/assistant/responder.rb: dropped the whitespace-only change to the
  `end` of handle_follow_up_response.
- Line structure and indentation were reconstructed; the post-image blob ids on
  the `index` lines of the two files above are the original ones and no longer
  match the edited content. Regenerate the patch before using `git am -3`.

 app/models/assistant.rb                       | 38 ++++++++++++
 app/models/assistant/responder.rb             |  9 ++-
 app/models/provider/openai.rb                 |  6 ++
 .../_assistant_message.html.erb               |  4 ++
 .../assistant_messages/_model_info.html.erb   | 60 +++++++++++++++++++
 .../assistant_messages/_tool_calls.html.erb   |  2 +-
 .../20251025120000_add_usage_to_messages.rb   | 11 ++++
 db/schema.rb                                  |  7 ++-
 8 files changed, 131 insertions(+), 6 deletions(-)
 create mode 100644 app/views/assistant_messages/_model_info.html.erb
 create mode 100644 db/migrate/20251025120000_add_usage_to_messages.rb

diff --git a/app/models/assistant.rb b/app/models/assistant.rb
index 4e9fbb340..1fefa2b9e 100644
--- a/app/models/assistant.rb
+++ b/app/models/assistant.rb
@@ -55,6 +55,44 @@ class Assistant
     responder.on(:response) do |data|
       update_thinking("Analyzing your data...")
 
+      # Collect every attribute this event contributes and persist them with a
+      # single write, instead of issuing one UPDATE per piece of metadata.
+      metadata = {}
+
+      # Provider's response identifier, so future renders reflect the exact
+      # metadata used for this conversation
+      metadata[:provider_id] = data[:id] if data[:id].present?
+
+      # Endpoint used for this provider (if applicable)
+      if assistant_message.endpoint.blank? && llm_provider.respond_to?(:endpoint_base)
+        metadata[:endpoint] = llm_provider.endpoint_base
+      end
+
+      # Usage metrics and estimated cost when provided by the LLM provider.
+      # A tool-calling turn emits :response twice (the tool-call request and the
+      # follow-up answer), so add to the stored totals rather than overwrite them.
+      if data[:usage].present?
+        usage = data[:usage]
+
+        prompt_tokens = usage["prompt_tokens"] || usage["input_tokens"] || 0
+        completion_tokens = usage["completion_tokens"] || usage["output_tokens"] || 0
+        total_tokens = usage["total_tokens"] || (prompt_tokens + completion_tokens)
+
+        estimated_cost = LlmUsage.calculate_cost(
+          model: message.ai_model,
+          prompt_tokens: prompt_tokens,
+          completion_tokens: completion_tokens
+        )
+
+        metadata[:prompt_tokens] = assistant_message.prompt_tokens.to_i + prompt_tokens
+        metadata[:completion_tokens] = assistant_message.completion_tokens.to_i + completion_tokens
+        metadata[:total_tokens] = assistant_message.total_tokens.to_i + total_tokens
+        # Leave the stored cost untouched when this call could not be priced
+        metadata[:estimated_cost] = (assistant_message.estimated_cost || 0) + estimated_cost if estimated_cost
+      end
+
+      assistant_message.update!(metadata) if metadata.any?
+
       if data[:function_tool_calls].present?
         assistant_message.tool_calls = data[:function_tool_calls]
         latest_response_id = data[:id]
diff --git a/app/models/assistant/responder.rb b/app/models/assistant/responder.rb
index f2b6d121a..cf8e2d1dc 100644
--- a/app/models/assistant/responder.rb
+++ b/app/models/assistant/responder.rb
@@ -24,9 +24,9 @@ class Assistant::Responder
         response_handled = true
 
         if response.function_requests.any?
-          handle_follow_up_response(response)
+          handle_follow_up_response(response, usage: chunk.usage)
         else
-          emit(:response, { id: response.id })
+          emit(:response, { id: response.id, usage: chunk.usage })
         end
       end
     end
@@ -46,14 +46,14 @@ class Assistant::Responder
   private
     attr_reader :message, :instructions, :function_tool_caller, :llm
 
-    def handle_follow_up_response(response)
+    def handle_follow_up_response(response, usage: nil)
       streamer = proc do |chunk|
         case chunk.type
         when "output_text"
           emit(:output_text, chunk.data)
         when "response"
           # We do not currently support function executions for a follow-up response (avoid recursive LLM calls that could lead to high spend)
-          emit(:response, { id: chunk.data.id })
+          emit(:response, { id: chunk.data.id, usage: chunk.usage })
         end
       end
 
@@ -61,6 +61,7 @@ class Assistant::Responder
 
       emit(:response, {
         id: response.id,
+        usage: usage,
         function_tool_calls: function_tool_calls
       })
 
diff --git a/app/models/provider/openai.rb b/app/models/provider/openai.rb
index e25361e36..019daa198 100644
--- a/app/models/provider/openai.rb
+++ b/app/models/provider/openai.rb
@@ -39,6 +39,12 @@ class Provider::Openai < Provider
     custom_provider? ? "Custom OpenAI-compatible (#{@uri_base})" : "OpenAI"
   end
 
+  # Returns the HTTP base endpoint used by this provider, or nil when using
+  # the default OpenAI endpoint configured by the SDK.
+  def endpoint_base
+    @uri_base.presence
+  end
+
   def supported_models_description
     if custom_provider?
       @default_model.present? ? "configured model: #{@default_model}" : "any model"
diff --git a/app/views/assistant_messages/_assistant_message.html.erb b/app/views/assistant_messages/_assistant_message.html.erb
index 59356a788..daa083180 100644
--- a/app/views/assistant_messages/_assistant_message.html.erb
+++ b/app/views/assistant_messages/_assistant_message.html.erb
@@ -11,6 +11,10 @@
<%= markdown(assistant_message.content) %>
<% else %> + <% if assistant_message.chat.debug_mode? %> + <%= render "assistant_messages/model_info", message: assistant_message %> + <% end %> + <% if assistant_message.chat.debug_mode? && assistant_message.tool_calls.any? %> <%= render "assistant_messages/tool_calls", message: assistant_message %> <% end %> diff --git a/app/views/assistant_messages/_model_info.html.erb b/app/views/assistant_messages/_model_info.html.erb new file mode 100644 index 000000000..73a2cd27f --- /dev/null +++ b/app/views/assistant_messages/_model_info.html.erb @@ -0,0 +1,60 @@ +<%# locals: (message:) %> + +
+ + <%= icon("chevron-right", class: "group-open:transform group-open:rotate-90") %> +

Model Info

+
+ + <% model_name = message.ai_model %> + <% registry = Provider::Registry.for_concept(:llm) %> + <% provider = registry.providers.find { |p| p.supports_model?(model_name) } %> + <% in_sidebar = request.headers["Turbo-Frame"] == "sidebar_chat" %> + <% grid_classes = in_sidebar ? "grid grid-cols-1 gap-2" : "grid grid-cols-1 md:grid-cols-2 gap-2" %> + +
+
+
+

Model

+

<%= model_name %>

+
+ +
+

Provider

+

<%= provider&.provider_name || "Unknown" %>

+
+ + <% if message.provider_id.present? %> +
+

Response ID

+

<%= message.provider_id %>

+
+ <% end %> + + <% if message.endpoint.present? %> +
+

Endpoint

+

<%= message.endpoint %>

+
+ <% end %> + + <% if message.prompt_tokens.to_i > 0 || message.completion_tokens.to_i > 0 || message.total_tokens.to_i > 0 %> +
+

Tokens (this response)

+

+ prompt=<%= message.prompt_tokens %>, completion=<%= message.completion_tokens %>, total=<%= message.total_tokens %> +

+
+ <% end %> + + <% if message.estimated_cost.present? %> +
+

Estimated Cost (this response)

+

$<%= message.estimated_cost.to_f.round(4) %>

+
+ <% end %> +
+
+
+ + diff --git a/app/views/assistant_messages/_tool_calls.html.erb b/app/views/assistant_messages/_tool_calls.html.erb index 59c149225..63f467edb 100644 --- a/app/views/assistant_messages/_tool_calls.html.erb +++ b/app/views/assistant_messages/_tool_calls.html.erb @@ -8,7 +8,7 @@
<% message.tool_calls.each do |tool_call| %> -
+

Function:

<%= tool_call.function_name %>

Arguments:

# Review note: new migration. Adds five columns to `messages` in one bulk
# change_table call: `endpoint` (string), the three token counters
# (`prompt_tokens`, `completion_tokens`, `total_tokens`; integer, NOT NULL,
# default 0) and `estimated_cost` (decimal, precision 10, scale 6, nullable).
diff --git a/db/migrate/20251025120000_add_usage_to_messages.rb b/db/migrate/20251025120000_add_usage_to_messages.rb
new file mode 100644
index 000000000..1852bd96b
--- /dev/null
+++ b/db/migrate/20251025120000_add_usage_to_messages.rb
@@ -0,0 +1,11 @@
+class AddUsageToMessages < ActiveRecord::Migration[7.2]
+  def change
+    change_table :messages, bulk: true do |t|
+      t.string :endpoint
+      t.integer :prompt_tokens, null: false, default: 0
+      t.integer :completion_tokens, null: false, default: 0
+      t.integer :total_tokens, null: false, default: 0
+      t.decimal :estimated_cost, precision: 10, scale: 6
+    end
+  end
+end
# Review note: schema dump regenerated for the migration above. The version is
# bumped to 2025_10_25_120000 and the same five columns appear on the table
# that carries index_messages_on_chat_id.
diff --git a/db/schema.rb b/db/schema.rb
index 1d97447db..e56ce2305 100644
--- a/db/schema.rb
+++ b/db/schema.rb
@@ -10,7 +10,7 @@
 #
 # It's strongly recommended that you check this file into your version control system.
 
-ActiveRecord::Schema[7.2].define(version: 2025_10_24_083624) do
+ActiveRecord::Schema[7.2].define(version: 2025_10_25_120000) do
   # These are extensions that must be enabled in order to support this database
   enable_extension "pgcrypto"
   enable_extension "plpgsql"
@@ -479,6 +479,11 @@ ActiveRecord::Schema[7.2].define(version: 2025_10_24_083624) do
     t.boolean "debug", default: false
     t.string "provider_id"
     t.boolean "reasoning", default: false
+    t.string "endpoint"
+    t.integer "prompt_tokens", default: 0, null: false
+    t.integer "completion_tokens", default: 0, null: false
+    t.integer "total_tokens", default: 0, null: false
+    t.decimal "estimated_cost", precision: 10, scale: 6
     t.index ["chat_id"], name: "index_messages_on_chat_id"
   end
 