First cut of additional AI_DEBUG

This commit is contained in:
Juan José Mata
2025-10-25 20:21:13 +00:00
parent b24b10262e
commit 96e38eab2c
8 changed files with 125 additions and 7 deletions

View File

@@ -55,6 +55,37 @@ class Assistant
responder.on(:response) do |data|
update_thinking("Analyzing your data...")
# Persist the provider's response identifier on the assistant message so
# future renders reflect the exact metadata used for this conversation
assistant_message.update!(provider_id: data[:id]) if data[:id].present?
# Persist the endpoint used for this provider (if applicable)
if assistant_message.endpoint.blank? && llm_provider.respond_to?(:endpoint_base)
assistant_message.update!(endpoint: llm_provider.endpoint_base)
end
# Persist usage metrics and estimated cost when provided by the LLM provider
if data[:usage].present?
usage = data[:usage]
prompt_tokens = usage["prompt_tokens"] || usage["input_tokens"] || 0
completion_tokens = usage["completion_tokens"] || usage["output_tokens"] || 0
total_tokens = usage["total_tokens"] || (prompt_tokens + completion_tokens)
estimated_cost = LlmUsage.calculate_cost(
model: message.ai_model,
prompt_tokens: prompt_tokens,
completion_tokens: completion_tokens
)
assistant_message.update!(
prompt_tokens: prompt_tokens,
completion_tokens: completion_tokens,
total_tokens: total_tokens,
estimated_cost: estimated_cost
)
end
if data[:function_tool_calls].present?
assistant_message.tool_calls = data[:function_tool_calls]
latest_response_id = data[:id]

View File

@@ -24,9 +24,9 @@ class Assistant::Responder
response_handled = true
if response.function_requests.any?
-            handle_follow_up_response(response)
+            handle_follow_up_response(response, usage: chunk.usage)
else
-            emit(:response, { id: response.id })
+            emit(:response, { id: response.id, usage: chunk.usage })
end
end
end
@@ -46,14 +46,14 @@ class Assistant::Responder
private
attr_reader :message, :instructions, :function_tool_caller, :llm
-    def handle_follow_up_response(response)
+    def handle_follow_up_response(response, usage: nil)
streamer = proc do |chunk|
case chunk.type
when "output_text"
emit(:output_text, chunk.data)
when "response"
# We do not currently support function executions for a follow-up response (avoid recursive LLM calls that could lead to high spend)
-          emit(:response, { id: chunk.data.id })
+          emit(:response, { id: chunk.data.id, usage: chunk.usage })
end
end
@@ -61,6 +61,7 @@ class Assistant::Responder
emit(:response, {
id: response.id,
+        usage: usage,
function_tool_calls: function_tool_calls
})
@@ -70,7 +71,7 @@ class Assistant::Responder
function_results: function_tool_calls.map(&:to_result),
previous_response_id: response.id
)
end
end
def get_llm_response(streamer:, function_results: [], previous_response_id: nil)
response = llm.chat_response(

View File

@@ -39,6 +39,12 @@ class Provider::Openai < Provider
custom_provider? ? "Custom OpenAI-compatible (#{@uri_base})" : "OpenAI"
end
# The custom HTTP base endpoint configured for this provider, or nil when
# the SDK's default OpenAI endpoint should be used. Blank values (nil or
# empty/whitespace-only strings) are normalized to nil so callers can rely
# on a simple truthiness check.
def endpoint_base
  @uri_base.blank? ? nil : @uri_base
end
def supported_models_description
if custom_provider?
@default_model.present? ? "configured model: #{@default_model}" : "any model"