mirror of
https://github.com/we-promise/sure.git
synced 2026-04-25 23:14:10 +00:00
First cut of additional AI_DEBUG
This commit is contained in:
@@ -55,6 +55,37 @@ class Assistant
|
||||
responder.on(:response) do |data|
|
||||
update_thinking("Analyzing your data...")
|
||||
|
||||
# Persist the provider's response identifier on the assistant message so
|
||||
# future renders reflect the exact metadata used for this conversation
|
||||
assistant_message.update!(provider_id: data[:id]) if data[:id].present?
|
||||
|
||||
# Persist the endpoint used for this provider (if applicable)
|
||||
if assistant_message.endpoint.blank? && llm_provider.respond_to?(:endpoint_base)
|
||||
assistant_message.update!(endpoint: llm_provider.endpoint_base)
|
||||
end
|
||||
|
||||
# Persist usage metrics and estimated cost when provided by the LLM provider
|
||||
if data[:usage].present?
|
||||
usage = data[:usage]
|
||||
|
||||
prompt_tokens = usage["prompt_tokens"] || usage["input_tokens"] || 0
|
||||
completion_tokens = usage["completion_tokens"] || usage["output_tokens"] || 0
|
||||
total_tokens = usage["total_tokens"] || (prompt_tokens + completion_tokens)
|
||||
|
||||
estimated_cost = LlmUsage.calculate_cost(
|
||||
model: message.ai_model,
|
||||
prompt_tokens: prompt_tokens,
|
||||
completion_tokens: completion_tokens
|
||||
)
|
||||
|
||||
assistant_message.update!(
|
||||
prompt_tokens: prompt_tokens,
|
||||
completion_tokens: completion_tokens,
|
||||
total_tokens: total_tokens,
|
||||
estimated_cost: estimated_cost
|
||||
)
|
||||
end
|
||||
|
||||
if data[:function_tool_calls].present?
|
||||
assistant_message.tool_calls = data[:function_tool_calls]
|
||||
latest_response_id = data[:id]
|
||||
|
||||
@@ -24,9 +24,9 @@ class Assistant::Responder
|
||||
response_handled = true
|
||||
|
||||
if response.function_requests.any?
|
||||
handle_follow_up_response(response)
|
||||
handle_follow_up_response(response, usage: chunk.usage)
|
||||
else
|
||||
emit(:response, { id: response.id })
|
||||
emit(:response, { id: response.id, usage: chunk.usage })
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -46,14 +46,14 @@ class Assistant::Responder
|
||||
private
|
||||
attr_reader :message, :instructions, :function_tool_caller, :llm
|
||||
|
||||
def handle_follow_up_response(response)
|
||||
def handle_follow_up_response(response, usage: nil)
|
||||
streamer = proc do |chunk|
|
||||
case chunk.type
|
||||
when "output_text"
|
||||
emit(:output_text, chunk.data)
|
||||
when "response"
|
||||
# We do not currently support function executions for a follow-up response (avoid recursive LLM calls that could lead to high spend)
|
||||
emit(:response, { id: chunk.data.id })
|
||||
emit(:response, { id: chunk.data.id, usage: chunk.usage })
|
||||
end
|
||||
end
|
||||
|
||||
@@ -61,6 +61,7 @@ class Assistant::Responder
|
||||
|
||||
emit(:response, {
|
||||
id: response.id,
|
||||
usage: usage,
|
||||
function_tool_calls: function_tool_calls
|
||||
})
|
||||
|
||||
@@ -70,7 +71,7 @@ class Assistant::Responder
|
||||
function_results: function_tool_calls.map(&:to_result),
|
||||
previous_response_id: response.id
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
def get_llm_response(streamer:, function_results: [], previous_response_id: nil)
|
||||
response = llm.chat_response(
|
||||
|
||||
@@ -39,6 +39,12 @@ class Provider::Openai < Provider
|
||||
custom_provider? ? "Custom OpenAI-compatible (#{@uri_base})" : "OpenAI"
|
||||
end
|
||||
|
||||
# Base HTTP endpoint for this provider.
#
# Gives back @uri_base when it holds a non-blank value. Otherwise the result
# is nil, which signals that no custom base was configured and the SDK's
# default OpenAI endpoint applies.
def endpoint_base
  @uri_base if @uri_base.present?
end
|
||||
|
||||
def supported_models_description
|
||||
if custom_provider?
|
||||
@default_model.present? ? "configured model: #{@default_model}" : "any model"
|
||||
|
||||
Reference in New Issue
Block a user