From 5ab9bb33d6048e1e45f0722cab79586684ae72b4 Mon Sep 17 00:00:00 2001 From: Guillem Arias Date: Wed, 27 May 2026 10:41:53 +0200 Subject: [PATCH] fix(ai): sanitize Langfuse warn logs, normalize tool_use.input, dedup history fetch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses three open CodeRabbit findings on PR #1983. - Provider::Anthropic Langfuse rescue branches no longer include `e.full_message` in `Rails.logger.warn`. `full_message` bundles the backtrace + cause chain and on some SDK error types includes the serialized request/response payload (prompt, model output). Logs now report `#{e.class}: #{e.message}` only. Three sites: create_langfuse_trace, log_langfuse_generation, upsert_langfuse_trace. Note: Provider::Openai has the same pattern (copy-pasted source) — harmonization deferred to a follow-up cleanup PR; this commit fixes only the Anthropic provider to keep PR scope tight. - MessageFormatter#parse_arguments now coerces any non-Hash parsed result to `{}`. Anthropic's Messages API requires `tool_use.input` to be a JSON object (map); a stored ToolCall::Function record whose arguments parse to a scalar, bool, or array (corrupt row, legacy data, cross-provider bleed) would otherwise produce a payload the API rejects. Normal flow stores Hash arguments end-to-end so the fix is defensive — adds 2 tests covering scalar/array JSON strings and non-String non-Hash inputs. - Assistant::Responder dedups the chat-history fetch. The previous layout fired two near-identical `chat.messages.where(...).includes( :tool_calls).ordered` queries per LLM turn (one for the OpenAI-shape payload, one for the raw-records kwarg). A new memoized `complete_chat_messages` fetches once; `chat_message_records` filters out the current message via `Array#reject`, `openai_messages_payload` iterates the cached array unchanged. One SQL query per turn instead of two. Memoization scope = single Responder instance (per LLM call), so cache invalidation is not a concern. All 4370 tests pass (1 pre-existing libvips env error unrelated). Rubocop + brakeman clean. --- app/models/assistant/responder.rb | 35 ++++++++++------- app/models/provider/anthropic.rb | 9 +++-- .../provider/anthropic/message_formatter.rb | 22 +++++++---- .../anthropic/message_formatter_test.rb | 39 +++++++++++++++++++ 4 files changed, 79 insertions(+), 26 deletions(-) diff --git a/app/models/assistant/responder.rb b/app/models/assistant/responder.rb index a5950a51a..bfd86321c 100644 --- a/app/models/assistant/responder.rb +++ b/app/models/assistant/responder.rb @@ -117,18 +117,29 @@ class Assistant::Responder @chat ||= message.chat end + # Memoized fetch — both `chat_message_records` and `openai_messages_payload` + # derive their shape from this one in-memory array so a single chat turn + # fires one history query instead of two. + def complete_chat_messages + return @complete_chat_messages if defined?(@complete_chat_messages) + + @complete_chat_messages = + if chat&.messages + chat.messages + .where(type: [ "UserMessage", "AssistantMessage" ], status: "complete") + .includes(:tool_calls) + .ordered + .to_a + else + [] + end + end + # Raw Message records preceding the current turn — providers that build # their own native message shape (Anthropic) consume this directly so they # do not have to round-trip through the OpenAI-shaped payload below. def chat_message_records - return [] unless chat&.messages - - chat.messages - .where(type: [ "UserMessage", "AssistantMessage" ], status: "complete") - .where.not(id: message.id) - .includes(:tool_calls) - .ordered - .to_a + complete_chat_messages.reject { |m| m.id == message.id } end # Builds the OpenAI-shaped messages payload (role: "user" | "assistant" | @@ -136,13 +147,7 @@ class Assistant::Responder # chat path. Anthropic uses chat_message_records instead. def openai_messages_payload messages = [] - return messages unless chat&.messages - - chat.messages - .where(type: [ "UserMessage", "AssistantMessage" ], status: "complete") - .includes(:tool_calls) - .ordered - .each do |chat_message| + complete_chat_messages.each do |chat_message| if chat_message.tool_calls.any? messages << { role: chat_message.role, diff --git a/app/models/provider/anthropic.rb b/app/models/provider/anthropic.rb index 181f1a76f..5d530c62a 100644 --- a/app/models/provider/anthropic.rb +++ b/app/models/provider/anthropic.rb @@ -262,7 +262,10 @@ class Provider::Anthropic < Provider environment: Rails.env ) rescue => e - Rails.logger.warn("Langfuse trace creation failed: #{e.message}\n#{e.full_message}") + # Sanitized log (class + message only) — `e.full_message` bundles the + # backtrace + cause chain, which on some SDK error types includes the + # serialized request/response payload (model output, user prompt). + Rails.logger.warn("Langfuse trace creation failed: #{e.class}: #{e.message}") nil end @@ -286,7 +289,7 @@ class Provider::Anthropic < Provider upsert_langfuse_trace(trace: trace, output: output) end rescue => e - Rails.logger.warn("Langfuse logging failed: #{e.message}\n#{e.full_message}") + Rails.logger.warn("Langfuse logging failed: #{e.class}: #{e.message}") end def upsert_langfuse_trace(trace:, output:, level: nil) @@ -297,7 +300,7 @@ class Provider::Anthropic < Provider langfuse_client.trace(**payload) rescue => e - Rails.logger.warn("Langfuse trace upsert failed for trace_id=#{trace&.id}: #{e.message}\n#{e.full_message}") + Rails.logger.warn("Langfuse trace upsert failed for trace_id=#{trace&.id}: #{e.class}: #{e.message}") nil end diff --git a/app/models/provider/anthropic/message_formatter.rb b/app/models/provider/anthropic/message_formatter.rb index b6ba2717f..7c697368c 100644 --- a/app/models/provider/anthropic/message_formatter.rb +++ b/app/models/provider/anthropic/message_formatter.rb @@ -111,15 +111,21 @@ class Provider::Anthropic::MessageFormatter } end + # Anthropic's Messages API requires `tool_use.input` to be a JSON object + # (map). Normalize any non-Hash result to `{}` so corrupt or legacy + # ToolCall::Function records can't produce a payload Anthropic rejects. def parse_arguments(arguments) - case arguments - when nil then {} - when Hash then arguments - when String - return {} if arguments.blank? - JSON.parse(arguments) - else arguments - end + parsed = + case arguments + when nil then {} + when Hash then arguments + when String + return {} if arguments.blank? + JSON.parse(arguments) + else arguments + end + + parsed.is_a?(Hash) ? parsed : {} rescue JSON::ParserError {} end diff --git a/test/models/provider/anthropic/message_formatter_test.rb b/test/models/provider/anthropic/message_formatter_test.rb index cff8c625f..88a7d20e9 100644 --- a/test/models/provider/anthropic/message_formatter_test.rb +++ b/test/models/provider/anthropic/message_formatter_test.rb @@ -171,6 +171,45 @@ class Provider::Anthropic::MessageFormatterTest < ActiveSupport::TestCase assert_equal "", messages[2][:content].first[:content] end + # Anthropic's tool_use.input MUST be a JSON object (map). If a stored + # ToolCall::Function record carries arguments that parse to a scalar or + # array (corrupt row, legacy data, OpenAI cross-bleed), the formatter + # must coerce them to `{}` so we don't ship an invalid payload. + test "coerces non-Hash parsed arguments to empty Hash" do + [ '"hello"', "123", "true", "[1,2,3]" ].each do |non_object_json| + formatter = Provider::Anthropic::MessageFormatter.new( + prompt: "go", + function_results: [ { + call_id: "toolu_x", + name: "noop", + arguments: non_object_json, + output: nil + } ] + ) + + messages = formatter.build + + assert_equal({}, messages[1][:content].first[:input], + "expected empty Hash for arguments=#{non_object_json.inspect}") + end + end + + test "coerces non-Hash non-String arguments to empty Hash" do + formatter = Provider::Anthropic::MessageFormatter.new( + prompt: "go", + function_results: [ { + call_id: "toolu_x", + name: "noop", + arguments: [ 1, 2, 3 ], + output: nil + } ] + ) + + messages = formatter.build + + assert_equal({}, messages[1][:content].first[:input]) + end + private def stub_user_message(content) msg = UserMessage.new(content: content, ai_model: "claude-sonnet-4-6")