fix(chat): eager pending AssistantMessage to fix Turbo subscribe race (#1657) (#1658)

* fix(chat): persist eager pending assistant message to fix subscribe race

When the LLM replies in ~1-2s the assistant message broadcast could
fire before the client's Turbo stream subscription was established,
leaving the UI stuck on the thinking indicator while the response was
already persisted.

Create the AssistantMessage as `pending` synchronously in
`Chat#ask_assistant_later`, so it is rendered server-side on the chat
show page with a "Thinking ..." inline placeholder. The worker then
finds and updates the existing row via `append_text!`, which flips the
status to `complete` and broadcasts updates against a DOM id that is
already in the page — no race possible. On error, the placeholder is
destroyed if no content streamed, otherwise demoted to `failed`.

Replaces the standalone thinking indicator partial and the
`Assistant::Broadcastable` thinking helpers, both now redundant.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* fix(chat): bind each assistant job to its specific pending placeholder

Addressing review feedback on #1658:

1. The pending placeholder lookup based on `last pending` was racy —
   back-to-back user messages would let one job fill another job's
   placeholder. Pass the placeholder through the job arguments
   (`AssistantResponseJob.perform_later(user_message, pending)`) so
   each turn is bound to its own row.

2. In `Assistant::External#respond_to`, the configured/authorized
   guards raised before the local was bound, leaving rescue cleanup
   with `nil` and the placeholder visible forever. Bind the local from
   the parameter first so cleanup can destroy it on the misconfigured
   path.

The kwarg defaults to nil so the API#retry path
(`AssistantResponseJob.perform_later(new_message)`) and the model-level
test calls continue to work — they fall back to an in-memory new
message, restoring the original test count assertions.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* fix(chat): i18n the pending assistant placeholder string

Move the hardcoded "Thinking ..." indicator into the locale file per
CLAUDE.md i18n guidelines. With i18n.fallbacks enabled, non-en locales
fall back to English until translated.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* Add thinking label translations

* Fix chat pending assistant expectations

* Fix external assistant pending test lookup

* Scope chat stream targets per chat

* Update message broadcast target tests

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Michal Tajchert
2026-05-03 20:33:29 +02:00
committed by GitHub
parent 50936000e7
commit ccd6a53071
28 changed files with 91 additions and 92 deletions

View File

@@ -43,7 +43,7 @@ class ChatsController < ApplicationController
def retry
@chat.retry_last_message!
redirect_to chat_path(@chat, thinking: true)
redirect_to chat_path(@chat)
end
private

View File

@@ -1,7 +1,7 @@
class AssistantResponseJob < ApplicationJob
queue_as :high_priority
def perform(message)
message.request_response
def perform(message, assistant_message = nil)
message.request_response(assistant_message: assistant_message)
end
end

View File

@@ -1,13 +1,11 @@
class Assistant::Base
include Assistant::Broadcastable
attr_reader :chat
def initialize(chat)
@chat = chat
end
def respond_to(message)
def respond_to(message, assistant_message: nil)
raise NotImplementedError, "#{self.class}#respond_to must be implemented"
end
end

View File

@@ -1,12 +0,0 @@
module Assistant::Broadcastable
extend ActiveSupport::Concern
private
def update_thinking(thought)
chat.broadcast_update target: "thinking-indicator", partial: "chats/thinking_indicator", locals: { chat: chat, message: thought }
end
def stop_thinking
chat.broadcast_remove target: "thinking-indicator"
end
end

View File

@@ -17,12 +17,8 @@ class Assistant::Builtin < Assistant::Base
@functions = functions
end
def respond_to(message)
assistant_message = AssistantMessage.new(
chat: chat,
content: "",
ai_model: message.ai_model
)
def respond_to(message, assistant_message: nil)
assistant_message ||= AssistantMessage.new(chat: chat, content: "", ai_model: message.ai_model)
llm_provider = get_model_provider(message.ai_model)
unless llm_provider
@@ -40,7 +36,6 @@ class Assistant::Builtin < Assistant::Base
responder.on(:output_text) do |text|
if assistant_message.content.blank?
stop_thinking
Chat.transaction do
assistant_message.append_text!(text)
chat.update_latest_response!(latest_response_id)
@@ -51,7 +46,6 @@ class Assistant::Builtin < Assistant::Base
end
responder.on(:response) do |data|
update_thinking("Analyzing your data...")
if data[:function_tool_calls].present?
assistant_message.tool_calls = data[:function_tool_calls]
latest_response_id = data[:id]
@@ -62,13 +56,13 @@ class Assistant::Builtin < Assistant::Base
responder.respond(previous_response_id: latest_response_id)
rescue => e
stop_thinking
# If we streamed any partial content before the error, the message was
# persisted with the default `complete` status. Demote it to `failed` so
# `Assistant::Responder#conversation_history` won't feed a broken turn
# back into future prompts.
if assistant_message&.persisted?
assistant_message.update_columns(status: "failed")
if assistant_message.content.blank?
assistant_message.destroy
else
# Demote partially-streamed turns to `failed` so `Responder#conversation_history` excludes them.
assistant_message.update_columns(status: "failed")
end
end
chat.add_error(e)
end

View File

@@ -33,8 +33,9 @@ class Assistant::External < Assistant::Base
end
end
def respond_to(message)
def respond_to(message, assistant_message: nil)
response_completed = false
assistant_message ||= AssistantMessage.new(chat: chat, content: "", ai_model: "external-agent")
unless self.class.configured?
raise Assistant::Error,
@@ -45,12 +46,6 @@ class Assistant::External < Assistant::Base
raise Assistant::Error, "Your account is not authorized to use the external assistant."
end
assistant_message = AssistantMessage.new(
chat: chat,
content: "",
ai_model: "external-agent"
)
client = build_client
messages = build_conversation_messages
@@ -58,17 +53,10 @@ class Assistant::External < Assistant::Base
messages: messages,
user: "sure-family-#{chat.user.family_id}"
) do |text|
if assistant_message.content.blank?
stop_thinking
assistant_message.content = text
assistant_message.save!
else
assistant_message.append_text!(text)
end
assistant_message.append_text!(text)
end
if assistant_message.new_record?
stop_thinking
if assistant_message.content.blank?
raise Assistant::Error, "External assistant returned an empty response."
end
@@ -76,12 +64,10 @@ class Assistant::External < Assistant::Base
assistant_message.update!(ai_model: model) if model.present?
rescue Assistant::Error, ActiveRecord::ActiveRecordError => e
cleanup_partial_response(assistant_message) unless response_completed
stop_thinking
chat.add_error(e)
rescue => e
Rails.logger.error("[Assistant::External] Unexpected error: #{e.class} - #{e.message}")
cleanup_partial_response(assistant_message) unless response_completed
stop_thinking
chat.add_error(Assistant::Error.new("Something went wrong with the external assistant. Check server logs for details."))
end
@@ -103,7 +89,7 @@ class Assistant::External < Assistant::Base
end
def build_conversation_messages
chat.conversation_messages.ordered.last(MAX_CONVERSATION_MESSAGES).map do |msg|
chat.conversation_messages.where(status: "complete").ordered.last(MAX_CONVERSATION_MESSAGES).map do |msg|
{ role: msg.role, content: msg.content }
end
end

View File

@@ -7,6 +7,7 @@ class AssistantMessage < Message
def append_text!(text)
self.content += text
self.status = :complete if pending?
save!
end
end

View File

@@ -79,7 +79,7 @@ class Chat < ApplicationRecord
def add_error(e)
update!(error: build_error_payload(e).to_json)
broadcast_append target: "messages", partial: "chats/error", locals: { chat: self }
broadcast_append target: messages_target, partial: "chats/error", locals: { chat: self }
end
def presentable_error_message
@@ -93,20 +93,29 @@ class Chat < ApplicationRecord
def clear_error
update! error: nil
broadcast_remove target: "chat-error"
broadcast_remove target: error_target
end
def conversation_messages
messages.where(type: [ "UserMessage", "AssistantMessage" ])
end
def ask_assistant_later(message)
clear_error
AssistantResponseJob.perform_later(message)
def messages_target
ActionView::RecordIdentifier.dom_id(self, :messages)
end
def ask_assistant(message)
assistant.respond_to(message)
def error_target
ActionView::RecordIdentifier.dom_id(self, :chat_error)
end
def ask_assistant_later(message)
clear_error
pending = messages.create!(type: "AssistantMessage", content: "", ai_model: message.ai_model, status: :pending)
AssistantResponseJob.perform_later(message, pending)
end
def ask_assistant(message, assistant_message: nil)
assistant.respond_to(message, assistant_message: assistant_message)
end
private

View File

@@ -8,9 +8,9 @@ class Message < ApplicationRecord
failed: "failed"
}
validates :content, presence: true
validates :content, presence: true, unless: :pending?
after_create_commit -> { broadcast_append_to chat, target: "messages" }, if: :broadcast?
after_create_commit -> { broadcast_append_to chat, target: chat.messages_target }, if: :broadcast?
after_update_commit -> { broadcast_update_to chat }, if: :broadcast?
scope :ordered, -> { order(created_at: :asc) }

View File

@@ -11,7 +11,7 @@ class UserMessage < Message
chat.ask_assistant_later(self)
end
def request_response
chat.ask_assistant(self)
def request_response(assistant_message: nil)
chat.ask_assistant(self, assistant_message: assistant_message)
end
end

View File

@@ -1,7 +1,12 @@
<%# locals: (assistant_message:) %>
<div id="<%= dom_id(assistant_message) %>">
<% if assistant_message.reasoning? %>
<% if assistant_message.pending? %>
<div class="flex items-start gap-3 mb-6">
<%= render "chats/ai_avatar" %>
<p class="text-sm text-secondary animate-pulse"><%= t("chats.thinking") %></p>
</div>
<% elsif assistant_message.reasoning? %>
<details class="group mb-1">
<summary class="flex items-center gap-2">
<p class="text-secondary text-sm">Assistant reasoning</p>

View File

@@ -1,6 +1,6 @@
<%# locals: (chat:) %>
<div id="chat-error" class="px-3 py-2 bg-red-100 border border-red-500 rounded-lg">
<div id="<%= chat.error_target %>" class="px-3 py-2 bg-red-100 border border-red-500 rounded-lg">
<% if chat.debug_mode? %>
<div class="overflow-x-auto text-xs p-4 bg-red-200 rounded-md mb-2">
<code><%= chat.technical_error_message %></code>

View File

@@ -1,6 +0,0 @@
<%# locals: (chat:, message: "Thinking ...") -%>
<div id="thinking-indicator" class="flex items-start gap-3">
<%= render "chats/ai_avatar" %>
<p class="text-sm text-secondary animate-pulse"><%= message %></p>
</div>

View File

@@ -15,7 +15,7 @@
<% end %>
</div>
<div id="messages" class="grow overflow-y-auto p-4 space-y-6 lg:pb-4" data-chat-target="messages">
<div id="<%= @chat.messages_target %>" class="grow overflow-y-auto p-4 space-y-6 lg:pb-4" data-chat-target="messages">
<% if @chat.conversation_messages.any? %>
<% @chat.conversation_messages.ordered.each do |message| %>
<%= render message %>
@@ -26,10 +26,6 @@
</div>
<% end %>
<% if params[:thinking].present? %>
<%= render "chats/thinking_indicator", chat: @chat %>
<% end %>
<% if @chat.error.present? && @chat.needs_assistant_response? %>
<%= render "chats/error", chat: @chat %>
<% end %>

View File

@@ -5,3 +5,4 @@ ca:
per Cloudflare Workers AI. Els resultats poden variar ja que el codi es va provar
principalment amb `gpt-4.1` però els teus tokens no van a cap altre lloc per ser entrenats! 🤖"
demo_banner_title: Mode de demostració
thinking: "Treballant ..."

View File

@@ -3,3 +3,4 @@ de:
chats:
demo_banner_title: "Demo-Modus aktiv"
demo_banner_message: "Sie nutzen ein Open-Weight Qwen3-LLM mit Credits von Cloudflare Workers AI. Die Ergebnisse können variieren, da die Codebasis hauptsächlich mit `gpt-4.1` getestet wurde Ihre Tokens werden jedoch nicht anderswo zum Training verwendet! 🤖"
thinking: "Wird verarbeitet ..."

View File

@@ -3,3 +3,4 @@ en:
chats:
demo_banner_title: "Demo Mode Active"
demo_banner_message: "You are using LLMs via credits provided by Cloudflare Workers AI. Results may vary since the codebase was tested on `gpt-4.1` but your tokens don't go anywhere else to be trained with! 🤖"
thinking: "Thinking ..."

View File

@@ -2,4 +2,5 @@
es:
chats:
demo_banner_title: "Modo de demostración activo"
demo_banner_message: "Estás utilizando un LLM Qwen3 de pesos abiertos con créditos proporcionados por Cloudflare Workers AI. Los resultados pueden variar, ya que la base de código se probó principalmente con `gpt-4.1`, ¡pero tus tokens no se enviarán a ningún otro lugar para ser entrenados! 🤖"
demo_banner_message: "Estás utilizando un LLM Qwen3 de pesos abiertos con créditos proporcionados por Cloudflare Workers AI. Los resultados pueden variar, ya que la base de código se probó principalmente con `gpt-4.1`, ¡pero tus tokens no se enviarán a ningún otro lugar para ser entrenados! 🤖"
thinking: "Procesando ..."

View File

@@ -3,3 +3,4 @@ fr:
chats:
demo_banner_title: "Mode Démo Actif"
demo_banner_message: "Vous utilisez un LLM Qwen3 open-weights avec des crédits fournis par Cloudflare Workers AI. Les résultats peuvent varier car le code a été principalement testé sur `gpt-4.1` mais vos tokens ne sont envoyés nulle part ailleurs pour être entraînés !"
thinking: "Traitement en cours ..."

View File

@@ -3,3 +3,4 @@ nl:
chats:
demo_banner_title: "Demo Modus Actief"
demo_banner_message: "U gebruikt een LLM met credits verstrekt door Cloudflare Workers AI. Resultaten kunnen variëren aangezien de codebase voornamelijk is getest op `gpt-4.1`, maar uw tokens worden nergens anders voor training gebruikt! 🤖"
thinking: "Bezig ..."

View File

@@ -3,3 +3,4 @@ pl:
chats:
demo_banner_title: "Aktywny tryb demo"
demo_banner_message: "Używasz modelu LLM z kredytami udostępnionymi przez Cloudflare Workers AI. Wyniki mogą się różnić, ponieważ kod aplikacji był głównie testowany na `gpt-4.1`, ale Twoje tokeny nie są nigdzie dalej wykorzystywane do trenowania! 🤖"
thinking: "Przetwarzanie ..."

View File

@@ -3,3 +3,4 @@ zh-CN:
chats:
demo_banner_message: "您正在使用由 Cloudflare Workers AI 提供额度的开源权重 Qwen3 大语言模型。由于代码库主要在 `gpt-4.1` 上测试,结果可能有所不同,但您的令牌不会被用于其他地方进行训练!🤖"
demo_banner_title: 演示模式已激活
thinking: "处理中 ..."

View File

@@ -3,3 +3,4 @@ zh-TW:
chats:
demo_banner_message: "您正在使用由 Cloudflare Workers AI 提供額度的開源權重 Qwen3 大語言模型。由於程式碼庫主要在 `gpt-4.1` 上測試,結果可能有所不同,但您的令牌不會被用於其他地方進行訓練!🤖"
demo_banner_title: 演示模式已啟動
thinking: "處理中 ..."

View File

@@ -37,7 +37,7 @@ class Api::V1::MessagesControllerTest < ActionDispatch::IntegrationTest
end
test "should create message with write scope" do
assert_difference "Message.count" do
assert_difference "UserMessage.count" do
post "/api/v1/chats/#{@chat.id}/messages",
params: { content: "Test message", model: "gpt-4" },
headers: bearer_auth_header(@write_token)

View File

@@ -12,7 +12,7 @@ class AssistantMessageTest < ActiveSupport::TestCase
streams = capture_turbo_stream_broadcasts(@chat)
assert_equal 2, streams.size
assert_equal "append", streams.first["action"]
assert_equal "messages", streams.first["target"]
assert_equal @chat.messages_target, streams.first["target"]
assert_equal "update", streams.last["action"]
assert_equal "assistant_message_#{message.id}", streams.last["target"]
end

View File

@@ -226,11 +226,13 @@ class AssistantTest < ActiveSupport::TestCase
"EXTERNAL_ASSISTANT_URL" => "http://localhost:18789/v1/chat",
"EXTERNAL_ASSISTANT_TOKEN" => "test-token"
) do
assert_difference "AssistantMessage.count", 1 do
assistant.respond_to(@message)
assistant_message = pending_assistant_message
assert_no_difference "AssistantMessage.count" do
assistant.respond_to(@message, assistant_message: assistant_message)
end
response_msg = @chat.messages.where(type: "AssistantMessage").last
response_msg = assistant_message.reload
assert_equal "Your net worth is $124,200.", response_msg.content
assert_equal "ext-agent:main", response_msg.ai_model
end
@@ -368,12 +370,13 @@ class AssistantTest < ActiveSupport::TestCase
"EXTERNAL_ASSISTANT_TOKEN" => "test-token"
) do
assistant = Assistant::External.new(@chat)
assistant.respond_to(@message)
assistant_message = pending_assistant_message
assistant.respond_to(@message, assistant_message: assistant_message)
@chat.reload
assert_nil @chat.error
response = @chat.messages.where(type: "AssistantMessage").last
response = assistant_message.reload
assert_equal "Based on your accounts, your net worth is $50,000.", response.content
assert_equal "ext-agent:main", response.ai_model
end
@@ -414,9 +417,10 @@ class AssistantTest < ActiveSupport::TestCase
"EXTERNAL_ASSISTANT_URL" => "http://localhost:18789/v1/chat",
"EXTERNAL_ASSISTANT_TOKEN" => "test-token"
) do
assistant.respond_to(@message)
assistant_message = pending_assistant_message
assistant.respond_to(@message, assistant_message: assistant_message)
response = @chat.messages.where(type: "AssistantMessage").last
response = assistant_message.reload
assert_equal "ext-agent:custom", response.ai_model
end
end
@@ -536,6 +540,10 @@ class AssistantTest < ActiveSupport::TestCase
capture
end
def pending_assistant_message
@chat.messages.where(type: "AssistantMessage", status: "pending").order(:created_at).last
end
def provider_function_request(id:, call_id:, function_name:, function_args:)
Provider::LlmConcept::ChatFunctionRequest.new(
id: id,

View File

@@ -18,14 +18,25 @@ class ChatTest < ActiveSupport::TestCase
assert_equal 3, chat.conversation_messages.count
end
test "uses chat-scoped stream targets" do
first_chat = chats(:one)
second_chat = chats(:two)
assert_not_equal "messages", first_chat.messages_target
assert_not_equal "chat-error", first_chat.error_target
assert_not_equal first_chat.messages_target, second_chat.messages_target
assert_not_equal first_chat.error_target, second_chat.error_target
end
test "creates with initial message" do
prompt = "Test prompt"
assert_difference "@user.chats.count", 1 do
chat = @user.chats.start!(prompt, model: "gpt-4.1")
assert_equal 1, chat.messages.count
assert_equal 2, chat.messages.count
assert_equal 1, chat.messages.where(type: "UserMessage").count
assert_equal 1, chat.messages.where(type: "AssistantMessage", status: "pending").count
end
end
@@ -35,8 +46,8 @@ class ChatTest < ActiveSupport::TestCase
assert_difference "@user.chats.count", 1 do
chat = @user.chats.start!(prompt, model: nil)
assert_equal 1, chat.messages.count
assert_equal Provider::Openai::DEFAULT_MODEL, chat.messages.first.ai_model
assert_equal 2, chat.messages.count
assert_equal Provider::Openai::DEFAULT_MODEL, chat.messages.find_by!(type: "UserMessage").ai_model
end
end
@@ -46,8 +57,8 @@ class ChatTest < ActiveSupport::TestCase
assert_difference "@user.chats.count", 1 do
chat = @user.chats.start!(prompt, model: "")
assert_equal 1, chat.messages.count
assert_equal Provider::Openai::DEFAULT_MODEL, chat.messages.first.ai_model
assert_equal 2, chat.messages.count
assert_equal Provider::Openai::DEFAULT_MODEL, chat.messages.find_by!(type: "UserMessage").ai_model
end
end
@@ -57,7 +68,7 @@ class ChatTest < ActiveSupport::TestCase
with_env_overrides OPENAI_MODEL: "custom-model" do
chat = @user.chats.start!(prompt, model: "")
assert_equal "custom-model", chat.messages.first.ai_model
assert_equal "custom-model", chat.messages.find_by!(type: "UserMessage").ai_model
end
end

View File

@@ -14,7 +14,7 @@ class UserMessageTest < ActiveSupport::TestCase
streams = capture_turbo_stream_broadcasts(@chat)
assert_equal 2, streams.size
assert_equal "append", streams.first["action"]
assert_equal "messages", streams.first["target"]
assert_equal @chat.messages_target, streams.first["target"]
assert_equal "update", streams.last["action"]
assert_equal "user_message_#{message.id}", streams.last["target"]
end