Wire conversation history through OpenAI responses API

This commit is contained in:
Juan José Mata
2026-04-06 00:11:47 +02:00
parent 0dd3990502
commit d950018321
8 changed files with 176 additions and 40 deletions

View File

@@ -28,8 +28,16 @@ TWELVE_DATA_API_KEY =
OPENAI_ACCESS_TOKEN =
OPENAI_URI_BASE =
OPENAI_MODEL =
# OPENAI_DEFAULT_MODEL: The default model to use for OpenAI
# OPENAI_SUPPORTED_MODELS: The supported models for OpenAI
# OPENAI_SUPPORTED_VISION_MODELS: OpenAI models that support vision
# OPENAI_LLM_JSON_MODE / LLM_JSON_MODE: The JSON mode to use for AI responses
# OPENAI_CUSTOM_PROVIDER: If you are using a custom provider that is fully OpenAI-compatible, set this to false
# OPENAI_SUPPORTS_RESPONSES_ENDPOINT: If your custom provider supports OpenAI responses api set this to true
# OPENAI_REQUEST_TIMEOUT: Request timeout in seconds (default: 60)
# OPENAI_SUPPORTS_PDF_PROCESSING: Set to false for endpoints without vision support (default: true)
# OPENAI_MODEL_PROMPT_PRICE: The price to use for the selected model prompts
# OPENAI_MODEL_COMPLETION_PRICE: The price to use for the selected model completion
# Example OpenAI-compatible API endpoint config (e.g., LM Studio running in Docker):
# OPENAI_URI_BASE = http://host.docker.internal:1234/

View File

@@ -79,6 +79,7 @@ class Assistant::Responder
instructions: instructions,
functions: function_tool_caller.function_definitions,
function_results: function_results,
messages: conversation_history,
streamer: streamer,
previous_response_id: previous_response_id,
session_id: chat_session_id,
@@ -114,4 +115,42 @@ class Assistant::Responder
def chat
@chat ||= message.chat
end
# Builds the prior chat history in OpenAI message format so it can be
# replayed through the responses/chat APIs.
#
# Returns an Array of Hashes with :role and :content keys. Assistant
# messages that invoked tools also carry :tool_calls, each followed by
# one "tool" role message containing the serialized result.
def conversation_history
  history = []
  return history unless chat&.messages

  chat.messages
      .eager_load(:tool_calls) # avoid N+1 when reading tool_calls below
      .where(type: [ "UserMessage", "AssistantMessage" ])
      .ordered
      .each do |chat_message|
    if chat_message.tool_calls.any?
      history << {
        role: chat_message.role,
        # Some OpenAI-compatible APIs require a string here, not null
        content: chat_message.content || "",
        tool_calls: chat_message.tool_calls.map(&:to_tool_call)
      }

      chat_message.tool_calls.map(&:to_result).each do |fn_result|
        history << {
          role: "tool",
          tool_call_id: fn_result[:call_id],
          name: fn_result[:name],
          content: serialize_tool_output(fn_result[:output])
        }
      end
    elsif chat_message.content.present?
      # present? guarantees content is non-nil, so no "" fallback needed
      history << { role: chat_message.role, content: chat_message.content }
    end
  end

  history
end

# Serializes a tool result for the "content" field of a tool message.
# nil becomes "" (avoids serializing to the literal string "null"),
# strings pass through unchanged, anything else is JSON-encoded.
def serialize_tool_output(output)
  return "" if output.nil?
  output.is_a?(String) ? output : output.to_json
end
end

View File

@@ -26,9 +26,9 @@ class Chat < ApplicationRecord
end
# Returns the default AI model to use for chats
# Priority: ENV variable > Setting > OpenAI default
# Priority: AI Config > Setting
def default_model
ENV["OPENAI_MODEL"].presence || Setting.openai_model.presence || Provider::Openai::DEFAULT_MODEL
Provider::Openai.effective_model.presence || Setting.openai_model
end
end

View File

@@ -40,6 +40,7 @@ module Provider::LlmConcept
instructions: nil,
functions: [],
function_results: [],
messages: nil,
streamer: nil,
previous_response_id: nil,
session_id: nil,

View File

@@ -4,31 +4,53 @@ class Provider::Openai < Provider
# Subclass so errors caught in this provider are raised as Provider::Openai::Error
Error = Class.new(Provider::Error)
# Supported OpenAI model prefixes (e.g., "gpt-4" matches "gpt-4", "gpt-4.1", "gpt-4-turbo", etc.)
DEFAULT_OPENAI_MODEL_PREFIXES = %w[gpt-4 gpt-5 o1 o3]
DEFAULT_MODEL = "gpt-4.1"
LLM_FILE_PATH = Rails.root.join("config", "llm.yml")
# Loads and memoizes config/llm.yml. The file is ERB-evaluated first (so
# values can be pulled from ENV), then parsed with YAML.safe_load. A file
# that parses to nil yields an empty hash. The result is a frozen
# HashWithIndifferentAccess.
def self.llm_config
  @llm_config ||= begin
    rendered = ERB.new(File.read(LLM_FILE_PATH)).result
    parsed = YAML.safe_load(
      rendered,
      permitted_classes: [],
      permitted_symbols: [],
      aliases: true
    )
    (parsed || {}).with_indifferent_access.freeze
  end
end
# Returns the frozen configuration hash for the currently active LLM
# provider (llm.yml key "active_provider", defaulting to "openai").
# Yields an empty hash when no matching provider entry exists.
def self.active_provider
  @active_provider ||= begin
    provider_name = llm_config.fetch(:active_provider, "openai")
    provider_config = llm_config.dig("providers", provider_name) || {}
    provider_config.with_indifferent_access.freeze
  end
end
# Models that support PDF/vision input (not all OpenAI models have vision capabilities)
VISION_CAPABLE_MODEL_PREFIXES = %w[gpt-4o gpt-4-turbo gpt-4.1 gpt-5 o1 o3].freeze
# Returns the effective model that would be used by the provider
# Uses the same logic as Provider::Registry and the initializer
def self.effective_model
configured_model = ENV.fetch("OPENAI_MODEL", Setting.openai_model)
configured_model.presence || DEFAULT_MODEL
configured_model = ENV.fetch("OPENAI_MODEL") do
active_provider.fetch(:model, Setting.openai_model)
end
configured_model.presence || active_provider[:default_model]
end
DEFAULT_MODEL = self.active_provider[:default_model]
VISION_CAPABLE_MODEL_PREFIXES = self.active_provider[:supported_vision_models].to_s.split(/\s+/).freeze
# Instance-level convenience accessor so instance methods can read the
# active provider's configuration hash without repeating the class lookup.
def active_provider
  self.class.active_provider
end
def initialize(access_token, uri_base: nil, model: nil)
client_options = { access_token: access_token }
client_options[:uri_base] = uri_base if uri_base.present?
client_options = { access_token: access_token || active_provider[:access_token] }
llm_uri_base = uri_base.presence || active_provider[:uri_base]
llm_model = model.presence || active_provider[:model]
client_options[:uri_base] = llm_uri_base if llm_uri_base.present?
client_options[:request_timeout] = ENV.fetch("OPENAI_REQUEST_TIMEOUT", 60).to_i
@client = ::OpenAI::Client.new(**client_options)
@uri_base = uri_base
if custom_provider? && model.blank?
@uri_base = llm_uri_base
if custom_provider? && llm_model.blank?
raise Error, "Model is required when using a custom OpenAIcompatible provider"
end
@default_model = model.presence || DEFAULT_MODEL
@default_model = llm_model.presence || self.class.effective_model
end
def supports_model?(model)
@@ -36,7 +58,18 @@ class Provider::Openai < Provider
return true if custom_provider?
# Otherwise, check if model starts with any supported OpenAI prefix
DEFAULT_OPENAI_MODEL_PREFIXES.any? { |prefix| model.start_with?(prefix) }
active_provider[:supported_models].split(/\s+/).any? { |prefix| model.start_with?(prefix) }
end
# Whether the provider supports the OpenAI responses API endpoint.
# An explicit value in the provider config wins (cast to boolean);
# otherwise native OpenAI is assumed to support it and custom providers
# are assumed not to. Memoized, including false results.
def supports_responses_endpoint?
  return @supports_responses_endpoint if defined?(@supports_responses_endpoint)

  configured = active_provider[:supports_responses_endpoint]
  @supports_responses_endpoint =
    if configured.to_s.present?
      ActiveModel::Type::Boolean.new.cast(configured)
    else
      !custom_provider?
    end
end
def provider_name
@@ -47,11 +80,14 @@ class Provider::Openai < Provider
if custom_provider?
@default_model.present? ? "configured model: #{@default_model}" : "any model"
else
"models starting with: #{DEFAULT_OPENAI_MODEL_PREFIXES.join(', ')}"
prefixes = self.active_provider[:supported_models].to_s.split(/\s+/).join(", ")
"models starting with: #{prefixes}"
end
end
# Whether this provider is a custom (non-native-OpenAI) endpoint.
# An explicit custom_provider flag in the config takes precedence;
# otherwise the presence of a custom uri_base implies a custom provider.
def custom_provider?
  flag = active_provider[:custom_provider]
  if flag.to_s.blank?
    @uri_base.present?
  else
    ActiveModel::Type::Boolean.new.cast(flag)
  end
end
@@ -207,37 +243,40 @@ class Provider::Openai < Provider
instructions: nil,
functions: [],
function_results: [],
messages: nil,
streamer: nil,
previous_response_id: nil,
session_id: nil,
user_identifier: nil,
family: nil
)
if custom_provider?
generic_chat_response(
prompt: prompt,
model: model,
instructions: instructions,
functions: functions,
function_results: function_results,
streamer: streamer,
session_id: session_id,
user_identifier: user_identifier,
family: family
)
else
if supports_responses_endpoint?
native_chat_response(
prompt: prompt,
model: model,
instructions: instructions,
functions: functions,
function_results: function_results,
messages: messages,
streamer: streamer,
previous_response_id: previous_response_id,
session_id: session_id,
user_identifier: user_identifier,
family: family
)
else
generic_chat_response(
prompt: prompt,
model: model,
instructions: instructions,
functions: functions,
function_results: function_results,
messages: messages,
streamer: streamer,
session_id: session_id,
user_identifier: user_identifier,
family: family
)
end
end
@@ -250,6 +289,7 @@ class Provider::Openai < Provider
instructions: nil,
functions: [],
function_results: [],
messages: nil,
streamer: nil,
previous_response_id: nil,
session_id: nil,
@@ -278,7 +318,10 @@ class Provider::Openai < Provider
nil
end
input_payload = chat_config.build_input(prompt)
input_payload = chat_config.build_input(
prompt: prompt,
messages: messages
)
begin
raw_response = client.responses.create(parameters: {
@@ -344,6 +387,7 @@ class Provider::Openai < Provider
instructions: nil,
functions: [],
function_results: [],
messages: nil,
streamer: nil,
session_id: nil,
user_identifier: nil,
@@ -353,7 +397,8 @@ class Provider::Openai < Provider
messages = build_generic_messages(
prompt: prompt,
instructions: instructions,
function_results: function_results
function_results: function_results,
messages: messages
)
tools = build_generic_tools(functions)
@@ -412,16 +457,20 @@ class Provider::Openai < Provider
end
end
def build_generic_messages(prompt:, instructions: nil, function_results: [])
messages = []
def build_generic_messages(prompt:, instructions: nil, function_results: [], messages: nil)
payload = []
# Add system message if instructions present
if instructions.present?
messages << { role: "system", content: instructions }
payload << { role: "system", content: instructions }
end
# Add user prompt
messages << { role: "user", content: prompt }
# Add conversation history or user prompt
if messages.present?
payload.concat(messages)
elsif prompt.present?
payload << { role: "user", content: prompt }
end
# If there are function results, we need to add the assistant message that made the tool calls
# followed by the tool messages with the results
@@ -442,7 +491,7 @@ class Provider::Openai < Provider
}
end
messages << {
payload << {
role: "assistant",
content: "", # Some OpenAI-compatible APIs require string, not null
tool_calls: tool_calls
@@ -462,7 +511,7 @@ class Provider::Openai < Provider
output.to_json
end
messages << {
payload << {
role: "tool",
tool_call_id: fn_result[:call_id],
name: fn_result[:name],
@@ -471,7 +520,7 @@ class Provider::Openai < Provider
end
end
messages
payload
end
def build_generic_tools(functions)

View File

@@ -16,7 +16,15 @@ class Provider::Openai::ChatConfig
end
end
def build_input(prompt)
def build_input(prompt: nil, messages: nil)
input_messages = []
if messages.present?
input_messages.concat(messages)
elsif prompt.present?
input_messages << { role: "user", content: prompt }
end
results = function_results.map do |fn_result|
# Handle nil explicitly to avoid serializing to "null"
output = fn_result[:output]
@@ -36,7 +44,7 @@ class Provider::Openai::ChatConfig
end
[
{ role: "user", content: prompt },
*input_messages,
*results
]
end

24
config/llm.yml Normal file
View File

@@ -0,0 +1,24 @@
# LLM provider configuration. Every value can be overridden via ENV.
active_provider: <%= ENV.fetch("LLM_ACTIVE_PROVIDER") { "openai" } %>
debug: <%= ENV.fetch("AI_DEBUG_MODE") { "false" } %>

providers:
  openai:
    access_token: <%= ENV.fetch("OPENAI_ACCESS_TOKEN") { nil } %>
    # Explicitly selected model; takes precedence over default_model
    model: <%= ENV.fetch("OPENAI_MODEL") { nil } %>
    default_model: <%= ENV.fetch("OPENAI_DEFAULT_MODEL") { "gpt-4.1" } %>
    # Supported OpenAI model prefixes (e.g., "gpt-4" matches "gpt-4", "gpt-4.1", "gpt-4-turbo", etc.)
    supported_models: <%= ENV.fetch("OPENAI_SUPPORTED_MODELS") { "gpt-4 gpt-5 o1 o3" } %>
    supported_vision_models: <%= ENV.fetch("OPENAI_SUPPORTED_VISION_MODELS") { "gpt-4o gpt-4-turbo gpt-4.1 gpt-5 o1 o3" } %>
    uri_base: <%= ENV.fetch("OPENAI_URI_BASE") { nil } %>
    # JSON mode for AI responses:
    # - "auto": Tries strict first, falls back to none if >50% fail (recommended default)
    # - "strict": Best for thinking models (qwen-thinking, deepseek-reasoner) - skips verbose <think> tags
    # - "none": Best for non-thinking models (gpt-oss, llama, mistral) - allows reasoning in output
    # - "json_object": Middle ground, broader compatibility than strict
    json_mode: <%= ENV.fetch("OPENAI_LLM_JSON_MODE") { ENV.fetch("LLM_JSON_MODE") { nil } } %>
    # custom / native
    custom_provider: <%= ENV.fetch("OPENAI_CUSTOM_PROVIDER") { nil } %>
    supports_responses_endpoint: <%= ENV.fetch("OPENAI_SUPPORTS_RESPONSES_ENDPOINT") { nil } %>
    supports_pdf_processing: <%= ENV.fetch("OPENAI_SUPPORTS_PDF_PROCESSING") { nil } %>
    # Price overrides for the selected model's prompt and completion tokens
    # (units presumably per 1M tokens — confirm against the cost-tracking code)
    pricing:
      '<%= ENV.fetch("OPENAI_MODEL") { ENV.fetch("OPENAI_DEFAULT_MODEL") { "gpt-4.1" } } %>':
        prompt: <%= ENV.fetch("OPENAI_MODEL_PROMPT_PRICE") { "2.00" } %>
        completion: <%= ENV.fetch("OPENAI_MODEL_COMPLETION_PRICE") { "8.00" } %>

View File

@@ -14,6 +14,10 @@ class AssistantTest < ActiveSupport::TestCase
@provider = mock
@expected_session_id = @chat.id.to_s
@expected_user_identifier = ::Digest::SHA256.hexdigest(@chat.user_id.to_s)
@expected_conversation_history = [
{role: "user", content: "Can you help me understand my spending habits?"},
{role: "user", content: "What is my net worth?"}
]
end
test "errors get added to chat" do
@@ -100,6 +104,7 @@ class AssistantTest < ActiveSupport::TestCase
@provider.expects(:chat_response).with do |message, **options|
assert_equal @expected_session_id, options[:session_id]
assert_equal @expected_user_identifier, options[:user_identifier]
assert_equal @expected_conversation_history, options[:messages]
text_chunks.each do |text_chunk|
options[:streamer].call(text_chunk)
end
@@ -154,6 +159,7 @@ class AssistantTest < ActiveSupport::TestCase
@provider.expects(:chat_response).with do |message, **options|
assert_equal @expected_session_id, options[:session_id]
assert_equal @expected_user_identifier, options[:user_identifier]
assert_equal @expected_conversation_history, options[:messages]
call2_text_chunks.each do |text_chunk|
options[:streamer].call(text_chunk)
end
@@ -165,6 +171,7 @@ class AssistantTest < ActiveSupport::TestCase
@provider.expects(:chat_response).with do |message, **options|
assert_equal @expected_session_id, options[:session_id]
assert_equal @expected_user_identifier, options[:user_identifier]
assert_equal @expected_conversation_history, options[:messages]
options[:streamer].call(call1_response_chunk)
true
end.returns(call1_response).once.in_sequence(sequence)