Add Langfuse-based LLM observability (#86)

* Add Langfuse-based LLM observability * Document Langfuse configuration * Don't hardcode model in use
2026-07-19 08:15:21 +00:00 · 2025-08-06 14:23:07 -07:00
parent 099425d240
commit f6dde1a098
7 changed files with 128 additions and 14 deletions
--- a/1
+++ b/1
@@ -80,6 +80,7 @@ gem "after_commit_everywhere", "~> 1.0"

 # AI
 gem "ruby-openai"
+gem "langfuse-ruby", "~> 0.1.4", require: "langfuse"

 group :development, :test do
  gem "debug", platforms: %i[mri windows]
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -274,6 +274,11 @@ GEM
    json (2.12.2)
    jwt (2.10.2)
      base64
+    langfuse-ruby (0.1.4)
+      concurrent-ruby (~> 1.0)
+      faraday (>= 1.8, < 3.0)
+      faraday-net_http (>= 1.0, < 4.0)
+      json (~> 2.0)
    language_server-protocol (3.17.0.5)
    launchy (3.1.1)
      addressable (~> 2.8)
@@ -649,6 +654,7 @@ DEPENDENCIES
  inline_svg
  jbuilder
  jwt
+  langfuse-ruby (~> 0.1.4)
  letter_opener
  logtail-rails
  lookbook (= 2.3.11)
@@ -697,4 +703,4 @@ RUBY VERSION
   ruby 3.4.4p34

 BUNDLED WITH
-   2.6.9
+   2.6.7
--- a/app/models/provider/openai.rb
+++ b/app/models/provider/openai.rb
@@ -14,27 +14,47 @@ class Provider::Openai < Provider
    MODELS.include?(model)
  end

-  def auto_categorize(transactions: [], user_categories: [])
+  def auto_categorize(transactions: [], user_categories: [], model: "gpt-4.1-mini")
    with_provider_response do
      raise Error, "Too many transactions to auto-categorize. Max is 25 per request." if transactions.size > 25

-      AutoCategorizer.new(
+      result = AutoCategorizer.new(
        client,
+        model: model,
        transactions: transactions,
        user_categories: user_categories
      ).auto_categorize
+
+      log_langfuse_generation(
+        name: "auto_categorize",
+        model: model,
+        input: { transactions: transactions, user_categories: user_categories },
+        output: result.map(&:to_h)
+      )
+
+      result
    end
  end

-  def auto_detect_merchants(transactions: [], user_merchants: [])
+  def auto_detect_merchants(transactions: [], user_merchants: [], model: "gpt-4.1-mini")
    with_provider_response do
      raise Error, "Too many transactions to auto-detect merchants. Max is 25 per request." if transactions.size > 25

-      AutoMerchantDetector.new(
+      result = AutoMerchantDetector.new(
        client,
+        model: model,
        transactions: transactions,
        user_merchants: user_merchants
      ).auto_detect_merchants
+
+      log_langfuse_generation(
+        name: "auto_detect_merchants",
+        model: model,
+        input: { transactions: transactions, user_merchants: user_merchants },
+        output: result.map(&:to_h)
+      )
+
+      result
    end
  end

@@ -61,9 +81,11 @@ class Provider::Openai < Provider
        nil
      end

+      input_payload = chat_config.build_input(prompt)
+
      raw_response = client.responses.create(parameters: {
        model: model,
-        input: chat_config.build_input(prompt),
+        input: input_payload,
        instructions: instructions,
        tools: chat_config.tools,
        previous_response_id: previous_response_id,
@@ -74,13 +96,50 @@ class Provider::Openai < Provider
      # for the "response chunk" in the stream and return it (it is already parsed)
      if stream_proxy.present?
        response_chunk = collected_chunks.find { |chunk| chunk.type == "response" }
-        response_chunk.data
+        response = response_chunk.data
+        log_langfuse_generation(
+          name: "chat_response",
+          model: model,
+          input: input_payload,
+          output: response.messages.map(&:output_text).join("\n")
+        )
+        response
      else
-        ChatParser.new(raw_response).parsed
+        parsed = ChatParser.new(raw_response).parsed
+        log_langfuse_generation(
+          name: "chat_response",
+          model: model,
+          input: input_payload,
+          output: parsed.messages.map(&:output_text).join("\n"),
+          usage: raw_response["usage"]
+        )
+        parsed
      end
    end
  end

  private
    attr_reader :client
+
+    def langfuse_client
+      return unless ENV["LANGFUSE_PUBLIC_KEY"].present? && ENV["LANGFUSE_SECRET_KEY"].present?
+
+      @langfuse_client = Langfuse.new
+    end
+
+    def log_langfuse_generation(name:, model:, input:, output:, usage: nil)
+      return unless langfuse_client
+
+      trace = langfuse_client.trace(name: "openai.#{name}", input: input)
+      trace.generation(
+        name: name,
+        model: model,
+        input: input,
+        output: output,
+        usage: usage
+      )
+      trace.update(output: output)
+    rescue => e
+      Rails.logger.warn("Langfuse logging failed: #{e.message}")
+    end
 end
--- a/app/models/provider/openai/auto_categorizer.rb
+++ b/app/models/provider/openai/auto_categorizer.rb
@@ -1,13 +1,14 @@
 class Provider::Openai::AutoCategorizer
-  def initialize(client, transactions: [], user_categories: [])
+  def initialize(client, model: "", transactions: [], user_categories: [])
    @client = client
+    @model = model
    @transactions = transactions
    @user_categories = user_categories
  end

  def auto_categorize
    response = client.responses.create(parameters: {
-      model: "gpt-4.1-mini",
+      model: model,
      input: [ { role: "developer", content: developer_message } ],
      text: {
        format: {
@@ -26,7 +27,7 @@ class Provider::Openai::AutoCategorizer
  end

  private
-    attr_reader :client, :transactions, :user_categories
+    attr_reader :client, :model, :transactions, :user_categories

    AutoCategorization = Provider::LlmConcept::AutoCategorization

--- a/app/models/provider/openai/auto_merchant_detector.rb
+++ b/app/models/provider/openai/auto_merchant_detector.rb
@@ -1,13 +1,14 @@
 class Provider::Openai::AutoMerchantDetector
-  def initialize(client, transactions:, user_merchants:)
+  def initialize(client, model: "", transactions:, user_merchants:)
    @client = client
+    @model = model
    @transactions = transactions
    @user_merchants = user_merchants
  end

  def auto_detect_merchants
    response = client.responses.create(parameters: {
-      model: "gpt-4.1-mini",
+      model: model,
      input: [ { role: "developer", content: developer_message } ],
      text: {
        format: {
@@ -26,7 +27,7 @@ class Provider::Openai::AutoMerchantDetector
  end

  private
-    attr_reader :client, :transactions, :user_merchants
+    attr_reader :client, :model, :transactions, :user_merchants

    AutoDetectedMerchant = Provider::LlmConcept::AutoDetectedMerchant

--- a/config/initializers/langfuse.rb
+++ b/config/initializers/langfuse.rb
@@ -0,0 +1,9 @@
+require "langfuse"
+
+if ENV["LANGFUSE_PUBLIC_KEY"].present? && ENV["LANGFUSE_SECRET_KEY"].present?
+  Langfuse.configure do |config|
+    config.public_key = ENV["LANGFUSE_PUBLIC_KEY"]
+    config.secret_key = ENV["LANGFUSE_SECRET_KEY"]
+    config.host = ENV["LANGFUSE_HOST"] if ENV["LANGFUSE_HOST"].present?
+  end
+end
--- a/docs/hosting/langfuse.md
+++ b/docs/hosting/langfuse.md
@@ -0,0 +1,37 @@
+# Langfuse
+
+This app can send traces of all LLM interactions to [Langfuse](https://langfuse.com) for debugging and usage analytics. The Langfuse source code is available [on
+GitHub](https://github.com/langfuse/langfuse); see also their [Open
+Source statement](https://langfuse.com/open-source).
+
+## Prerequisites
+
+1. Create a Langfuse project (self‑hosted or using their cloud offering).
+2. Copy the **public key** and **secret key** from the project's settings.
+
+## Configuration
+
+Set the following environment variables for the Rails app:
+
+```bash
+LANGFUSE_PUBLIC_KEY=your_public_key
+LANGFUSE_SECRET_KEY=your_secret_key
+# Optional if self‑hosting or using a non‑default domain
+LANGFUSE_HOST=https://your-langfuse-domain.com
+```
+
+In Docker setups, add the variables to `compose.yml` and the accompanying `.env` file.
+
+The initializer reads these values on boot and automatically enables tracing. If the keys are absent, the app runs normally without Langfuse.
+
+## What Gets Tracked
+
+* `chat_response`
+* `auto_categorize`
+* `auto_detect_merchants`
+
+Each call records the prompt, model, and response. Token usage is recorded only when it is available (currently non-streaming `chat_response` calls).
+
+## Viewing Traces
+
+After starting the app with the variables set, visit your Langfuse dashboard to see the traces. Each trace is named `openai.<operation>` (for example, `openai.chat_response`) and contains the generation for that call.