Files
sure/app/models/eval/langfuse/client.rb
soky srm 88952e4714 Small llms improvements (#400)
* Initial implementation

* FIX keys

* Add langfuse evals support

* FIX trace upload

* Delete .claude/settings.local.json

Signed-off-by: soky srm <sokysrm@gmail.com>

* Update client.rb

* Small LLMs improvements

* Keep batch size normal

* Update categorizer

* FIX json mode

* Add reasonable alternative to matching

* FIX thinking blocks for llms

* Implement json mode support with AUTO mode

* Make auto default for everyone

* FIX linter

* Address review

* Allow export manual categories

* FIX user export

* FIX oneshot example pollution

* Update categorization_golden_v1.yml

* Update categorization_golden_v1.yml

* Trim to 100 items

* Update auto_categorizer.rb

* FIX for auto retry in auto mode

* Separate the Eval Logic from the Auto-Categorizer

The expected_null_count parameter conflates eval-specific logic with production categorization logic.

* Force json mode on evals

* Introduce a more mixed dataset

150 items, performance from a local model:

By Difficulty:
  easy: 93.22% accuracy (55/59)
  medium: 93.33% accuracy (42/45)
  hard: 92.86% accuracy (26/28)
  edge_case: 100.0% accuracy (18/18)

* Improve datasets

Remove Data leakage from prompts

* Create eval runs as "pending"

---------

Signed-off-by: soky srm <sokysrm@gmail.com>
Signed-off-by: Juan José Mata <juanjo.mata@gmail.com>
Co-authored-by: Juan José Mata <juanjo.mata@gmail.com>
2025-12-07 18:11:34 +01:00

227 lines
6.6 KiB
Ruby

# Thin HTTP client for the Langfuse public REST API.
#
# Covers the endpoints used by the eval tooling: datasets, dataset items,
# dataset run items, traces and scores. Requests are authenticated with HTTP
# Basic auth (public key as the user, secret key as the password).
#
# Credentials come from the constructor arguments or, when omitted, from the
# LANGFUSE_PUBLIC_KEY / LANGFUSE_SECRET_KEY environment variables.
class Eval::Langfuse::Client
  BASE_URLS = {
    us: "https://us.cloud.langfuse.com/api/public",
    eu: "https://cloud.langfuse.com/api/public"
  }.freeze

  # Number of retries granted to a request that keeps being rate limited (429).
  MAX_RETRIES = 3

  class Error < StandardError; end
  class ConfigurationError < Error; end

  # Raised for non-successful HTTP responses. Carries the HTTP status code and
  # the raw response body for callers that want to inspect them.
  class ApiError < Error
    attr_reader :status, :body

    def initialize(message, status: nil, body: nil)
      super(message)
      @status = status
      @body = body
    end
  end

  # public_key / secret_key - credentials; fall back to the environment.
  # region                  - :us or :eu (String or Symbol, any case).
  # host                    - base host of a custom deployment, e.g.
  #                           "https://langfuse.example.com"; "/api/public"
  #                           is appended to it.
  #
  # Raises ConfigurationError when either key is missing.
  def initialize(public_key: nil, secret_key: nil, region: nil, host: nil)
    @public_key = public_key || ENV["LANGFUSE_PUBLIC_KEY"]
    @secret_key = secret_key || ENV["LANGFUSE_SECRET_KEY"]
    @base_url = determine_base_url(region, host)

    validate_configuration!
  end

  # Dataset operations

  # POSTs a new dataset. nil attributes are dropped from the payload.
  def create_dataset(name:, description: nil, metadata: {})
    post("/v2/datasets", {
      name: name,
      description: description,
      metadata: metadata
    }.compact)
  end

  # GETs a dataset by name (the name is URL-encoded into the path).
  def get_dataset(name:)
    get("/v2/datasets/#{encode(name)}")
  end

  # GETs one page of datasets.
  def list_datasets(page: 1, limit: 50)
    get("/v2/datasets", page: page, limit: limit)
  end

  # Dataset item operations

  # POSTs a dataset item. nil attributes are dropped from the payload.
  def create_dataset_item(dataset_name:, input:, expected_output: nil, metadata: {}, id: nil)
    post("/dataset-items", {
      datasetName: dataset_name,
      id: id,
      input: input,
      expectedOutput: expected_output,
      metadata: metadata
    }.compact)
  end

  # GETs one page of items belonging to the named dataset.
  def get_dataset_items(dataset_name:, page: 1, limit: 50)
    get("/dataset-items", datasetName: dataset_name, page: page, limit: limit)
  end

  # Dataset run operations (for experiments)

  # POSTs a run item linking a dataset item to a trace and/or observation.
  def create_dataset_run_item(run_name:, dataset_item_id:, trace_id: nil, observation_id: nil, metadata: {})
    post("/dataset-run-items", {
      runName: run_name,
      datasetItemId: dataset_item_id,
      traceId: trace_id,
      observationId: observation_id,
      metadata: metadata
    }.compact)
  end

  # Trace operations

  # Sends a single "trace-create" event to /ingestion.
  #
  # Returns the trace ID (a UUID generated client-side), not the API response.
  def create_trace(name:, input: nil, output: nil, metadata: {}, session_id: nil, user_id: nil)
    # Generate trace ID upfront so we can return it
    trace_id = SecureRandom.uuid

    post("/ingestion", {
      batch: [
        {
          id: SecureRandom.uuid,
          type: "trace-create",
          timestamp: Time.current.iso8601,
          body: {
            id: trace_id,
            name: name,
            input: input,
            output: output,
            metadata: metadata,
            sessionId: session_id,
            userId: user_id
          }.compact
        }
      ]
    })

    # Return the trace ID we generated
    trace_id
  end

  # Score operations

  # Sends a single "score-create" event to /ingestion for the given trace.
  # Returns the parsed API response.
  def create_score(trace_id:, name:, value:, comment: nil, data_type: "NUMERIC")
    post("/ingestion", {
      batch: [
        {
          id: SecureRandom.uuid,
          type: "score-create",
          timestamp: Time.current.iso8601,
          body: {
            id: SecureRandom.uuid,
            traceId: trace_id,
            name: name,
            value: value,
            comment: comment,
            dataType: data_type
          }.compact
        }
      ]
    })
  end

  # True when both the public and the secret key are present.
  def configured?
    @public_key.present? && @secret_key.present?
  end

  private

    # Resolves the API base URL.
    # Priority: explicit host > LANGFUSE_HOST env > region > LANGFUSE_REGION env > default (eu)
    def determine_base_url(region, host)
      if host.present?
        host.chomp("/") + "/api/public"
      elsif ENV["LANGFUSE_HOST"].present?
        ENV["LANGFUSE_HOST"].chomp("/") + "/api/public"
      elsif region.present?
        base_url_for_region(region)
      elsif ENV["LANGFUSE_REGION"].present?
        base_url_for_region(ENV["LANGFUSE_REGION"])
      else
        # Default to EU as it's more common
        BASE_URLS[:eu]
      end
    end

    # Maps a region name to its cloud URL. Case and surrounding whitespace are
    # ignored so that e.g. LANGFUSE_REGION=US is not silently routed to the EU
    # endpoint. Unknown regions fall back to EU.
    def base_url_for_region(region)
      BASE_URLS.fetch(region.to_s.strip.downcase.to_sym, BASE_URLS[:eu])
    end

    def validate_configuration!
      return if configured?

      raise ConfigurationError, <<~MSG
        Langfuse credentials not configured.
        Set LANGFUSE_PUBLIC_KEY and LANGFUSE_SECRET_KEY environment variables,
        or pass public_key and secret_key to the client.
      MSG
    end

    # Performs a GET; params are sent as the query string.
    def get(path, params = {})
      uri = build_uri(path, params)
      request = Net::HTTP::Get.new(uri)
      execute_request(uri, request)
    end

    # Performs a POST with the body serialized as JSON.
    def post(path, body)
      uri = build_uri(path)
      request = Net::HTTP::Post.new(uri)
      request.body = body.to_json
      request["Content-Type"] = "application/json"
      execute_request(uri, request)
    end

    def build_uri(path, params = {})
      uri = URI("#{@base_url}#{path}")
      uri.query = URI.encode_www_form(params) if params.any?
      uri
    end

    # Sends the request and maps the response:
    #   2xx, 409  -> parsed JSON body ({} when the body is not valid JSON)
    #   429       -> sleeps, then retries while the retry budget lasts
    #   otherwise -> raises ApiError
    def execute_request(uri, request, retries: MAX_RETRIES)
      request.basic_auth(@public_key, @secret_key)

      response = build_http(uri).request(request)
      status = response.code.to_i

      case status
      when 200..299
        parse_json(response.body)
      when 401
        raise ApiError.new("Unauthorized - check your Langfuse API keys", status: 401, body: response.body)
      when 404
        raise ApiError.new("Resource not found", status: 404, body: response.body)
      when 409
        # Conflict - resource already exists, which is okay for idempotent operations
        parse_json(response.body)
      when 429
        # Rate limited - retry with exponential backoff
        unless retries > 0
          raise ApiError.new("Rate limit exceeded after retries", status: 429, body: response.body)
        end

        retry_after = retry_delay(response, retries)
        Rails.logger.info("[Langfuse] Rate limited, waiting #{retry_after}s before retry...")
        sleep(retry_after)
        execute_request(uri, rebuild_request(request), retries: retries - 1)
      else
        raise ApiError.new("API error: #{response.code} - #{response.body}", status: status, body: response.body)
      end
    end

    # Builds the Net::HTTP connection object for the given URI.
    def build_http(uri)
      http = Net::HTTP.new(uri.host, uri.port)
      # Follow the URI scheme so plain-http self-hosted instances work too.
      http.use_ssl = uri.scheme == "https"
      http.read_timeout = 30
      http.open_timeout = 10

      if http.use_ssl?
        http.verify_mode = OpenSSL::SSL::VERIFY_PEER

        # Fix for OpenSSL 3.x CRL checking issues
        # See: https://github.com/ruby/openssl/issues/619
        if OpenSSL::OPENSSL_VERSION_NUMBER >= 0x30000000
          # Tolerate ONLY the "unable to get CRL" failure. Every other
          # verification error must still reject the certificate; returning
          # true unconditionally would disable peer verification entirely.
          http.verify_callback = lambda do |preverify_ok, store_ctx|
            preverify_ok || store_ctx.error == OpenSSL::X509::V_ERR_UNABLE_TO_GET_CRL
          end
        end
      end

      http
    end

    # Parses a response body as JSON. An empty, missing or malformed body
    # yields {} so callers of successful requests always get a usable value.
    def parse_json(body)
      return {} if body.nil? || body.empty?

      JSON.parse(body)
    rescue JSON::ParserError
      {}
    end

    # Seconds to wait before retrying a rate-limited request.
    #
    # Uses the Retry-After header when it is a non-negative integer number of
    # seconds. Anything else (header absent, or the HTTP-date form, which
    # String#to_i would turn into 0) falls back to exponential backoff:
    # 1s, 2s, 4s, ... as the retry budget is consumed.
    def retry_delay(response, retries)
      seconds = Integer(response["Retry-After"].to_s, 10, exception: false)
      return seconds if seconds && seconds >= 0

      2**[ MAX_RETRIES - retries, 0 ].max
    end

    def rebuild_request(original_request)
      # Create a new request with the same properties (needed for retry since request body may be consumed)
      uri = URI(original_request.uri.to_s)
      new_request = original_request.class.new(uri)
      original_request.each_header { |key, value| new_request[key] = value }
      new_request.body = original_request.body
      new_request
    end

    def encode(value)
      ERB::Util.url_encode(value)
    end
end