sure/app/models/eval/sample.rb

# One evaluation sample belonging to an Eval::Dataset, with many Eval::Results.
#
# The accessors below read three hash-like attributes using string keys:
#   * input_data      - required; source of the transaction input and chat prompt
#   * expected_output - required; holds the expected answer for each eval type
#   * context_data    - optional supporting data; it is not validated, so the
#                       accessors treat a nil value like an empty hash
#
# NOTE(review): these attributes are presumably JSON/JSONB columns and `tags` a
# PostgreSQL varchar[] column (the tag scopes use array operators) - confirm
# against the schema.
class Eval::Sample < ApplicationRecord
  self.table_name = "eval_samples"

  # Allowed values for the `difficulty` attribute.
  DIFFICULTIES = %w[easy medium hard manual edge_case].freeze

  belongs_to :dataset, class_name: "Eval::Dataset", foreign_key: :eval_dataset_id
  has_many :results, class_name: "Eval::Result", foreign_key: :eval_sample_id, dependent: :destroy

  validates :input_data, :expected_output, presence: true
  validates :difficulty, inclusion: { in: DIFFICULTIES }

  scope :easy, -> { where(difficulty: "easy") }
  scope :medium, -> { where(difficulty: "medium") }
  scope :hard, -> { where(difficulty: "hard") }
  scope :edge_cases, -> { where(difficulty: "edge_case") }
  # Samples whose tags array contains the given tag.
  scope :with_tag, ->(tag) { where("? = ANY(tags)", tag) }
  # Samples whose tags array overlaps (shares at least one element with) the given tags.
  scope :with_any_tags, ->(tags) { where("tags && ARRAY[?]::varchar[]", tags) }

  # Convert to format expected by AutoCategorizer
  #
  # Returns a copy of input_data with all keys (including nested ones) symbolized.
  def to_transaction_input
    input_data.deep_symbolize_keys
  end

  # Get categories from context (for categorization evals)
  #
  # Returns an empty array when context_data is nil or has no "categories" entry.
  def categories_context
    context_data&.dig("categories") || []
  end

  # Get merchants from context (for merchant detection evals)
  #
  # Returns an empty array when context_data is nil or has no "merchants" entry.
  def merchants_context
    context_data&.dig("merchants") || []
  end

  # Get mock data from context (for chat evals)
  #
  # Looks in context_data first, then falls back to input_data, then to an
  # empty hash. A nil context_data is skipped rather than raising.
  def mock_data
    context_data&.dig("mock_data") || input_data["mock_data"] || {}
  end

  # Get the chat prompt (for chat evals)
  #
  # Returns nil when input_data has no "prompt" entry.
  def chat_prompt
    input_data["prompt"]
  end

  # Get expected functions (for chat evals)
  #
  # Returns an empty array when none are specified.
  def expected_functions
    expected_output["functions"] || []
  end

  # Get expected response keywords (for chat evals)
  #
  # Returns an empty array when none are specified.
  def expected_response_contains
    expected_output["response_contains"] || []
  end

  # Get expected category name (for categorization evals)
  #
  # Returns nil when expected_output has no "category_name" entry.
  def expected_category_name
    expected_output["category_name"]
  end

  # Get acceptable alternative category names (for categorization evals)
  # These are categories that are also considered correct answers
  #
  # Returns an empty array when none are specified.
  def acceptable_alternatives
    expected_output["acceptable_alternatives"] || []
  end

  # Get all acceptable category names (primary + alternatives)
  #
  # nil entries (e.g. a missing primary category name) are dropped.
  def all_acceptable_categories
    [ expected_category_name, *acceptable_alternatives ].compact
  end

  # Get expected merchant info (for merchant detection evals)
  #
  # Returns nil when expected_output has no "business_name" entry.
  def expected_business_name
    expected_output["business_name"]
  end

  # Returns nil when expected_output has no "business_url" entry.
  def expected_business_url
    expected_output["business_url"]
  end

  # Check if null is expected
  #
  # Depends on the dataset's eval_type:
  #   * "categorization"     - true when no category name is expected
  #   * "merchant_detection" - true when neither a business name nor URL is expected
  #   * anything else        - always false
  def expects_null?
    case dataset.eval_type
    when "categorization"
      expected_category_name.nil?
    when "merchant_detection"
      expected_business_name.nil? && expected_business_url.nil?
    else
      false
    end
  end
end