require "test_helper"

class LlmUsageTest < ActiveSupport::TestCase
  # ---- provider inference -------------------------------------------------

  test "infer_provider returns anthropic for claude models" do
    %w[claude-sonnet-4-6 claude-opus-4-7 claude-haiku-4-5].each do |model_id|
      assert_equal "anthropic", LlmUsage.infer_provider(model_id)
    end
  end

  test "infer_provider still returns openai for gpt models" do
    %w[gpt-4.1 gpt-5].each do |model_id|
      assert_equal "openai", LlmUsage.infer_provider(model_id)
    end
  end

  test "infer_provider attributes Bedrock and Vertex prefixed IDs to anthropic" do
    # Two "anthropic."-prefixed IDs followed by one "anthropic/"-prefixed ID.
    prefixed_ids = %w[
      anthropic.claude-sonnet-4-5-20250929-v1:0
      anthropic.claude-opus-4-20250514-v1:0
      anthropic/claude-3-5-sonnet@20240620
    ]

    prefixed_ids.each do |model_id|
      assert_equal "anthropic", LlmUsage.infer_provider(model_id)
    end
  end

  # ---- base per-token pricing ---------------------------------------------

  test "calculate_cost returns nil for Bedrock IDs (no per-token rate stored)" do
    # Bedrock usage is billed through AWS rather than Anthropic, so no per-MTok
    # rate is stored for these IDs. The row must still attribute to anthropic
    # for provider filtering; this test only pins the nil cost.
    bedrock_cost = price(
      "anthropic.claude-sonnet-4-5-20250929-v1:0",
      prompt_tokens: 1000,
      completion_tokens: 500
    )

    assert_nil bedrock_cost
  end

  test "calculate_cost returns Anthropic pricing for Claude models" do
    # Expected: 1M input at $3/MTok ($3.00) plus 100K output at $15/MTok ($1.50).
    sonnet_total = price("claude-sonnet-4-6", prompt_tokens: 1_000_000, completion_tokens: 100_000)

    assert_in_delta 4.5, sonnet_total, 0.0001
  end

  test "calculate_cost uses higher pricing for Opus" do
    # Expected: 1M input at $15/MTok with no output tokens.
    opus_total = price("claude-opus-4-7", prompt_tokens: 1_000_000, completion_tokens: 0)

    assert_in_delta 15.0, opus_total, 0.0001
  end

  test "calculate_cost uses lower pricing for Haiku" do
    # Expected: 1M input at $1/MTok plus 1M output at $5/MTok.
    haiku_total = price("claude-haiku-4-5", prompt_tokens: 1_000_000, completion_tokens: 1_000_000)

    assert_in_delta 6.0, haiku_total, 0.0001
  end

  # ---- cache-token pricing ------------------------------------------------

  test "calculate_cost prices Anthropic cache tokens relative to the input rate" do
    # With Sonnet input at $3/MTok, a cache write is billed at 1.25x
    # ($3.75/MTok) and a cache read at 0.1x ($0.30/MTok).
    cache_write_cost = price(
      "claude-sonnet-4-6",
      prompt_tokens: 0, completion_tokens: 0, cache_creation_tokens: 1_000_000
    )
    assert_in_delta 3.75, cache_write_cost, 0.0001

    cache_read_cost = price(
      "claude-sonnet-4-6",
      prompt_tokens: 0, completion_tokens: 0, cache_read_tokens: 1_000_000
    )
    assert_in_delta 0.30, cache_read_cost, 0.0001
  end

  test "calculate_cost matches Anthropic's bill for a cached chat turn (issue #1984)" do
    # Token counts come from the review of issue #1984. Leaving the cache
    # tokens out under-reports the turn: $0.0328 instead of $0.0355.
    billed = price(
      "claude-sonnet-4-6",
      prompt_tokens: 8082,
      completion_tokens: 572,
      cache_creation_tokens: 435,
      cache_read_tokens: 3502
    )
    assert_in_delta 0.035508, billed, 0.0001

    uncached = price("claude-sonnet-4-6", prompt_tokens: 8082, completion_tokens: 572)
    assert billed > uncached, "cache tokens must add cost"
  end

  test "calculate_cost treats nil cache tokens as zero (OpenAI rows)" do
    # gpt-4.1 input is $2/MTok. Nil cache columns must neither raise nor
    # contribute anything to the total.
    openai_total = price(
      "gpt-4.1",
      prompt_tokens: 1_000_000,
      completion_tokens: 0,
      cache_creation_tokens: nil,
      cache_read_tokens: nil
    )

    assert_in_delta 2.0, openai_total, 0.0001
  end

  test "calculate_cost does not apply Anthropic cache pricing to non-Anthropic models" do
    # The 1.25x / 0.1x cache multipliers belong to Anthropic. Cache counts
    # passed for any other provider must not be billed at those rates.
    openai_total = price(
      "gpt-4.1",
      prompt_tokens: 0,
      completion_tokens: 0,
      cache_creation_tokens: 1_000_000,
      cache_read_tokens: 1_000_000
    )

    assert_in_delta 0.0, openai_total, 0.0001
  end

  private

  # Thin wrapper around LlmUsage.calculate_cost. It forwards exactly the
  # keywords the caller supplies, so each test still controls which token
  # counts (including explicit nils) reach the method under test.
  def price(model, **usage)
    LlmUsage.calculate_cost(model: model, **usage)
  end
end