mirror of
https://github.com/we-promise/sure.git
synced 2026-05-30 07:49:01 +00:00
feat(ai): add Anthropic batch ops + LLM cost ledger (2/5)
Implements auto_categorize, auto_detect_merchants, and
enhance_provider_merchants on Provider::Anthropic via forced tool calls,
plus the cost-ledger plumbing they need.
- Provider::Anthropic::AutoCategorizer, AutoMerchantDetector,
ProviderMerchantEnhancer each define a single output tool whose
input_schema mirrors the desired output, then force the model to call
it via tool_choice: { type: "tool", name: ..., disable_parallel_tool_use: true }.
Anthropic guarantees the tool_use.input matches the schema, so there
is no JSON parsing fragility, no <think> tag stripping, and no
json_object/json_schema fallback ladders.
- Concerns::UsageRecorder mirrors the OpenAI sibling but persists
cache_creation_input_tokens / cache_read_input_tokens to dedicated
columns instead of metadata.
- Migration adds cache_creation_tokens, cache_read_tokens (nullable
integers) to llm_usages. OpenAI rows leave them null.
- LlmUsage::PRICING gains Claude 4.x rows (opus-4-7 $15/$75, sonnet-4-6
$3/$15, haiku-4-5 $1/$5 per MTok). infer_provider returns "anthropic"
for claude-* via the existing exact/prefix lookup.
- Provider::Anthropic#chat_response now persists cache columns directly
rather than stashing them in metadata.
- 25-transaction batch cap mirrors the OpenAI provider so the cost
ledger sees the same shape regardless of which provider ran a batch.
Tests cover the forced-tool-call path, null/None normalization,
case-insensitive merchant matching, the missing-tool_use error path,
and Anthropic-specific pricing + provider inference on LlmUsage.
Stacked on #1983 (PR 1/5). 3/5 PDF + vision next.
This commit is contained in:
35
test/models/llm_usage_test.rb
Normal file
35
test/models/llm_usage_test.rb
Normal file
@@ -0,0 +1,35 @@
|
||||
require "test_helper"
|
||||
|
||||
# Exercises LlmUsage.infer_provider and LlmUsage.calculate_cost for Claude
# model names, and checks that GPT model names still resolve to OpenAI.
class LlmUsageTest < ActiveSupport::TestCase
  test "infer_provider returns anthropic for claude models" do
    %w[claude-sonnet-4-6 claude-opus-4-7 claude-haiku-4-5].each do |model_name|
      assert_equal "anthropic", LlmUsage.infer_provider(model_name)
    end
  end

  test "infer_provider still returns openai for gpt models" do
    %w[gpt-4.1 gpt-5].each do |model_name|
      assert_equal "openai", LlmUsage.infer_provider(model_name)
    end
  end

  test "calculate_cost returns Anthropic pricing for Claude models" do
    total = LlmUsage.calculate_cost(
      model: "claude-sonnet-4-6",
      prompt_tokens: 1_000_000,
      completion_tokens: 100_000
    )

    # Sonnet: 1M input at $3/MTok ($3.00) plus 100K output at $15/MTok ($1.50).
    assert_in_delta 4.5, total, 0.0001
  end

  test "calculate_cost uses higher pricing for Opus" do
    total = LlmUsage.calculate_cost(
      model: "claude-opus-4-7",
      prompt_tokens: 1_000_000,
      completion_tokens: 0
    )

    # Opus: 1M input at $15/MTok, no output tokens.
    assert_in_delta 15.0, total, 0.0001
  end

  test "calculate_cost uses lower pricing for Haiku" do
    total = LlmUsage.calculate_cost(
      model: "claude-haiku-4-5",
      prompt_tokens: 1_000_000,
      completion_tokens: 1_000_000
    )

    # Haiku: $1 for 1M input plus $5 for 1M output.
    assert_in_delta 6.0, total, 0.0001
  end
end
|
||||
124
test/models/provider/anthropic/auto_categorizer_test.rb
Normal file
124
test/models/provider/anthropic/auto_categorizer_test.rb
Normal file
@@ -0,0 +1,124 @@
|
||||
require "test_helper"
|
||||
|
||||
# Tests Provider::Anthropic::AutoCategorizer against a stubbed client, so the
# request parameters and response mapping are checked without a network call.
class Provider::Anthropic::AutoCategorizerTest < ActiveSupport::TestCase
  setup do
    @transactions = [
      { id: "txn_1", name: "McDonalds", amount: 20, classification: "expense" },
      { id: "txn_2", name: "Netflix", amount: 15, classification: "expense" }
    ]
    @user_categories = [
      { id: "cat_food", name: "Fast Food", classification: "expense" },
      { id: "cat_subs", name: "Subscriptions", classification: "expense" }
    ]
  end

  test "issues a forced tool call and maps the response into AutoCategorization records" do
    fake_response = build_response(content: [
      tool_use_block(
        id: "toolu_1",
        name: "report_categorizations",
        input: {
          "categorizations" => [
            { "transaction_id" => "txn_1", "category_name" => "Fast Food" },
            { "transaction_id" => "txn_2", "category_name" => "Subscriptions" }
          ]
        }
      )
    ])
    # Named local so the hash literal is not parsed as a block by assert_equal.
    expected_tool_choice = { type: "tool", name: "report_categorizations", disable_parallel_tool_use: true }
    client = stub_client(fake_response, expect_request: ->(params) {
      assert_equal "claude-haiku-4-5", params[:model]
      assert_equal expected_tool_choice, params[:tool_choice]
      assert_equal 1, params[:tools].size
      assert_equal "report_categorizations", params[:tools].first[:name]
    })

    result = run_categorizer(client)

    assert_equal 2, result.size
    assert_equal "Fast Food", result.find { |r| r.transaction_id == "txn_1" }.category_name
    assert_equal "Subscriptions", result.find { |r| r.transaction_id == "txn_2" }.category_name
  end

  test "normalizes null category names to nil" do
    fake_response = build_response(content: [
      tool_use_block(
        id: "toolu_2",
        name: "report_categorizations",
        input: {
          "categorizations" => [
            # Both a real nil and the literal string "null" are expected to come back as nil.
            { "transaction_id" => "txn_1", "category_name" => nil },
            { "transaction_id" => "txn_2", "category_name" => "null" }
          ]
        }
      )
    ])
    client = stub_client(fake_response)

    result = run_categorizer(client)

    assert_nil result.find { |r| r.transaction_id == "txn_1" }.category_name
    assert_nil result.find { |r| r.transaction_id == "txn_2" }.category_name
  end

  test "raises when no tool_use block is present in the response" do
    fake_response = build_response(content: [ text_block("No tool use") ])
    client = stub_client(fake_response)

    err = assert_raises(Provider::Anthropic::Error) do
      run_categorizer(client)
    end

    assert_match(/did not invoke report_categorizations/i, err.message)
  end

  private
    # Builds the categorizer with the fixtures from `setup` and runs it.
    def run_categorizer(client)
      Provider::Anthropic::AutoCategorizer.new(
        client,
        model: "claude-haiku-4-5",
        transactions: @transactions,
        user_categories: @user_categories
      ).auto_categorize
    end

    # Returns a mock client whose `messages.create` yields `response`.
    #
    # When `expect_request` is given, `create` is registered with Mocha's
    # `expects` and the callable receives the request params so the test can
    # assert on them. Otherwise `create` is a plain stub.
    def stub_client(response, expect_request: nil)
      messages = mock
      if expect_request
        messages.expects(:create).with do |params|
          expect_request.call(params)
          # The assertions above raise on mismatch; reaching here means the params matched.
          true
        end.returns(response)
      else
        messages.stubs(:create).returns(response)
      end
      client = mock
      client.stubs(:messages).returns(messages)
      client
    end

    # Builds a minimal stand-in for a messages response carrying `content`
    # blocks and input/output token usage.
    def build_response(content:, usage: { input_tokens: 50, output_tokens: 25 })
      OpenStruct.new(
        id: "msg_test",
        model: "claude-haiku-4-5",
        content: content,
        usage: OpenStruct.new(
          input_tokens: usage[:input_tokens],
          output_tokens: usage[:output_tokens]
        )
      )
    end

    # Stand-in for a plain text content block.
    def text_block(text)
      OpenStruct.new(type: :text, text: text)
    end

    # Stand-in for a tool_use content block carrying the tool's `input` payload.
    def tool_use_block(id:, name:, input:)
      OpenStruct.new(type: :tool_use, id: id, name: name, input: input)
    end
end
|
||||
115
test/models/provider/anthropic/auto_merchant_detector_test.rb
Normal file
115
test/models/provider/anthropic/auto_merchant_detector_test.rb
Normal file
@@ -0,0 +1,115 @@
|
||||
require "test_helper"
|
||||
|
||||
# Tests Provider::Anthropic::AutoMerchantDetector through a stubbed client:
# request shape, response mapping, merchant-name normalization, and the error
# raised when the response has no tool_use block.
class Provider::Anthropic::AutoMerchantDetectorTest < ActiveSupport::TestCase
  setup do
    @transactions = [
      { id: "txn_1", name: "AMZN purchases", classification: "expense" },
      { id: "txn_2", name: "Local diner", classification: "expense" }
    ]
    @user_merchants = [ { id: "m1", name: "Shooters" } ]
  end

  test "issues a forced tool call and maps merchants" do
    reported = [
      { "transaction_id" => "txn_1", "business_name" => "Amazon", "business_url" => "amazon.com" },
      { "transaction_id" => "txn_2", "business_name" => nil, "business_url" => nil }
    ]
    fake_response = build_response(content: [
      tool_use_block(id: "toolu_1", name: "report_merchants", input: { "merchants" => reported })
    ])
    request_check = lambda do |params|
      assert_equal "claude-haiku-4-5", params[:model]
      assert_equal "report_merchants", params[:tool_choice][:name]
      assert params[:tool_choice][:disable_parallel_tool_use]
    end
    client = stub_client(fake_response, expect_request: request_check)

    result = detect_merchants(client, @transactions)

    amazon_row = result.find { |row| row.transaction_id == "txn_1" }
    diner_row = result.find { |row| row.transaction_id == "txn_2" }

    assert_equal "Amazon", amazon_row.business_name
    assert_equal "amazon.com", amazon_row.business_url
    assert_nil diner_row.business_name
    assert_nil diner_row.business_url
  end

  test "normalizes case-insensitive matches against user_merchants" do
    # The response uses lowercase "shooters"; the user's merchant is "Shooters".
    reported = [
      { "transaction_id" => "txn_1", "business_name" => "shooters", "business_url" => nil }
    ]
    fake_response = build_response(content: [
      tool_use_block(id: "toolu_1", name: "report_merchants", input: { "merchants" => reported })
    ])
    client = stub_client(fake_response)

    result = detect_merchants(client, [ @transactions.first ])

    assert_equal "Shooters", result.first.business_name
  end

  test "raises when model returns no tool_use" do
    fake_response = build_response(content: [ text_block("I cannot help") ])
    client = stub_client(fake_response)

    err = assert_raises(Provider::Anthropic::Error) do
      detect_merchants(client, @transactions)
    end

    assert_match(/did not invoke report_merchants/i, err.message)
  end

  private
    # Builds the detector for `transactions` with the merchants from `setup`
    # and runs it.
    def detect_merchants(client, transactions)
      detector = Provider::Anthropic::AutoMerchantDetector.new(
        client,
        model: "claude-haiku-4-5",
        transactions: transactions,
        user_merchants: @user_merchants
      )
      detector.auto_detect_merchants
    end

    # Returns a mock client whose `messages.create` yields `response`. With
    # `expect_request`, the callable is handed the request params inside a
    # Mocha `expects(...).with` block; without it, `create` is simply stubbed.
    def stub_client(response, expect_request: nil)
      messages_api = mock
      if expect_request.nil?
        messages_api.stubs(:create).returns(response)
      else
        expectation = messages_api.expects(:create).with do |params|
          expect_request.call(params)
          true
        end
        expectation.returns(response)
      end

      mock.tap { |client| client.stubs(:messages).returns(messages_api) }
    end

    # Minimal stand-in for a messages response with content and token usage.
    def build_response(content:, usage: { input_tokens: 100, output_tokens: 40 })
      token_usage = OpenStruct.new(input_tokens: usage[:input_tokens], output_tokens: usage[:output_tokens])
      OpenStruct.new(id: "msg_test", model: "claude-haiku-4-5", content: content, usage: token_usage)
    end

    # Stand-in for a plain text content block.
    def text_block(text)
      OpenStruct.new(type: :text, text: text)
    end

    # Stand-in for a tool_use content block.
    def tool_use_block(id:, name:, input:)
      OpenStruct.new(type: :tool_use, id: id, name: name, input: input)
    end
end
|
||||
@@ -0,0 +1,81 @@
|
||||
require "test_helper"
|
||||
|
||||
# Tests Provider::Anthropic::ProviderMerchantEnhancer through a stubbed
# client: the forced tool call in the request, the mapping of the tool input
# into results, and the error raised when no tool_use block is returned.
class Provider::Anthropic::ProviderMerchantEnhancerTest < ActiveSupport::TestCase
  setup do
    @merchants = [
      { id: "m1", name: "Walmart" },
      { id: "m2", name: "Local Diner" }
    ]
  end

  test "issues a forced tool call and maps enhancements" do
    fake_response = build_response(content: [
      tool_use_block(
        id: "toolu_1",
        name: "report_enhancements",
        input: {
          "merchants" => [
            { "merchant_id" => "m1", "business_url" => "walmart.com" },
            { "merchant_id" => "m2", "business_url" => nil }
          ]
        }
      )
    ])
    client = stub_client(fake_response, expect_request: ->(params) {
      assert_equal "report_enhancements", params[:tool_choice][:name]
      # Same check as the merchant-detector test: the forced call must disable
      # parallel tool use so exactly one tool_use block is returned.
      assert params[:tool_choice][:disable_parallel_tool_use]
    })

    result = run_enhancer(client)

    assert_equal "walmart.com", result.find { |r| r.merchant_id == "m1" }.business_url
    assert_nil result.find { |r| r.merchant_id == "m2" }.business_url
  end

  test "raises when model returns no tool_use" do
    fake_response = build_response(content: [ OpenStruct.new(type: :text, text: "Nope") ])
    client = stub_client(fake_response)

    err = assert_raises(Provider::Anthropic::Error) do
      run_enhancer(client)
    end

    assert_match(/did not invoke report_enhancements/i, err.message)
  end

  private
    # Builds the enhancer with the merchants from `setup` and runs it.
    def run_enhancer(client)
      Provider::Anthropic::ProviderMerchantEnhancer.new(
        client,
        model: "claude-haiku-4-5",
        merchants: @merchants
      ).enhance_merchants
    end

    # Returns a mock client whose `messages.create` yields `response`.
    #
    # When `expect_request` is given, `create` is registered with Mocha's
    # `expects` and the callable receives the request params so the test can
    # assert on them. Otherwise `create` is a plain stub.
    def stub_client(response, expect_request: nil)
      messages = mock
      if expect_request
        messages.expects(:create).with do |params|
          expect_request.call(params)
          # The assertions above raise on mismatch; reaching here means the params matched.
          true
        end.returns(response)
      else
        messages.stubs(:create).returns(response)
      end
      client = mock
      client.stubs(:messages).returns(messages)
      client
    end

    # Minimal stand-in for a messages response with content and token usage.
    def build_response(content:, usage: { input_tokens: 60, output_tokens: 20 })
      OpenStruct.new(
        id: "msg_test",
        model: "claude-haiku-4-5",
        content: content,
        usage: OpenStruct.new(input_tokens: usage[:input_tokens], output_tokens: usage[:output_tokens])
      )
    end

    # Stand-in for a tool_use content block carrying the tool's `input` payload.
    def tool_use_block(id:, name:, input:)
      OpenStruct.new(type: :tool_use, id: id, name: name, input: input)
    end
end
|
||||
Reference in New Issue
Block a user