mirror of
https://github.com/we-promise/sure.git
synced 2026-04-16 10:34:09 +00:00
Provider merchants enhancement (#1254)
* Add AI merchant enhancement and dedup * Enhancements Add error if job is already running add note that we also merge merchants * Allow updating provider website * Review fixes * Update provider_merchant.rb * Linter and fixes * FIX transaction quick menu modal
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
import {
|
||||
autoUpdate,
|
||||
computePosition,
|
||||
flip,
|
||||
offset,
|
||||
shift,
|
||||
} from "@floating-ui/dom";
|
||||
@@ -110,7 +111,7 @@ export default class extends Controller {
|
||||
|
||||
computePosition(this.buttonTarget, this.contentTarget, {
|
||||
placement: useMobileFullwidth ? "bottom" : this.placementValue,
|
||||
middleware: [offset(this.offsetValue), shift({ padding: 5 })],
|
||||
middleware: [offset(this.offsetValue), flip({ padding: 5 }), shift({ padding: 5 })],
|
||||
strategy: "fixed",
|
||||
}).then(({ x, y }) => {
|
||||
if (useMobileFullwidth) {
|
||||
|
||||
@@ -17,6 +17,9 @@ class FamilyMerchantsController < ApplicationController
|
||||
assigned_ids = @provider_merchants.pluck(:id)
|
||||
@unlinked_merchants = ProviderMerchant.where(id: recently_unlinked_ids - assigned_ids).alphabetically
|
||||
|
||||
@enhanceable_count = @provider_merchants.where(website_url: [ nil, "" ]).count
|
||||
@llm_available = Provider::Registry.get_provider(:openai).present?
|
||||
|
||||
render layout: "settings"
|
||||
end
|
||||
|
||||
@@ -42,11 +45,21 @@ class FamilyMerchantsController < ApplicationController
|
||||
|
||||
def update
|
||||
if @merchant.is_a?(ProviderMerchant)
|
||||
# Convert ProviderMerchant to FamilyMerchant for this family only
|
||||
@family_merchant = @merchant.convert_to_family_merchant_for(Current.family, merchant_params)
|
||||
respond_to do |format|
|
||||
format.html { redirect_to family_merchants_path, notice: t(".converted_success") }
|
||||
format.turbo_stream { render turbo_stream: turbo_stream.action(:redirect, family_merchants_path) }
|
||||
if merchant_params[:name].present? && merchant_params[:name] != @merchant.name
|
||||
# Name changed — convert ProviderMerchant to FamilyMerchant for this family only
|
||||
@family_merchant = @merchant.convert_to_family_merchant_for(Current.family, merchant_params)
|
||||
respond_to do |format|
|
||||
format.html { redirect_to family_merchants_path, notice: t(".converted_success") }
|
||||
format.turbo_stream { render turbo_stream: turbo_stream.action(:redirect, family_merchants_path) }
|
||||
end
|
||||
else
|
||||
# Only website changed — update the ProviderMerchant directly
|
||||
@merchant.update!(merchant_params.slice(:website_url))
|
||||
@merchant.generate_logo_url_from_website!
|
||||
respond_to do |format|
|
||||
format.html { redirect_to family_merchants_path, notice: t(".success") }
|
||||
format.turbo_stream { render turbo_stream: turbo_stream.action(:redirect, family_merchants_path) }
|
||||
end
|
||||
end
|
||||
elsif @merchant.update(merchant_params)
|
||||
respond_to do |format|
|
||||
@@ -72,6 +85,19 @@ class FamilyMerchantsController < ApplicationController
|
||||
end
|
||||
end
|
||||
|
||||
# POST action that kicks off AI enhancement of the current family's provider
# merchants in the background.
#
# A cache entry keyed by family id acts as a best-effort "already running"
# lock: it is written with `unless_exist: true`, so a falsy return from
# `Rails.cache.write` is treated as "another run holds the lock". The entry
# expires after 10 minutes so a lost job cannot block the family forever.
def enhance
  cache_key = "enhance_provider_merchants:#{Current.family.id}"

  lock_acquired = Rails.cache.write(cache_key, true, expires_in: 10.minutes, unless_exist: true)

  unless lock_acquired
    return redirect_to family_merchants_path, alert: t(".already_running")
  end

  begin
    EnhanceProviderMerchantsJob.perform_later(Current.family)
  rescue StandardError
    # The job never got enqueued, so nothing will release the lock for us.
    # Drop it now instead of blocking retries until the TTL expires.
    Rails.cache.delete(cache_key)
    raise
  end

  redirect_to family_merchants_path, notice: t(".success")
end
|
||||
|
||||
def merge
|
||||
@merchants = all_family_merchants
|
||||
end
|
||||
|
||||
9
app/jobs/enhance_provider_merchants_job.rb
Normal file
9
app/jobs/enhance_provider_merchants_job.rb
Normal file
@@ -0,0 +1,9 @@
|
||||
class EnhanceProviderMerchantsJob < ApplicationJob
|
||||
queue_as :medium_priority
|
||||
|
||||
def perform(family)
|
||||
ProviderMerchant::Enhancer.new(family).enhance
|
||||
ensure
|
||||
Rails.cache.delete("enhance_provider_merchants:#{family.id}")
|
||||
end
|
||||
end
|
||||
@@ -13,6 +13,12 @@ module Provider::LlmConcept
|
||||
raise NotImplementedError, "Subclasses must implement #auto_detect_merchants"
|
||||
end
|
||||
|
||||
EnhancedMerchant = Data.define(:merchant_id, :business_url)
|
||||
|
||||
def enhance_provider_merchants(merchants)
|
||||
raise NotImplementedError, "Subclasses must implement #enhance_provider_merchants"
|
||||
end
|
||||
|
||||
PdfProcessingResult = Data.define(:summary, :document_type, :extracted_data)
|
||||
|
||||
def supports_pdf_processing?
|
||||
|
||||
@@ -116,6 +116,33 @@ class Provider::Openai < Provider
|
||||
end
|
||||
end
|
||||
|
||||
def enhance_provider_merchants(merchants: [], model: "", family: nil, json_mode: nil)
|
||||
with_provider_response do
|
||||
raise Error, "Too many merchants to enhance. Max is 25 per request." if merchants.size > 25
|
||||
|
||||
effective_model = model.presence || @default_model
|
||||
|
||||
trace = create_langfuse_trace(
|
||||
name: "openai.enhance_provider_merchants",
|
||||
input: { merchants: merchants }
|
||||
)
|
||||
|
||||
result = ProviderMerchantEnhancer.new(
|
||||
client,
|
||||
model: effective_model,
|
||||
merchants: merchants,
|
||||
custom_provider: custom_provider?,
|
||||
langfuse_trace: trace,
|
||||
family: family,
|
||||
json_mode: json_mode
|
||||
).enhance_merchants
|
||||
|
||||
upsert_langfuse_trace(trace: trace, output: result.map(&:to_h))
|
||||
|
||||
result
|
||||
end
|
||||
end
|
||||
|
||||
# Can be disabled via ENV for OpenAI-compatible endpoints that don't support vision
|
||||
# Only vision-capable models (gpt-4o, gpt-4-turbo, gpt-4.1, etc.) support PDF input
|
||||
def supports_pdf_processing?(model: @default_model)
|
||||
|
||||
402
app/models/provider/openai/provider_merchant_enhancer.rb
Normal file
402
app/models/provider/openai/provider_merchant_enhancer.rb
Normal file
@@ -0,0 +1,402 @@
|
||||
class Provider::Openai::ProviderMerchantEnhancer
|
||||
include Provider::Openai::Concerns::UsageRecorder
|
||||
|
||||
attr_reader :client, :model, :merchants, :custom_provider, :langfuse_trace, :family, :json_mode
|
||||
|
||||
def initialize(client, model: "", merchants:, custom_provider: false, langfuse_trace: nil, family: nil, json_mode: nil)
|
||||
@client = client
|
||||
@model = model
|
||||
@merchants = merchants
|
||||
@custom_provider = custom_provider
|
||||
@langfuse_trace = langfuse_trace
|
||||
@family = family
|
||||
@json_mode = json_mode || default_json_mode
|
||||
end
|
||||
|
||||
VALID_JSON_MODES = Provider::Openai::AutoMerchantDetector::VALID_JSON_MODES
|
||||
|
||||
def default_json_mode
|
||||
env_mode = ENV["LLM_JSON_MODE"]
|
||||
return env_mode if env_mode.present? && VALID_JSON_MODES.include?(env_mode)
|
||||
|
||||
setting_mode = Setting.openai_json_mode
|
||||
return setting_mode if setting_mode.present? && VALID_JSON_MODES.include?(setting_mode)
|
||||
|
||||
Provider::Openai::AutoMerchantDetector::JSON_MODE_AUTO
|
||||
end
|
||||
|
||||
def enhance_merchants
|
||||
if custom_provider
|
||||
enhance_merchants_generic
|
||||
else
|
||||
enhance_merchants_native
|
||||
end
|
||||
end
|
||||
|
||||
def instructions
|
||||
if custom_provider
|
||||
simple_instructions
|
||||
else
|
||||
detailed_instructions
|
||||
end
|
||||
end
|
||||
|
||||
def simple_instructions
|
||||
<<~INSTRUCTIONS.strip_heredoc
|
||||
Identify business websites from merchant names. Return JSON only.
|
||||
|
||||
Rules:
|
||||
1. Match merchant_id exactly from input
|
||||
2. Return the business website URL without "www." prefix
|
||||
3. Return "null" if uncertain, generic, or a local business
|
||||
4. Only return values if 80%+ confident
|
||||
|
||||
Example output format:
|
||||
{"merchants": [{"merchant_id": "id_001", "business_url": "amazon.com"}]}
|
||||
INSTRUCTIONS
|
||||
end
|
||||
|
||||
def detailed_instructions
|
||||
<<~INSTRUCTIONS.strip_heredoc
|
||||
You are an assistant to a consumer personal finance app.
|
||||
|
||||
Given a list of merchant names, identify the business website URL for each.
|
||||
|
||||
Closely follow ALL the rules below:
|
||||
|
||||
- Return 1 result per merchant
|
||||
- Correlate each merchant by ID (merchant_id)
|
||||
- Do not include the subdomain in the business_url (i.e. "amazon.com" not "www.amazon.com")
|
||||
- Be slightly pessimistic. We favor returning "null" over returning a false positive.
|
||||
- NEVER return a URL for generic or local merchant names (e.g. "Local Diner", "Gas Station", "ATM Withdrawal")
|
||||
|
||||
Determining a value:
|
||||
|
||||
- Attempt to determine the website URL from your knowledge of global and regional businesses
|
||||
- If no certain match, return "null"
|
||||
|
||||
Example 1 (known business):
|
||||
|
||||
```
|
||||
Merchant name: "Walmart"
|
||||
|
||||
Result:
|
||||
- business_url: "walmart.com"
|
||||
```
|
||||
|
||||
Example 2 (generic/local business):
|
||||
|
||||
```
|
||||
Merchant name: "Local diner"
|
||||
|
||||
Result:
|
||||
- business_url: null
|
||||
```
|
||||
INSTRUCTIONS
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def enhance_merchants_native
|
||||
span = langfuse_trace&.span(name: "enhance_provider_merchants_api_call", input: {
|
||||
model: model.presence || Provider::Openai::DEFAULT_MODEL,
|
||||
merchants: merchants
|
||||
})
|
||||
|
||||
response = client.responses.create(parameters: {
|
||||
model: model.presence || Provider::Openai::DEFAULT_MODEL,
|
||||
input: [ { role: "developer", content: developer_message } ],
|
||||
text: {
|
||||
format: {
|
||||
type: "json_schema",
|
||||
name: "enhance_provider_merchants",
|
||||
strict: true,
|
||||
schema: json_schema
|
||||
}
|
||||
},
|
||||
instructions: instructions
|
||||
})
|
||||
|
||||
Rails.logger.info("Tokens used to enhance provider merchants: #{response.dig("usage", "total_tokens")}")
|
||||
|
||||
result = extract_and_build_response_native(response)
|
||||
|
||||
record_usage(
|
||||
model.presence || Provider::Openai::DEFAULT_MODEL,
|
||||
response.dig("usage"),
|
||||
operation: "enhance_provider_merchants",
|
||||
metadata: { merchant_count: merchants.size }
|
||||
)
|
||||
|
||||
span&.end(output: result.map(&:to_h), usage: response.dig("usage"))
|
||||
result
|
||||
rescue => e
|
||||
span&.end(output: { error: e.message }, level: "ERROR")
|
||||
raise
|
||||
end
|
||||
|
||||
def enhance_merchants_generic
|
||||
if json_mode == Provider::Openai::AutoMerchantDetector::JSON_MODE_AUTO
|
||||
enhance_merchants_with_auto_mode
|
||||
else
|
||||
enhance_merchants_with_mode(json_mode)
|
||||
end
|
||||
rescue Faraday::BadRequestError => e
|
||||
if json_mode == Provider::Openai::AutoMerchantDetector::JSON_MODE_STRICT || json_mode == Provider::Openai::AutoMerchantDetector::JSON_MODE_AUTO
|
||||
Rails.logger.warn("Strict JSON mode failed for merchant enhancement, falling back to none mode: #{e.message}")
|
||||
enhance_merchants_with_mode(Provider::Openai::AutoMerchantDetector::JSON_MODE_NONE)
|
||||
else
|
||||
raise
|
||||
end
|
||||
end
|
||||
|
||||
def enhance_merchants_with_auto_mode
|
||||
result = enhance_merchants_with_mode(Provider::Openai::AutoMerchantDetector::JSON_MODE_STRICT)
|
||||
|
||||
null_count = result.count { |r| r.business_url.nil? }
|
||||
missing_count = merchants.size - result.size
|
||||
failed_count = null_count + missing_count
|
||||
failed_ratio = merchants.size > 0 ? failed_count.to_f / merchants.size : 0.0
|
||||
|
||||
if failed_ratio > Provider::Openai::AutoMerchantDetector::AUTO_MODE_NULL_THRESHOLD
|
||||
Rails.logger.info("Auto mode: #{(failed_ratio * 100).round}% failed in strict mode for merchant enhancement, retrying with none mode")
|
||||
enhance_merchants_with_mode(Provider::Openai::AutoMerchantDetector::JSON_MODE_NONE)
|
||||
else
|
||||
result
|
||||
end
|
||||
end
|
||||
|
||||
def enhance_merchants_with_mode(mode)
|
||||
span = langfuse_trace&.span(name: "enhance_provider_merchants_api_call", input: {
|
||||
model: model.presence || Provider::Openai::DEFAULT_MODEL,
|
||||
merchants: merchants,
|
||||
json_mode: mode
|
||||
})
|
||||
|
||||
params = {
|
||||
model: model.presence || Provider::Openai::DEFAULT_MODEL,
|
||||
messages: [
|
||||
{ role: "system", content: instructions },
|
||||
{ role: "user", content: developer_message_for_generic }
|
||||
]
|
||||
}
|
||||
|
||||
case mode
|
||||
when Provider::Openai::AutoMerchantDetector::JSON_MODE_STRICT
|
||||
params[:response_format] = {
|
||||
type: "json_schema",
|
||||
json_schema: {
|
||||
name: "enhance_provider_merchants",
|
||||
strict: true,
|
||||
schema: json_schema
|
||||
}
|
||||
}
|
||||
when Provider::Openai::AutoMerchantDetector::JSON_MODE_OBJECT
|
||||
params[:response_format] = { type: "json_object" }
|
||||
end
|
||||
|
||||
response = client.chat(parameters: params)
|
||||
|
||||
Rails.logger.info("Tokens used to enhance provider merchants: #{response.dig("usage", "total_tokens")} (json_mode: #{mode})")
|
||||
|
||||
result = extract_and_build_response_generic(response)
|
||||
|
||||
record_usage(
|
||||
model.presence || Provider::Openai::DEFAULT_MODEL,
|
||||
response.dig("usage"),
|
||||
operation: "enhance_provider_merchants",
|
||||
metadata: { merchant_count: merchants.size, json_mode: mode }
|
||||
)
|
||||
|
||||
span&.end(output: result.map(&:to_h), usage: response.dig("usage"))
|
||||
result
|
||||
rescue => e
|
||||
span&.end(output: { error: e.message }, level: "ERROR")
|
||||
raise
|
||||
end
|
||||
|
||||
EnhancedMerchant = Provider::LlmConcept::EnhancedMerchant
|
||||
|
||||
def build_response(raw_merchants)
|
||||
raw_merchants.map do |merchant|
|
||||
EnhancedMerchant.new(
|
||||
merchant_id: merchant.dig("merchant_id"),
|
||||
business_url: normalize_value(merchant.dig("business_url"))
|
||||
)
|
||||
end
|
||||
end
|
||||
|
||||
# Normalizes a raw `business_url` value coming back from the model.
#
# Returns nil when the value is nil, blank, or the literal word "null" in any
# letter case (with or without surrounding whitespace). String values are
# returned with surrounding whitespace removed; non-string values are
# returned unchanged.
def normalize_value(value)
  return nil if value.nil?

  text = value.to_s.strip
  return nil if text.empty? || text.casecmp?("null")

  value.is_a?(String) ? text : value
end
|
||||
|
||||
# Pulls the merchants array out of a Responses-API style payload and converts
# it via `build_response`.
#
# Looks for the first output item whose "type" is "message" and reads the
# text of its first content part, which is expected to be a JSON object with
# a "merchants" array.
#
# Raises Provider::Openai::Error when no message text is present, when the
# text is not valid JSON, or when the JSON has no "merchants" array.
def extract_and_build_response_native(response)
  message_output = response["output"]&.find { |o| o["type"] == "message" }
  raw = message_output&.dig("content", 0, "text")

  raise Provider::Openai::Error, "No message content found in response" if raw.nil?

  parsed = JSON.parse(raw)
  merchants_data = parsed.is_a?(Hash) ? parsed["merchants"] : nil

  # Without this guard a missing/odd "merchants" key would surface later as a
  # NoMethodError inside build_response instead of a provider error.
  unless merchants_data.is_a?(Array)
    raise Provider::Openai::Error, "Could not find merchants in response"
  end

  build_response(merchants_data)
rescue JSON::ParserError => e
  raise Provider::Openai::Error, "Invalid JSON in merchant enhancement: #{e.message}"
end
|
||||
|
||||
# Pulls the merchants array out of a chat-completions style payload and
# converts it via `build_response`.
#
# The message content is parsed with `parse_json_flexibly`. Accepted shapes:
#   - an object with a "merchants" array
#   - an object with a "results" array
#   - a bare top-level array of merchant objects
#
# Each entry is normalized to the canonical keys, accepting "id" for
# "merchant_id" and "url" / "website" for "business_url". Entries that are
# not JSON objects are ignored.
#
# Raises Provider::Openai::Error when no merchants array can be located.
def extract_and_build_response_generic(response)
  raw = response.dig("choices", 0, "message", "content")
  parsed = parse_json_flexibly(raw)

  # Dispatch on type first: calling dig("merchants") on an Array raises
  # TypeError, which previously made the bare-array shape unreachable.
  merchants_data =
    case parsed
    when Array then parsed
    when Hash then parsed["merchants"] || parsed["results"]
    end

  unless merchants_data.is_a?(Array)
    raise Provider::Openai::Error, "Could not find merchants in response"
  end

  normalized = merchants_data.grep(Hash).map do |m|
    {
      "merchant_id" => m["merchant_id"] || m["id"],
      "business_url" => m["business_url"] || m["url"] || m["website"]
    }
  end

  build_response(normalized)
end
|
||||
|
||||
# Reuse flexible JSON parsing from AutoMerchantDetector
|
||||
def parse_json_flexibly(raw)
|
||||
return {} if raw.blank?
|
||||
|
||||
cleaned = strip_thinking_tags(raw)
|
||||
|
||||
JSON.parse(cleaned)
|
||||
rescue JSON::ParserError
|
||||
# Strategy 1: Closed markdown code blocks
|
||||
if cleaned =~ /```(?:json)?\s*(\{[\s\S]*?\})\s*```/m
|
||||
matches = cleaned.scan(/```(?:json)?\s*(\{[\s\S]*?\})\s*```/m).flatten
|
||||
matches.reverse_each do |match|
|
||||
begin
|
||||
return JSON.parse(match)
|
||||
rescue JSON::ParserError
|
||||
next
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
# Strategy 2: Unclosed markdown code blocks
|
||||
if cleaned =~ /```(?:json)?\s*(\{[\s\S]*\})\s*$/m
|
||||
begin
|
||||
return JSON.parse($1)
|
||||
rescue JSON::ParserError
|
||||
end
|
||||
end
|
||||
|
||||
# Strategy 3: Find JSON object with "merchants" key
|
||||
if cleaned =~ /(\{"merchants"\s*:\s*\[[\s\S]*\]\s*\})/m
|
||||
matches = cleaned.scan(/(\{"merchants"\s*:\s*\[[\s\S]*?\]\s*\})/m).flatten
|
||||
matches.reverse_each do |match|
|
||||
begin
|
||||
return JSON.parse(match)
|
||||
rescue JSON::ParserError
|
||||
next
|
||||
end
|
||||
end
|
||||
begin
|
||||
return JSON.parse($1)
|
||||
rescue JSON::ParserError
|
||||
end
|
||||
end
|
||||
|
||||
# Strategy 4: Find any JSON object
|
||||
if cleaned =~ /(\{[\s\S]*\})/m
|
||||
begin
|
||||
return JSON.parse($1)
|
||||
rescue JSON::ParserError
|
||||
end
|
||||
end
|
||||
|
||||
raise Provider::Openai::Error, "Could not parse JSON from response: #{raw.truncate(200)}"
|
||||
end
|
||||
|
||||
def strip_thinking_tags(raw)
|
||||
if raw.include?("<think>")
|
||||
if raw =~ /<\/think>\s*([\s\S]*)/m
|
||||
after_thinking = $1.strip
|
||||
return after_thinking if after_thinking.present?
|
||||
end
|
||||
if raw =~ /<think>([\s\S]*)/m
|
||||
return $1
|
||||
end
|
||||
end
|
||||
raw
|
||||
end
|
||||
|
||||
def json_schema
|
||||
{
|
||||
type: "object",
|
||||
properties: {
|
||||
merchants: {
|
||||
type: "array",
|
||||
description: "An array of merchant website detections",
|
||||
items: {
|
||||
type: "object",
|
||||
properties: {
|
||||
merchant_id: {
|
||||
type: "string",
|
||||
description: "The internal ID of the merchant",
|
||||
enum: merchants.map { |m| m[:id] }
|
||||
},
|
||||
business_url: {
|
||||
type: [ "string", "null" ],
|
||||
description: "The website URL of the business, or `null` if uncertain"
|
||||
}
|
||||
},
|
||||
required: [ "merchant_id", "business_url" ],
|
||||
additionalProperties: false
|
||||
}
|
||||
}
|
||||
},
|
||||
required: [ "merchants" ],
|
||||
additionalProperties: false
|
||||
}
|
||||
end
|
||||
|
||||
def developer_message
|
||||
<<~MESSAGE.strip_heredoc
|
||||
Identify the business website URL for each of the following merchants:
|
||||
|
||||
```json
|
||||
#{merchants.to_json}
|
||||
```
|
||||
|
||||
Return "null" if you are not 80%+ confident in your answer.
|
||||
MESSAGE
|
||||
end
|
||||
|
||||
def developer_message_for_generic
|
||||
<<~MESSAGE.strip_heredoc
|
||||
MERCHANTS TO IDENTIFY:
|
||||
#{format_merchants_simply}
|
||||
|
||||
EXAMPLES of correct website detection:
|
||||
- "Amazon" → business_url: "amazon.com"
|
||||
- "Starbucks" → business_url: "starbucks.com"
|
||||
- "Netflix" → business_url: "netflix.com"
|
||||
- "Local Diner" → business_url: "null" (generic/unknown)
|
||||
- "ATM Withdrawal" → business_url: "null" (generic)
|
||||
|
||||
IMPORTANT:
|
||||
- Return "null" (as a string) if you cannot confidently identify the business website
|
||||
- Don't include "www." in URLs
|
||||
|
||||
Respond with ONLY this JSON format (no other text):
|
||||
{"merchants": [{"merchant_id": "...", "business_url": "..."}]}
|
||||
MESSAGE
|
||||
end
|
||||
|
||||
def format_merchants_simply
|
||||
merchants.map do |m|
|
||||
"- ID: #{m[:id]}, Name: #{m[:name].to_json}"
|
||||
end.join("\n")
|
||||
end
|
||||
end
|
||||
@@ -12,8 +12,7 @@ class ProviderMerchant < Merchant
|
||||
family_merchant = family.merchants.create!(
|
||||
name: attributes[:name].presence || name,
|
||||
color: attributes[:color].presence || FamilyMerchant::COLORS.sample,
|
||||
logo_url: logo_url,
|
||||
website_url: website_url
|
||||
website_url: attributes[:website_url].presence || website_url
|
||||
)
|
||||
|
||||
# Update only this family's transactions to point to new merchant
|
||||
@@ -23,6 +22,17 @@ class ProviderMerchant < Merchant
|
||||
end
|
||||
end
|
||||
|
||||
# Generate logo URL from website_url using BrandFetch, if configured.
|
||||
# Regenerates logo_url from website_url using BrandFetch.
#
# - Blank website_url: the logo is cleared.
# - BrandFetch client id not configured: the record is left untouched.
# - No domain can be extracted from website_url: the record is left untouched
#   rather than persisting a CDN URL with an empty domain segment.
#
# Persists with update!, so validation failures raise.
def generate_logo_url_from_website!
  return update!(logo_url: nil) if website_url.blank?
  return unless Setting.brand_fetch_client_id.present?

  domain = extract_domain(website_url)
  return if domain.blank?

  size = Setting.brand_fetch_logo_size
  update!(logo_url: "https://cdn.brandfetch.io/#{domain}/icon/fallback/lettermark/w/#{size}/h/#{size}?c=#{Setting.brand_fetch_client_id}")
end
|
||||
|
||||
# Unlink from family's transactions (set merchant_id to null).
|
||||
# Does NOT delete the ProviderMerchant since it may be used by other families.
|
||||
# Tracks the unlink in FamilyMerchantAssociation so it shows as "recently unlinked".
|
||||
@@ -33,4 +43,13 @@ class ProviderMerchant < Merchant
|
||||
association = FamilyMerchantAssociation.find_or_initialize_by(family: family, merchant: self)
|
||||
association.update!(unlinked_at: Time.current)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
# Extracts a bare, lower-cased host (no scheme, no leading "www.") from a
# URL or domain string.
#
# Input is trimmed first; a scheme is prepended only when none is present
# (checked case-insensitively, so "HTTPS://x.com" is not double-prefixed).
# Returns nil for nil/blank input or when the parsed URI has no host. If the
# string cannot be parsed as a URI, falls back to stripping the scheme and
# "www." prefix textually.
def extract_domain(url)
  candidate = url.to_s.strip
  return nil if candidate.empty?

  candidate = "https://#{candidate}" unless candidate.match?(%r{\Ahttps?://}i)
  URI.parse(candidate).host&.downcase&.sub(/\Awww\./, "")
rescue URI::InvalidURIError
  url.to_s.strip.sub(%r{\Ahttps?://}i, "").sub(/\Awww\./i, "")
end
|
||||
end
|
||||
|
||||
98
app/models/provider_merchant/enhancer.rb
Normal file
98
app/models/provider_merchant/enhancer.rb
Normal file
@@ -0,0 +1,98 @@
|
||||
class ProviderMerchant::Enhancer
|
||||
BATCH_SIZE = 25
|
||||
|
||||
def initialize(family)
|
||||
@family = family
|
||||
end
|
||||
|
||||
# Enhances this family's provider merchants that have no website_url.
#
# Merchants are sent to the LLM provider in slices of BATCH_SIZE. For every
# result carrying a business_url, the matching merchant gets its website_url
# set (plus a BrandFetch logo_url when a client id is configured), and other
# provider merchants of the family with the same website are merged into it
# via deduplicate_by_website.
#
# Returns a hash of counters: { enhanced: Integer, deduplicated: Integer }.
# Returns zero counts when no LLM provider is available or nothing needs
# enhancing. A failed batch is logged and skipped; a merchant that fails
# validation is logged and skipped without aborting the run.
def enhance
  return { enhanced: 0, deduplicated: 0 } unless llm_provider
  return { enhanced: 0, deduplicated: 0 } if unenhanced_merchants.none?

  Rails.logger.info("Enhancing #{unenhanced_merchants.count} provider merchants for family #{@family.id}")

  enhanced_count = 0
  deduplicated_count = 0

  unenhanced_merchants.each_slice(BATCH_SIZE) do |batch|
    result = llm_provider.enhance_provider_merchants(
      merchants: batch.map { |m| { id: m.id, name: m.name } },
      family: @family
    )

    unless result.success?
      # Keep going with the remaining batches, but leave a trace so partial
      # runs can be diagnosed.
      Rails.logger.warn("Provider merchant enhancement failed for a batch of #{batch.size} merchants (family #{@family.id}); skipping batch")
      next
    end

    # Index once per batch; ids are compared as strings because the model
    # echoes merchant ids back as JSON strings.
    merchants_by_id = batch.to_h { |m| [ m.id.to_s, m ] }

    result.data.each do |enhancement|
      next unless enhancement.business_url.present?

      merchant = merchants_by_id[enhancement.merchant_id.to_s]
      next unless merchant
      # Skip if already enhanced. This checks the in-memory record, so it also
      # prevents double-processing when the response repeats a merchant id.
      next if merchant.website_url.present?

      # Step 1: Update the provider merchant with website + logo
      updates = { website_url: enhancement.business_url }
      updates[:logo_url] = build_logo_url(enhancement.business_url) if Setting.brand_fetch_client_id.present?
      merchant.update!(updates)
      enhanced_count += 1

      # Step 2: Deduplicate — find other merchants with the same website_url
      # and merge them INTO this provider merchant (prefer provider over AI)
      deduplicated_count += deduplicate_by_website(merchant, enhancement.business_url)
    rescue ActiveRecord::RecordInvalid => e
      Rails.logger.error("Failed to enhance merchant #{merchant&.id}: #{e.message}")
    end
  end

  Rails.logger.info("Enhanced #{enhanced_count} merchants, deduplicated #{deduplicated_count} for family #{@family.id}")

  { enhanced: enhanced_count, deduplicated: deduplicated_count }
end
|
||||
|
||||
private
|
||||
|
||||
def deduplicate_by_website(target_merchant, website_url)
|
||||
# Find duplicate provider merchants assigned to this family with the same website_url.
|
||||
# Excludes FamilyMerchants — user-curated merchants should never be touched by dedup.
|
||||
duplicates = @family.assigned_merchants
|
||||
.where(type: "ProviderMerchant")
|
||||
.where(website_url: website_url)
|
||||
.where.not(id: target_merchant.id)
|
||||
|
||||
return 0 if duplicates.none?
|
||||
|
||||
count = 0
|
||||
duplicates.each do |duplicate|
|
||||
# Reassign family's transactions from duplicate to target
|
||||
@family.transactions.where(merchant_id: duplicate.id)
|
||||
.update_all(merchant_id: target_merchant.id)
|
||||
count += 1
|
||||
end
|
||||
count
|
||||
end
|
||||
|
||||
def llm_provider
|
||||
@llm_provider ||= Provider::Registry.get_provider(:openai)
|
||||
end
|
||||
|
||||
def unenhanced_merchants
|
||||
@unenhanced_merchants ||= @family.assigned_merchants
|
||||
.where(type: "ProviderMerchant")
|
||||
.where(website_url: [ nil, "" ])
|
||||
.to_a
|
||||
end
|
||||
|
||||
def build_logo_url(business_url)
|
||||
return nil unless Setting.brand_fetch_client_id.present? && business_url.present?
|
||||
domain = extract_domain(business_url)
|
||||
return nil unless domain.present?
|
||||
size = Setting.brand_fetch_logo_size
|
||||
"https://cdn.brandfetch.io/#{domain}/icon/fallback/lettermark/w/#{size}/h/#{size}?c=#{Setting.brand_fetch_client_id}"
|
||||
end
|
||||
|
||||
# Extracts a bare, lower-cased host (no scheme, no leading "www.") from a
# URL or domain string.
#
# Input is trimmed first; a scheme is prepended only when none is present
# (checked case-insensitively, so "HTTPS://x.com" is not double-prefixed).
# Returns nil for nil/blank input or when the parsed URI has no host. If the
# string cannot be parsed as a URI, falls back to stripping the scheme and
# "www." prefix textually.
def extract_domain(url)
  candidate = url.to_s.strip
  return nil if candidate.empty?

  candidate = "https://#{candidate}" unless candidate.match?(%r{\Ahttps?://}i)
  URI.parse(candidate).host&.downcase&.sub(/\Awww\./, "")
rescue URI::InvalidURIError
  url.to_s.strip.sub(%r{\Ahttps?://}i, "").sub(/\Awww\./i, "")
end
|
||||
end
|
||||
@@ -21,12 +21,10 @@
|
||||
<div class="relative flex items-center border border-secondary rounded-lg text-subdued">
|
||||
<%= f.text_field :name, placeholder: t(".name_placeholder"), autofocus: true, required: true, data: { color_avatar_target: "name" } %>
|
||||
</div>
|
||||
<% if family_merchant.is_a?(FamilyMerchant) %>
|
||||
<div class="relative flex items-center border border-secondary rounded-lg text-subdued">
|
||||
<%= f.text_field :website_url, placeholder: t(".website_placeholder") %>
|
||||
</div>
|
||||
<p class="text-xs text-subdued"><%= t(".website_hint") %></p>
|
||||
<% end %>
|
||||
<div class="relative flex items-center border border-secondary rounded-lg text-subdued">
|
||||
<%= f.text_field :website_url, placeholder: t(".website_placeholder") %>
|
||||
</div>
|
||||
<p class="text-xs text-subdued"><%= t(".website_hint") %></p>
|
||||
</section>
|
||||
|
||||
<section>
|
||||
|
||||
@@ -68,6 +68,22 @@
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<% if @enhanceable_count > 0 && @llm_available %>
|
||||
<div class="flex items-center justify-between p-4 bg-container-inset border border-secondary rounded-lg">
|
||||
<div class="flex items-start gap-2">
|
||||
<%= icon "sparkles", class: "w-5 h-5 text-link mt-0.5 flex-shrink-0" %>
|
||||
<p class="text-xs text-secondary leading-relaxed"><%= t(".enhance_info", count: @enhanceable_count) %></p>
|
||||
</div>
|
||||
<%= render DS::Button.new(
|
||||
text: t(".enhance_button"),
|
||||
variant: :outline,
|
||||
size: :sm,
|
||||
icon: "sparkles",
|
||||
href: enhance_family_merchants_path
|
||||
) %>
|
||||
</div>
|
||||
<% end %>
|
||||
|
||||
<% if @provider_merchants.any? %>
|
||||
<div class="rounded-xl bg-container-inset space-y-1 p-1">
|
||||
<div class="bg-container rounded-lg shadow-border-xs overflow-x-auto">
|
||||
|
||||
@@ -24,6 +24,10 @@ en:
|
||||
provider_empty: "No provider merchants linked to this %{moniker} yet"
|
||||
provider_read_only: Provider merchants are synced from your connected institutions. They cannot be edited here.
|
||||
provider_info: These merchants were automatically detected by your bank connections or AI. You can edit them to create your own copy, or remove them to unlink from your transactions.
|
||||
enhance_info:
|
||||
one: "%{count} provider merchant is missing website information. Enhance with AI to detect websites, display logos, and merge duplicate merchants."
|
||||
other: "%{count} provider merchants are missing website information. Enhance with AI to detect websites, display logos, and merge duplicate merchants."
|
||||
enhance_button: Enhance with AI
|
||||
unlinked_title: Recently unlinked
|
||||
unlinked_info: These merchants were recently removed from your transactions. They will disappear from this list after 30 days unless re-assigned to a transaction.
|
||||
table:
|
||||
@@ -57,6 +61,9 @@ en:
|
||||
remove: Remove
|
||||
remove_confirm_title: Remove merchant?
|
||||
remove_confirm_body: Are you sure you want to remove %{name}? This will unlink all associated transactions from this merchant but will not delete the merchant itself.
|
||||
enhance:
|
||||
success: Provider merchant enhancement started. Merchants will be enhanced and duplicates merged shortly.
|
||||
already_running: Enhancement is already in progress. Please wait for it to finish.
|
||||
update:
|
||||
success: Merchant updated successfully
|
||||
converted_success: Merchant converted and updated successfully
|
||||
|
||||
@@ -224,6 +224,7 @@ Rails.application.routes.draw do
|
||||
collection do
|
||||
get :merge
|
||||
post :perform_merge
|
||||
post :enhance
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@@ -36,4 +36,12 @@ class FamilyMerchantsControllerTest < ActionDispatch::IntegrationTest
|
||||
|
||||
assert_redirected_to family_merchants_path
|
||||
end
|
||||
|
||||
test "enhance enqueues job and redirects" do
|
||||
assert_enqueued_with(job: EnhanceProviderMerchantsJob) do
|
||||
post enhance_family_merchants_path
|
||||
end
|
||||
|
||||
assert_redirected_to family_merchants_path
|
||||
end
|
||||
end
|
||||
|
||||
103
test/models/provider_merchant/enhancer_test.rb
Normal file
103
test/models/provider_merchant/enhancer_test.rb
Normal file
@@ -0,0 +1,103 @@
|
||||
require "test_helper"
|
||||
|
||||
class ProviderMerchant::EnhancerTest < ActiveSupport::TestCase
|
||||
include EntriesTestHelper, ProviderTestHelper
|
||||
|
||||
setup do
|
||||
@family = families(:dylan_family)
|
||||
@account = @family.accounts.create!(name: "Enhancer test", balance: 100, currency: "USD", accountable: Depository.new)
|
||||
@llm_provider = mock
|
||||
Provider::Registry.stubs(:get_provider).with(:openai).returns(@llm_provider)
|
||||
Setting.stubs(:brand_fetch_client_id).returns("test_client_id")
|
||||
Setting.stubs(:brand_fetch_logo_size).returns(40)
|
||||
end
|
||||
|
||||
test "enhances provider merchants with website and logo" do
|
||||
merchant = ProviderMerchant.create!(source: "lunchflow", name: "Walmart", provider_merchant_id: "lf_walmart")
|
||||
create_transaction(account: @account, name: "Walmart purchase", merchant: merchant)
|
||||
|
||||
provider_response = provider_success_response([
|
||||
EnhancedMerchant.new(merchant_id: merchant.id, business_url: "walmart.com")
|
||||
])
|
||||
|
||||
@llm_provider.expects(:enhance_provider_merchants).returns(provider_response).once
|
||||
|
||||
result = ProviderMerchant::Enhancer.new(@family).enhance
|
||||
|
||||
assert_equal 1, result[:enhanced]
|
||||
assert_equal "walmart.com", merchant.reload.website_url
|
||||
assert_equal "https://cdn.brandfetch.io/walmart.com/icon/fallback/lettermark/w/40/h/40?c=test_client_id", merchant.logo_url
|
||||
end
|
||||
|
||||
test "skips merchants when LLM returns null" do
|
||||
merchant = ProviderMerchant.create!(source: "lunchflow", name: "Local Diner", provider_merchant_id: "lf_local")
|
||||
create_transaction(account: @account, name: "Local diner", merchant: merchant)
|
||||
|
||||
provider_response = provider_success_response([
|
||||
EnhancedMerchant.new(merchant_id: merchant.id, business_url: nil)
|
||||
])
|
||||
|
||||
@llm_provider.expects(:enhance_provider_merchants).returns(provider_response).once
|
||||
|
||||
result = ProviderMerchant::Enhancer.new(@family).enhance
|
||||
|
||||
assert_equal 0, result[:enhanced]
|
||||
assert_nil merchant.reload.website_url
|
||||
end
|
||||
|
||||
test "deduplicates merchants by website_url" do
|
||||
lunchflow_merchant = ProviderMerchant.create!(source: "lunchflow", name: "Walmart", provider_merchant_id: "lf_walmart")
|
||||
ai_merchant = ProviderMerchant.create!(source: "ai", name: "Walmart", website_url: "walmart.com",
|
||||
logo_url: "https://cdn.brandfetch.io/walmart.com/icon/fallback/lettermark/w/40/h/40?c=test_client_id")
|
||||
|
||||
txn1 = create_transaction(account: @account, name: "Walmart purchase 1", merchant: lunchflow_merchant).transaction
|
||||
txn2 = create_transaction(account: @account, name: "Walmart purchase 2", merchant: ai_merchant).transaction
|
||||
|
||||
provider_response = provider_success_response([
|
||||
EnhancedMerchant.new(merchant_id: lunchflow_merchant.id, business_url: "walmart.com")
|
||||
])
|
||||
|
||||
@llm_provider.expects(:enhance_provider_merchants).returns(provider_response).once
|
||||
|
||||
result = ProviderMerchant::Enhancer.new(@family).enhance
|
||||
|
||||
assert_equal 1, result[:enhanced]
|
||||
assert_equal 1, result[:deduplicated]
|
||||
assert_equal "walmart.com", lunchflow_merchant.reload.website_url
|
||||
|
||||
# AI merchant's transactions should be reassigned to the lunchflow merchant
|
||||
assert_equal lunchflow_merchant.id, txn2.reload.merchant_id
|
||||
assert_equal lunchflow_merchant.id, txn1.reload.merchant_id
|
||||
end
|
||||
|
||||
test "returns zero counts when no LLM provider" do
|
||||
Provider::Registry.stubs(:get_provider).with(:openai).returns(nil)
|
||||
|
||||
result = ProviderMerchant::Enhancer.new(@family).enhance
|
||||
|
||||
assert_equal 0, result[:enhanced]
|
||||
assert_equal 0, result[:deduplicated]
|
||||
end
|
||||
|
||||
test "returns zero counts when no unenhanced merchants" do
|
||||
result = ProviderMerchant::Enhancer.new(@family).enhance
|
||||
|
||||
assert_equal 0, result[:enhanced]
|
||||
assert_equal 0, result[:deduplicated]
|
||||
end
|
||||
|
||||
test "skips merchants that already have website_url" do
|
||||
merchant = ProviderMerchant.create!(source: "lunchflow", name: "Amazon", provider_merchant_id: "lf_amazon", website_url: "amazon.com")
|
||||
create_transaction(account: @account, name: "Amazon order", merchant: merchant)
|
||||
|
||||
# Should not call LLM because no merchants need enhancement
|
||||
@llm_provider.expects(:enhance_provider_merchants).never
|
||||
|
||||
result = ProviderMerchant::Enhancer.new(@family).enhance
|
||||
|
||||
assert_equal 0, result[:enhanced]
|
||||
end
|
||||
|
||||
private
|
||||
EnhancedMerchant = Provider::LlmConcept::EnhancedMerchant
|
||||
end
|
||||
Reference in New Issue
Block a user