diff --git a/app/models/provider/yahoo_finance.rb b/app/models/provider/yahoo_finance.rb index eff80232f..5ebfaf099 100644 --- a/app/models/provider/yahoo_finance.rb +++ b/app/models/provider/yahoo_finance.rb @@ -5,15 +5,33 @@ class Provider::YahooFinance < Provider Error = Class.new(Provider::Error) InvalidSecurityPriceError = Class.new(Error) RateLimitError = Class.new(Error) + AuthenticationError = Class.new(Error) InvalidSymbolError = Class.new(Error) MarketClosedError = Class.new(Error) # Cache duration for repeated requests (5 minutes) CACHE_DURATION = 5.minutes + # Maximum cache duration for cookie/crumb authentication + # Even if cookie has longer expiry, cap it to avoid stale crumbs + MAX_CRUMB_CACHE_DURATION = 1.hour + # Maximum lookback window for historical data (configurable) MAX_LOOKBACK_WINDOW = 10.years + # Minimum delay between requests to avoid rate limiting (in seconds) + MIN_REQUEST_INTERVAL = 0.5 + + # Pool of modern browser user-agents to rotate through + # Based on https://github.com/ranaroussi/yfinance/pull/2277 + USER_AGENTS = [ + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.2 Safari/605.1.15", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 Edg/131.0.0.0", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:133.0) Gecko/20100101 Firefox/133.0" + ].freeze + def initialize # Yahoo Finance doesn't require an API key but we may want to add proxy support later @cache_prefix = "yahoo_finance" @@ -133,6 +151,7 @@ class Provider::YahooFinance < Provider return cached_result end + throttle_request response = client.get("#{base_url}/v1/finance/search") do |req| req.params["q"] = 
symbol.strip.upcase req.params["quotesCount"] = 25 @@ -160,12 +179,29 @@ class Provider::YahooFinance < Provider def fetch_security_info(symbol:, exchange_operating_mic:) with_provider_response do - # Use quoteSummary endpoint which is more reliable - response = client.get("#{base_url}/v10/finance/quoteSummary/#{symbol}") do |req| + # quoteSummary endpoint requires cookie/crumb authentication + throttle_request + cookie, crumb = fetch_cookie_and_crumb + + response = authenticated_client(cookie).get("#{base_url}/v10/finance/quoteSummary/#{symbol}") do |req| req.params["modules"] = "assetProfile,price,quoteType" + req.params["crumb"] = crumb end data = JSON.parse(response.body) + + # Check for auth errors in response body + if data.dig("quoteSummary", "error", "code") == "Unauthorized" + # Clear cached crumb and retry once + clear_crumb_cache + cookie, crumb = fetch_cookie_and_crumb + response = authenticated_client(cookie).get("#{base_url}/v10/finance/quoteSummary/#{symbol}") do |req| + req.params["modules"] = "assetProfile,price,quoteType" + req.params["crumb"] = crumb + end + data = JSON.parse(response.body) + end + result = data.dig("quoteSummary", "result", 0) raise Error, "No security info found for #{symbol}" unless result @@ -231,6 +267,7 @@ class Provider::YahooFinance < Provider period1 = start_date.to_time.utc.to_i period2 = end_date.end_of_day.to_time.utc.to_i + throttle_request response = client.get("#{base_url}/v8/finance/chart/#{symbol}") do |req| req.params["period1"] = period1 req.params["period2"] = period2 @@ -416,8 +453,8 @@ class Provider::YahooFinance < Provider period1 = start_date.to_time.utc.to_i period2 = end_date.end_of_day.to_time.utc.to_i - begin + throttle_request response = client.get("#{base_url}/v8/finance/chart/#{symbol}") do |req| req.params["period1"] = period1 req.params["period2"] = period2 @@ -457,10 +494,11 @@ class Provider::YahooFinance < Provider def client @client ||= Faraday.new(url: base_url) do |faraday| 
# Picks one User-Agent string at random from the USER_AGENTS pool.
def random_user_agent
  USER_AGENTS.sample
end

# Maximum number of retries handed to the Faraday retry middleware.
# Overridable through YAHOO_FINANCE_MAX_RETRIES; falls back to 5 when the
# variable is unset, not a base-10 integer, or negative.
def max_retries
  numeric_env_setting("YAHOO_FINANCE_MAX_RETRIES", 5) { |raw| Integer(raw, 10, exception: false) }
end

# Base retry interval (seconds) handed to the Faraday retry middleware.
# Overridable through YAHOO_FINANCE_RETRY_INTERVAL; falls back to 1.0 when the
# variable is unset, not numeric, or negative.
def retry_interval
  numeric_env_setting("YAHOO_FINANCE_RETRY_INTERVAL", 1.0) { |raw| Float(raw, exception: false) }
end

# Minimum spacing (seconds) that throttle_request keeps between requests.
# Overridable through YAHOO_FINANCE_MIN_REQUEST_INTERVAL; falls back to
# MIN_REQUEST_INTERVAL when the variable is unset, not numeric, or negative.
def min_request_interval
  numeric_env_setting("YAHOO_FINANCE_MIN_REQUEST_INTERVAL", MIN_REQUEST_INTERVAL.to_f) do |raw|
    Float(raw, exception: false)
  end
end

# Blocks until at least min_request_interval seconds have passed since the
# previous call on this instance. The very first call never waits.
#
# Elapsed time is measured on the monotonic clock so wall-clock adjustments
# (or frozen time in tests) cannot stretch or skip the pause.
def throttle_request
  now = Process.clock_gettime(Process::CLOCK_MONOTONIC)

  if @last_request_monotonic
    wait = min_request_interval - (now - @last_request_monotonic)
    sleep(wait) if wait.positive?
  end

  # Re-read the clock: the timestamp must reflect the moment after any pause.
  @last_request_monotonic = Process.clock_gettime(Process::CLOCK_MONOTONIC)
end

# Reads a numeric setting from ENV.
#
# name    - environment variable name
# default - value returned when the variable is unset or unusable
# block   - converts the stripped raw string to a number, or nil if invalid
#
# Strict parsing matters here: String#to_i / #to_f would quietly turn "abc"
# into 0 and thereby disable retries or throttling without any warning.
def numeric_env_setting(name, default)
  raw = ENV[name]
  return default if raw.nil?

  value = yield(raw.strip)
  usable = value && value.finite? && !value.negative?
  usable ? value : default
end
# ================================
# Cookie/Crumb Authentication
# ================================

# Returns a [cookie, crumb] pair for endpoints that need cookie/crumb
# authentication, reusing a cached pair when one exists.
#
# The pair is cached under "<cache_prefix>_auth_crumb" for the cookie's
# Max-Age, capped at MAX_CRUMB_CACHE_DURATION. A missing or zero Max-Age uses
# the cap.
#
# Raises AuthenticationError when no cookie is returned, when the crumb
# request is not successful, when the crumb body is empty or an HTML page,
# or when Faraday reports a transport error.
def fetch_cookie_and_crumb
  cache_key = "#{@cache_prefix}_auth_crumb"
  cached = Rails.cache.read(cache_key)
  return cached if cached.present?

  # Step 1: Get cookie from Yahoo Finance. The status of this response is
  # intentionally not checked; only the Set-Cookie header matters.
  cookie_response = auth_client.get("https://fc.yahoo.com")
  cookie = extract_cookie(cookie_response)

  raise AuthenticationError, "Failed to obtain Yahoo Finance cookie" if cookie.blank?

  # Step 2: Get crumb using the cookie
  crumb_response = auth_client.get("#{base_url}/v1/test/getcrumb") do |req|
    req.headers["Cookie"] = cookie
  end

  # auth_client does not raise on HTTP errors, so an error response must be
  # rejected here. Otherwise its body (e.g. "Too Many Requests") would be
  # cached and replayed as if it were a crumb.
  unless crumb_response.success?
    raise AuthenticationError, "Failed to obtain Yahoo Finance crumb (HTTP #{crumb_response.status})"
  end

  crumb = crumb_response.body.to_s.strip

  raise AuthenticationError, "Failed to obtain Yahoo Finance crumb" if crumb.blank? || crumb.match?(/<html/i)

  # Cache the cookie/crumb pair using cookie's max-age, capped at MAX_CRUMB_CACHE_DURATION
  cookie_max_age = extract_cookie_max_age(cookie_response)
  usable_max_age = cookie_max_age && cookie_max_age > 0
  cache_duration = usable_max_age ? [ cookie_max_age, MAX_CRUMB_CACHE_DURATION ].min : MAX_CRUMB_CACHE_DURATION

  result = [ cookie, crumb ]
  Rails.cache.write(cache_key, result, expires_in: cache_duration)
  result
rescue Faraday::Error => e
  raise AuthenticationError, "Failed to authenticate with Yahoo Finance: #{e.message}"
end

# Drops the cached cookie/crumb pair so the next fetch_cookie_and_crumb call
# performs a fresh handshake.
def clear_crumb_cache
  Rails.cache.delete("#{@cache_prefix}_auth_crumb")
end

# Extracts the "name=value" part of the Set-Cookie header.
#
# Header format: "A3=d=xxx&S=xxx; Max-Age=31557600; ..." - only the text
# before the first semicolon is needed. Returns nil when the header is
# missing or has no such part.
def extract_cookie(response)
  set_cookie = response.headers["set-cookie"]
  return nil if set_cookie.blank?

  set_cookie.split(";").first&.strip
end

# Extracts Max-Age from the Set-Cookie header as a duration in seconds.
#
# Header format: "...; Max-Age=31557600; ...". Returns nil when the header
# is missing or carries no Max-Age attribute.
def extract_cookie_max_age(response)
  set_cookie = response.headers["set-cookie"]
  return nil if set_cookie.blank?

  raw_max_age = set_cookie[/Max-Age=(\d+)/i, 1]
  return nil unless raw_max_age

  raw_max_age.to_i.seconds
end
# Memoized connection for the cookie/crumb handshake.
# It installs no raise_error middleware on purpose: fc.yahoo.com returns 404
# but still sets the cookie, so error statuses must reach the caller.
def auth_client
  @auth_client ||= Faraday.new do |conn|
    {
      "User-Agent" => random_user_agent,
      "Accept" => "*/*",
      "Accept-Language" => "en-US,en;q=0.9"
    }.each { |header, value| conn.headers[header] = value }

    conn.options.timeout = 10
    conn.options.open_timeout = 5
  end
end

# Builds a new connection against base_url that sends the given cookie with
# every request. Retries on 429 and on connection/timeout failures, and
# raises Faraday errors for other error statuses.
#
# cookie - the "name=value" cookie string to send in the Cookie header
def authenticated_client(cookie)
  Faraday.new(url: base_url) do |conn|
    retry_options = {
      max: max_retries,
      interval: retry_interval,
      interval_randomness: 0.5,
      backoff_factor: 2,
      retry_statuses: [ 429 ],
      exceptions: [ Faraday::ConnectionFailed, Faraday::TimeoutError ]
    }
    conn.request(:retry, retry_options)

    conn.request(:json)
    conn.response(:raise_error)

    {
      "User-Agent" => random_user_agent,
      "Accept" => "application/json",
      "Accept-Language" => "en-US,en;q=0.9",
      "Cache-Control" => "no-cache",
      "Pragma" => "no-cache",
      "Cookie" => cookie
    }.each { |header, value| conn.headers[header] = value }

    conn.options.timeout = 10
    conn.options.open_timeout = 5
  end
end
# A 401 raised inside with_provider_response must come back as a failed
# result whose error is an AuthenticationError.
test "handles 401 unauthorized as authentication error" do
  unauthorized_error = Faraday::UnauthorizedError.new("Unauthorized", { body: "Invalid Crumb" })

  # The block raises the error itself, so no HTTP client needs to be stubbed.
  result = @provider.send(:with_provider_response) { raise unauthorized_error }

  assert_not result.success?
  assert_instance_of Provider::YahooFinance::AuthenticationError, result.error
  assert_match(/authentication failed/, result.error.message)
end

# ================================
# User-Agent Rotation Tests
# ================================

test "random_user_agent returns value from USER_AGENTS pool" do
  user_agent = @provider.send(:random_user_agent)
  assert_includes Provider::YahooFinance::USER_AGENTS, user_agent
end

test "USER_AGENTS contains multiple modern browser user-agents" do
  # assert_operator reports the actual pool size when the check fails.
  assert_operator Provider::YahooFinance::USER_AGENTS.length, :>=, 5
  assert Provider::YahooFinance::USER_AGENTS.all? { |ua| ua.include?("Mozilla") }
end
# ================================
# Throttling Tests
# ================================

# Records the pauses throttle_request asks for instead of really sleeping,
# so the test is fast and does not depend on wall-clock timing.
test "throttle_request enforces minimum interval between requests" do
  pauses = []

  @provider.stub :sleep, ->(duration) { pauses << duration } do
    @provider.send(:throttle_request)
    assert_empty pauses, "First request should not wait"

    @provider.send(:throttle_request)
  end

  min_interval = @provider.send(:min_request_interval)
  assert_equal 1, pauses.size, "Second request should wait exactly once"
  assert_in_delta min_interval, pauses.first, 0.05
end

# ================================
# Configuration Tests
# ================================

test "max_retries returns default value" do
  with_unset_env("YAHOO_FINANCE_MAX_RETRIES") do
    assert_equal 5, @provider.send(:max_retries)
  end
end

test "retry_interval returns default value" do
  with_unset_env("YAHOO_FINANCE_RETRY_INTERVAL") do
    assert_equal 1.0, @provider.send(:retry_interval)
  end
end

test "min_request_interval returns default value" do
  with_unset_env("YAHOO_FINANCE_MIN_REQUEST_INTERVAL") do
    assert_equal 0.5, @provider.send(:min_request_interval)
  end
end

# Runs the block with the named environment variables removed, then restores
# their previous values. This keeps the default-value tests independent of
# whatever the surrounding environment has configured.
def with_unset_env(*names)
  saved = names.to_h { |name| [ name, ENV.delete(name) ] }
  yield
ensure
  saved&.each { |name, value| ENV[name] = value }
end

# ================================
# Cookie/Crumb Authentication Tests
# ================================

test "extract_cookie extracts cookie from set-cookie header" do
  mock_response = OpenStruct.new(
    headers: { "set-cookie" => "B=abc123&b=3&s=qf; expires=Fri, 18-May-2028 00:00:00 GMT; path=/; domain=.yahoo.com" }
  )

  cookie = @provider.send(:extract_cookie, mock_response)
  assert_equal "B=abc123&b=3&s=qf", cookie
end

test "extract_cookie returns nil when no cookie header" do
  mock_response = OpenStruct.new(headers: {})
  cookie = @provider.send(:extract_cookie, mock_response)
  assert_nil cookie
end
test "extract_cookie_max_age parses Max-Age from cookie header" do
  mock_response = OpenStruct.new(
    headers: { "set-cookie" => "A3=d=xxx; Max-Age=31557600; Domain=.yahoo.com" }
  )

  max_age = @provider.send(:extract_cookie_max_age, mock_response)
  assert_equal 31557600.seconds, max_age
end

test "extract_cookie_max_age returns nil when no Max-Age" do
  mock_response = OpenStruct.new(
    headers: { "set-cookie" => "A3=d=xxx; Domain=.yahoo.com" }
  )

  max_age = @provider.send(:extract_cookie_max_age, mock_response)
  assert_nil max_age
end

# Uses an in-memory store for the duration of the test. If the configured
# test cache store discards writes (e.g. :null_store), the final assert_nil
# would pass even when clear_crumb_cache did nothing.
test "clear_crumb_cache removes cached crumb" do
  Rails.stub :cache, ActiveSupport::Cache::MemoryStore.new do
    Rails.cache.write("yahoo_finance_auth_crumb", [ "cookie", "crumb" ])
    # Prove the entry exists first so the deletion below is meaningful.
    assert_equal [ "cookie", "crumb" ], Rails.cache.read("yahoo_finance_auth_crumb")

    @provider.send(:clear_crumb_cache)
    assert_nil Rails.cache.read("yahoo_finance_auth_crumb")
  end
end