fix(cache): ensure SQL is sanitized before cache key generation (#35419)

This commit is contained in:
Beto Dealmeida
2025-10-02 13:25:54 -04:00
committed by GitHub
parent 6ad8d29fcd
commit 62dc5c0306
6 changed files with 158 additions and 9 deletions

View File

@@ -270,6 +270,60 @@ class TestQueryContext(SupersetTestCase):
cache_key = query_context.query_cache_key(query_object)
assert cache_key_original != cache_key
def test_query_cache_key_consistent_with_different_sql_formatting(self):
"""
Test that cache keys are consistent regardless of SQL clause formatting.
This test verifies the fix for the cache key mismatch issue where different
whitespace formatting in WHERE/HAVING clauses caused different cache keys
to be generated between server and worker processes.
"""
# Create payload with compact WHERE clause
payload1 = get_query_context("birth_names")
payload1["queries"][0]["extras"] = {"where": "(name = 'Amy')"}
query_context1 = ChartDataQueryContextSchema().load(payload1)
# Use get_df_payload which is the actual code path, not query_cache_key directly
result1 = query_context1.get_df_payload(
query_context1.queries[0], force_cached=False
)
cache_key1 = result1.get("cache_key")
# Create same payload but with pretty-formatted WHERE clause (with newlines)
payload2 = get_query_context("birth_names")
payload2["queries"][0]["extras"] = {"where": "(\n name = 'Amy'\n)"}
query_context2 = ChartDataQueryContextSchema().load(payload2)
result2 = query_context2.get_df_payload(
query_context2.queries[0], force_cached=False
)
cache_key2 = result2.get("cache_key")
# Cache keys should be identical after sanitization
assert cache_key1 == cache_key2
# Also verify with HAVING clause
payload3 = get_query_context("birth_names")
payload3["queries"][0]["extras"] = {"having": "(sum__num > 100)"}
query_context3 = ChartDataQueryContextSchema().load(payload3)
result3 = query_context3.get_df_payload(
query_context3.queries[0], force_cached=False
)
cache_key3 = result3.get("cache_key")
payload4 = get_query_context("birth_names")
payload4["queries"][0]["extras"] = {"having": "(\n sum__num > 100\n)"}
query_context4 = ChartDataQueryContextSchema().load(payload4)
result4 = query_context4.get_df_payload(
query_context4.queries[0], force_cached=False
)
cache_key4 = result4.get("cache_key")
# Cache keys should be identical after sanitization
assert cache_key3 == cache_key4
def test_handle_metrics_field(self):
"""
Should support both predefined and adhoc metrics.