mirror of
https://github.com/apache/superset.git
synced 2026-04-19 08:04:53 +00:00
fix(cache): ensure SQL is sanitized before cache key generation (#35419)
This commit is contained in:
@@ -624,3 +624,80 @@ def test_processing_time_offsets_date_range_enabled(processor):
|
||||
assert isinstance(result["df"], pd.DataFrame)
|
||||
assert isinstance(result["queries"], list)
|
||||
assert isinstance(result["cache_keys"], list)
|
||||
|
||||
|
||||
def test_get_df_payload_validates_before_cache_key_generation():
|
||||
"""
|
||||
Test that get_df_payload calls validate() before generating cache key.
|
||||
"""
|
||||
from superset.common.query_object import QueryObject
|
||||
|
||||
# Create a mock query context
|
||||
mock_query_context = MagicMock()
|
||||
mock_query_context.force = False
|
||||
mock_query_context.result_type = "full"
|
||||
|
||||
# Create a mock datasource
|
||||
mock_datasource = MagicMock()
|
||||
mock_datasource.id = 123
|
||||
mock_datasource.uid = "test_datasource"
|
||||
mock_datasource.cache_timeout = None
|
||||
mock_datasource.database.db_engine_spec.engine = "postgresql"
|
||||
mock_datasource.database.extra = "{}"
|
||||
mock_datasource.get_extra_cache_keys.return_value = []
|
||||
mock_datasource.changed_on = None
|
||||
|
||||
# Create processor
|
||||
processor = QueryContextProcessor(mock_query_context)
|
||||
processor._qc_datasource = mock_datasource
|
||||
|
||||
# Create a query object with unsanitized where clause
|
||||
query_obj = QueryObject(
|
||||
datasource=mock_datasource,
|
||||
columns=["col1"],
|
||||
metrics=[],
|
||||
extras={"where": "(\n col1 > 0\n)"}, # Unsanitized with newlines
|
||||
)
|
||||
|
||||
# Track the order of calls
|
||||
call_order = []
|
||||
|
||||
original_validate = query_obj.validate
|
||||
|
||||
def mock_validate(*args, **kwargs):
|
||||
call_order.append("validate")
|
||||
# Update extras to simulate sanitization
|
||||
query_obj.extras["where"] = "(col1 > 0)" # Sanitized, compact format
|
||||
return original_validate(*args, **kwargs)
|
||||
|
||||
original_cache_key = query_obj.cache_key
|
||||
|
||||
def mock_cache_key(*args, **kwargs):
|
||||
call_order.append("cache_key")
|
||||
# Verify that extras have been sanitized at this point
|
||||
assert query_obj.extras["where"] == "(col1 > 0)", (
|
||||
f"Expected sanitized clause in cache_key, got: {query_obj.extras['where']}"
|
||||
)
|
||||
return original_cache_key(*args, **kwargs)
|
||||
|
||||
with patch.object(query_obj, "validate", side_effect=mock_validate):
|
||||
with patch.object(query_obj, "cache_key", side_effect=mock_cache_key):
|
||||
with patch(
|
||||
"superset.common.query_context_processor.QueryCacheManager"
|
||||
) as mock_cache_manager:
|
||||
mock_cache = MagicMock()
|
||||
mock_cache.is_loaded = True
|
||||
mock_cache.df = pd.DataFrame({"col1": [1, 2, 3]})
|
||||
mock_cache.query = "SELECT * FROM table"
|
||||
mock_cache.error_message = None
|
||||
mock_cache.status = "success"
|
||||
mock_cache_manager.get.return_value = mock_cache
|
||||
|
||||
# Call get_df_payload
|
||||
processor.get_df_payload(query_obj, force_cached=False)
|
||||
|
||||
# Verify validate was called before cache_key
|
||||
assert call_order == ["validate", "cache_key"], (
|
||||
f"Expected validate to be called before cache_key, "
|
||||
f"but got call order: {call_order}"
|
||||
)
|
||||
|
||||
@@ -2593,9 +2593,17 @@ def test_is_valid_cvas(sql: str, engine: str, expected: bool) -> None:
|
||||
[
|
||||
("col = 1", "col = 1", "base"),
|
||||
("1=\t\n1", "1 = 1", "base"),
|
||||
("(col = 1)", "(\n col = 1\n)", "base"),
|
||||
("(col1 = 1) AND (col2 = 2)", "(\n col1 = 1\n) AND (\n col2 = 2\n)", "base"),
|
||||
("col = 'abc' -- comment", "col = 'abc' /* comment */", "base"),
|
||||
("(col = 1)", "(col = 1)", "base"), # Compact format without newlines
|
||||
(
|
||||
"(col1 = 1) AND (col2 = 2)",
|
||||
"(col1 = 1) AND (col2 = 2)",
|
||||
"base",
|
||||
), # Compact format
|
||||
(
|
||||
"col = 'abc' -- comment",
|
||||
"col = 'abc'",
|
||||
"base",
|
||||
), # Comments removed for compact format
|
||||
("col = 'col1 = 1) AND (col2 = 2'", "col = 'col1 = 1) AND (col2 = 2'", "base"),
|
||||
("col = 'select 1; select 2'", "col = 'select 1; select 2'", "base"),
|
||||
("col = 'abc -- comment'", "col = 'abc -- comment'", "base"),
|
||||
|
||||
Reference in New Issue
Block a user