feat: add option for hash algorithms (#35621)

Co-authored-by: Ville Brofeldt <33317356+villebro@users.noreply.github.com>
This commit is contained in:
Daniel Vaz Gaspar
2025-12-09 16:59:07 +00:00
committed by GitHub
parent 8d7c83419c
commit bb22eb1ca8
36 changed files with 1032 additions and 166 deletions

View File

@@ -22,7 +22,7 @@ from unittest.mock import MagicMock, patch
import pytest
from pytest_mock import MockerFixture
from superset.utils.hashing import md5_sha_from_dict
from superset.utils.hashing import hash_from_dict
from superset.utils.screenshots import (
BaseScreenshot,
ChartScreenshot,
@@ -74,9 +74,9 @@ def test_get_screenshot(mocker: MockerFixture, screenshot_obj):
assert screenshot_data == fake_bytes
def test_get_cache_key(screenshot_obj):
def test_get_cache_key(app_context, screenshot_obj):
"""Test get_cache_key method"""
expected_cache_key = md5_sha_from_dict(
expected_cache_key = hash_from_dict(
{
"thumbnail_type": "",
"digest": screenshot_obj.digest,

View File

@@ -687,7 +687,9 @@ def test_merge_extra_filters():
"clause": "WHERE",
"comparator": "someval",
"expressionType": "SIMPLE",
"filterOptionName": "90cfb3c34852eb3bc741b0cc20053b46",
"filterOptionName": (
"eb77ff8188437d8722af8c932727da1e83ec37e88aaf800a3859ed352d87119f"
),
"isExtra": True,
"operator": "in",
"subject": "a",
@@ -696,7 +698,9 @@ def test_merge_extra_filters():
"clause": "WHERE",
"comparator": ["c1", "c2"],
"expressionType": "SIMPLE",
"filterOptionName": "6c178d069965f1c02640661280415d96",
"filterOptionName": (
"48dd60c7ecb8699b51e36ce956ba481aa5382548811aecec71af7e550c59762c"
),
"isExtra": True,
"operator": "==",
"subject": "B",
@@ -735,7 +739,9 @@ def test_merge_extra_filters():
"clause": "WHERE",
"comparator": "someval",
"expressionType": "SIMPLE",
"filterOptionName": "90cfb3c34852eb3bc741b0cc20053b46",
"filterOptionName": (
"eb77ff8188437d8722af8c932727da1e83ec37e88aaf800a3859ed352d87119f"
),
"isExtra": True,
"operator": "in",
"subject": "a",
@@ -744,7 +750,9 @@ def test_merge_extra_filters():
"clause": "WHERE",
"comparator": ["c1", "c2"],
"expressionType": "SIMPLE",
"filterOptionName": "6c178d069965f1c02640661280415d96",
"filterOptionName": (
"48dd60c7ecb8699b51e36ce956ba481aa5382548811aecec71af7e550c59762c"
),
"isExtra": True,
"operator": "==",
"subject": "B",
@@ -769,7 +777,9 @@ def test_merge_extra_filters():
"clause": "WHERE",
"comparator": "hello",
"expressionType": "SIMPLE",
"filterOptionName": "e3cbdd92a2ae23ca92c6d7fca42e36a6",
"filterOptionName": (
"2ca91524f5ab8e39d6aa5373d1f11301ad2c5b95f5aa77eb30d92f572f5b9157"
),
"isExtra": True,
"operator": "like",
"subject": "A",
@@ -933,7 +943,9 @@ def test_merge_extra_filters_merges_different_val_types():
"clause": "WHERE",
"comparator": ["g1", "g2"],
"expressionType": "SIMPLE",
"filterOptionName": "c11969c994b40a83a4ae7d48ff1ea28e",
"filterOptionName": (
"e2f7d6304169124258364916403b2d9208fce39dd7771797726111b7498bbd52"
),
"isExtra": True,
"operator": "in",
"subject": "a",
@@ -985,7 +997,9 @@ def test_merge_extra_filters_merges_different_val_types():
"clause": "WHERE",
"comparator": "someval",
"expressionType": "SIMPLE",
"filterOptionName": "90cfb3c34852eb3bc741b0cc20053b46",
"filterOptionName": (
"eb77ff8188437d8722af8c932727da1e83ec37e88aaf800a3859ed352d87119f"
),
"isExtra": True,
"operator": "in",
"subject": "a",
@@ -1040,7 +1054,9 @@ def test_merge_extra_filters_adds_unequal_lists():
"clause": "WHERE",
"comparator": ["g1", "g2", "g3"],
"expressionType": "SIMPLE",
"filterOptionName": "21cbb68af7b17e62b3b2f75e2190bfd7",
"filterOptionName": (
"b3f17391546e130560efd1e841742bc5f154d09a7d534b8c0ec33fc1c8a146cd"
),
"isExtra": True,
"operator": "in",
"subject": "a",
@@ -1049,7 +1065,9 @@ def test_merge_extra_filters_adds_unequal_lists():
"clause": "WHERE",
"comparator": ["c1", "c2", "c3"],
"expressionType": "SIMPLE",
"filterOptionName": "0a8dcb928f1f4bba97643c6e68d672f1",
"filterOptionName": (
"41ef70f6edada46006253189b27778088da2cf27ccc69f703634493d7396708a"
),
"isExtra": True,
"operator": "==",
"subject": "B",

View File

@@ -0,0 +1,183 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from unittest.mock import patch
import pytest
from superset.utils.hashing import (
hash_from_dict,
hash_from_str,
)
def test_hash_from_str_sha256():
    """Check ``hash_from_str("test")`` against its known SHA-256 hex digest.

    ``get_hash_algorithm`` is patched to return ``"sha256"`` for the duration
    of the call.
    """
    known_digest = "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08"
    algorithm_getter = "superset.utils.hashing.get_hash_algorithm"
    with patch(algorithm_getter, return_value="sha256"):
        digest = hash_from_str("test")
    assert digest == known_digest
def test_hash_from_str_md5():
    """Check ``hash_from_str("test")`` against its known MD5 hex digest.

    ``get_hash_algorithm`` is patched to return ``"md5"``, the value kept for
    backward compatibility.
    """
    known_digest = "098f6bcd4621d373cade4e832627b4f6"
    algorithm_getter = "superset.utils.hashing.get_hash_algorithm"
    with patch(algorithm_getter, return_value="md5"):
        digest = hash_from_str("test")
    assert digest == known_digest
def test_hash_from_dict_deterministic():
    """Hashing the same dictionary twice must give the same digest."""
    payload = {"key": "value", "number": 42}
    algorithm_getter = "superset.utils.hashing.get_hash_algorithm"
    with patch(algorithm_getter, return_value="sha256"):
        first_digest = hash_from_dict(payload)
        second_digest = hash_from_dict(payload)
    assert first_digest == second_digest
def test_hash_from_dict_key_order_invariant():
    """Two dicts with equal items but different insertion order hash the same."""
    ordered = {"a": 1, "b": 2, "c": 3}
    shuffled = {"c": 3, "a": 1, "b": 2}
    algorithm_getter = "superset.utils.hashing.get_hash_algorithm"
    with patch(algorithm_getter, return_value="sha256"):
        ordered_digest = hash_from_dict(ordered)
        shuffled_digest = hash_from_dict(shuffled)
    assert ordered_digest == shuffled_digest
def test_hash_algorithm_override():
    """An explicit ``algorithm`` argument selects the digest that is produced.

    The patched getter returns ``"sha256"`` throughout; each explicit
    ``algorithm`` value is then checked against the known digest of ``"test"``.
    """
    # Checked in insertion order: MD5 first (differs from the patched value),
    # then SHA-256 (redundant with the patched value, but still valid).
    expected_by_algorithm = {
        "md5": "098f6bcd4621d373cade4e832627b4f6",
        "sha256": "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08",
    }
    algorithm_getter = "superset.utils.hashing.get_hash_algorithm"
    with patch(algorithm_getter, return_value="sha256"):
        for algorithm, expected_digest in expected_by_algorithm.items():
            digest = hash_from_str("test", algorithm=algorithm)
            assert digest == expected_digest
def test_backward_compatibility_alias_md5():
    """With MD5 selected, ``hash_from_str`` returns the historical MD5 digest.

    NOTE(review): the test name mentions a legacy alias, but the body calls
    ``hash_from_str`` directly -- confirm whether an alias should be exercised.
    """
    legacy_md5_digest = "098f6bcd4621d373cade4e832627b4f6"
    algorithm_getter = "superset.utils.hashing.get_hash_algorithm"
    with patch(algorithm_getter, return_value="md5"):
        digest = hash_from_str("test")
    assert digest == legacy_md5_digest
def test_backward_compatibility_alias_sha256():
    """With SHA-256 selected, ``hash_from_str`` returns a SHA-256 digest, not MD5.

    NOTE(review): the test name mentions a legacy alias, but the body calls
    ``hash_from_str`` directly -- confirm whether an alias should be exercised.
    """
    sha256_digest = "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08"
    algorithm_getter = "superset.utils.hashing.get_hash_algorithm"
    with patch(algorithm_getter, return_value="sha256"):
        digest = hash_from_str("test")
    # A SHA-256 hex digest is 64 characters long (an MD5 one would be 32).
    assert len(digest) == 64
    assert digest == sha256_digest
def test_backward_compatibility_dict_alias():
    """With SHA-256 selected, ``hash_from_dict`` returns a 64-character digest.

    NOTE(review): the test name mentions a legacy alias, but the body calls
    ``hash_from_dict`` directly -- confirm whether an alias should be exercised.
    """
    payload = {"key": "value"}
    algorithm_getter = "superset.utils.hashing.get_hash_algorithm"
    with patch(algorithm_getter, return_value="sha256"):
        digest = hash_from_dict(payload)
    # 64 hex characters is the length of a SHA-256 digest.
    assert len(digest) == 64
def test_invalid_algorithm_raises():
    """Passing ``algorithm="sha1"`` must raise ``ValueError``.

    The error message is expected to match "Unsupported hash algorithm".
    """
    message_pattern = "Unsupported hash algorithm"
    with pytest.raises(ValueError, match=message_pattern):
        hash_from_str("test", algorithm="sha1")
def test_empty_string():
    """Hashing ``""`` under SHA-256 gives the well-known empty-input digest."""
    empty_sha256 = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
    algorithm_getter = "superset.utils.hashing.get_hash_algorithm"
    with patch(algorithm_getter, return_value="sha256"):
        digest = hash_from_str("")
    assert digest == empty_sha256
def test_empty_dict():
    """Hashing an empty dict under SHA-256 yields a 64-character string.

    Only the type and length of the result are verified here.
    """
    algorithm_getter = "superset.utils.hashing.get_hash_algorithm"
    with patch(algorithm_getter, return_value="sha256"):
        digest = hash_from_dict({})
    # Presumably the digest of the JSON text "{}" -- the exact value is not
    # pinned by this test; TODO confirm against the hashing implementation.
    assert isinstance(digest, str)
    assert len(digest) == 64
def test_unicode_string():
    """A string with CJK characters and an emoji hashes to a 64-character string.

    Only the type and length of the result are verified, not the exact digest.
    """
    text = "Hello 世界 🌍"
    algorithm_getter = "superset.utils.hashing.get_hash_algorithm"
    with patch(algorithm_getter, return_value="sha256"):
        digest = hash_from_str(text)
    assert isinstance(digest, str)
    assert len(digest) == 64
def test_nested_dict():
    """A dict containing nested dicts and a list hashes to a 64-character string.

    Only the type and length of the result are verified, not the exact digest.
    """
    payload = {"outer": {"inner": {"deep": "value"}}, "list": [1, 2, 3]}
    algorithm_getter = "superset.utils.hashing.get_hash_algorithm"
    with patch(algorithm_getter, return_value="sha256"):
        digest = hash_from_dict(payload)
    assert isinstance(digest, str)
    assert len(digest) == 64
def test_dict_with_nan():
    """A dict holding a NaN value can be hashed when ``ignore_nan=True`` is passed.

    Only the type and length of the result are verified, not the exact digest.
    """
    import math

    payload = {"value": math.nan, "normal": 42}
    algorithm_getter = "superset.utils.hashing.get_hash_algorithm"
    with patch(algorithm_getter, return_value="sha256"):
        digest = hash_from_dict(payload, ignore_nan=True)
    assert isinstance(digest, str)
    assert len(digest) == 64
def test_hash_consistency_across_runs():
    """Ten consecutive hashes of the same string must all be identical."""
    text = "consistency_test"
    algorithm_getter = "superset.utils.hashing.get_hash_algorithm"
    with patch(algorithm_getter, return_value="sha256"):
        # Collecting into a set leaves one element only if every call agreed.
        distinct_digests = {hash_from_str(text) for _ in range(10)}
    assert len(distinct_digests) == 1
def test_md5_vs_sha256_different_outputs():
    """The same input hashes differently under MD5 and SHA-256.

    Also checks the hex digest lengths: 32 characters for MD5 and 64 for
    SHA-256.
    """
    text = "compare"
    algorithm_getter = "superset.utils.hashing.get_hash_algorithm"

    # Hash the same input once per patched algorithm, MD5 first.
    digest_by_algorithm = {}
    for algorithm in ("md5", "sha256"):
        with patch(algorithm_getter, return_value=algorithm):
            digest_by_algorithm[algorithm] = hash_from_str(text)

    md5_digest = digest_by_algorithm["md5"]
    sha256_digest = digest_by_algorithm["sha256"]
    assert md5_digest != sha256_digest
    assert len(md5_digest) == 32
    assert len(sha256_digest) == 64