Files
superset2/tests/unit_tests/utils/test_hashing.py
2025-12-09 16:59:07 +00:00

184 lines
6.7 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from unittest.mock import patch
import pytest
from superset.utils.hashing import (
hash_from_dict,
hash_from_str,
)
def test_hash_from_str_sha256():
    """Verify ``hash_from_str`` yields the known SHA-256 digest of ``"test"``."""
    algorithm_lookup = "superset.utils.hashing.get_hash_algorithm"
    known_digest = "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08"
    # Stub the algorithm lookup so it reports SHA-256 for the duration of the call.
    with patch(algorithm_lookup, return_value="sha256"):
        digest = hash_from_str("test")
    assert digest == known_digest
def test_hash_from_str_md5():
    """Verify ``hash_from_str`` yields the known MD5 digest of ``"test"``.

    MD5 is exercised here for backward compatibility.
    """
    algorithm_lookup = "superset.utils.hashing.get_hash_algorithm"
    known_digest = "098f6bcd4621d373cade4e832627b4f6"
    # Stub the algorithm lookup so it reports MD5 for the duration of the call.
    with patch(algorithm_lookup, return_value="md5"):
        digest = hash_from_str("test")
    assert digest == known_digest
def test_hash_from_dict_deterministic():
    """Hashing the same dictionary twice must give the same digest."""
    payload = {"key": "value", "number": 42}
    with patch("superset.utils.hashing.get_hash_algorithm", return_value="sha256"):
        first_digest = hash_from_dict(payload)
        second_digest = hash_from_dict(payload)
    assert first_digest == second_digest
def test_hash_from_dict_key_order_invariant():
    """Dictionaries with equal items in a different key order must hash alike."""
    ordered = {"a": 1, "b": 2, "c": 3}
    shuffled = {"c": 3, "a": 1, "b": 2}
    with patch("superset.utils.hashing.get_hash_algorithm", return_value="sha256"):
        ordered_digest = hash_from_dict(ordered)
        shuffled_digest = hash_from_dict(shuffled)
    assert ordered_digest == shuffled_digest
def test_hash_algorithm_override():
    """An explicit ``algorithm`` argument decides which digest is produced."""
    # Known digests of "test", keyed by the algorithm requested via the parameter.
    digest_by_algorithm = {
        # Forced MD5 while the configured algorithm is SHA-256.
        "md5": "098f6bcd4621d373cade4e832627b4f6",
        # Forced SHA-256: redundant with the stubbed setting, but still valid.
        "sha256": "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08",
    }
    # The configured algorithm is stubbed to SHA-256 for every call below.
    with patch("superset.utils.hashing.get_hash_algorithm", return_value="sha256"):
        for algorithm, known_digest in digest_by_algorithm.items():
            assert hash_from_str("test", algorithm=algorithm) == known_digest
def test_backward_compatibility_alias_md5():
    """Check the MD5 digest of ``"test"`` when MD5 is the stubbed algorithm.

    NOTE(review): the test name refers to a legacy alias, but only
    ``hash_from_str`` is called here -- confirm that is intended.
    """
    md5_of_test = "098f6bcd4621d373cade4e832627b4f6"
    with patch("superset.utils.hashing.get_hash_algorithm", return_value="md5"):
        digest = hash_from_str("test")
    assert digest == md5_of_test
def test_backward_compatibility_alias_sha256():
    """Check the SHA-256 digest of ``"test"`` when SHA-256 is the stubbed algorithm.

    NOTE(review): the test name refers to a legacy alias, but only
    ``hash_from_str`` is called here -- confirm that is intended.
    """
    sha256_hex_length = 64
    sha256_of_test = "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08"
    with patch("superset.utils.hashing.get_hash_algorithm", return_value="sha256"):
        digest = hash_from_str("test")
    # The length check distinguishes a SHA-256 digest from a 32-character MD5 one.
    assert len(digest) == sha256_hex_length
    assert digest == sha256_of_test
def test_backward_compatibility_dict_alias():
    """Check ``hash_from_dict`` returns a 64-character digest under SHA-256.

    NOTE(review): the test name refers to a legacy alias, but only
    ``hash_from_dict`` is called here -- confirm that is intended.
    """
    payload = {"key": "value"}
    with patch("superset.utils.hashing.get_hash_algorithm", return_value="sha256"):
        digest = hash_from_dict(payload)
    # 64 hex characters is the length of a SHA-256 digest.
    assert len(digest) == 64
def test_invalid_algorithm_raises():
    """Passing ``algorithm="sha1"`` must raise ``ValueError`` with a clear message."""
    expected_message = "Unsupported hash algorithm"
    with pytest.raises(ValueError, match=expected_message):
        hash_from_str("test", algorithm="sha1")
def test_empty_string():
    """Verify the SHA-256 digest produced for the empty string."""
    # Well-known SHA-256 digest of zero bytes of input.
    sha256_of_empty = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
    with patch("superset.utils.hashing.get_hash_algorithm", return_value="sha256"):
        digest = hash_from_str("")
    assert digest == sha256_of_empty
def test_empty_dict():
    """Verify an empty dictionary hashes to a 64-character string under SHA-256."""
    with patch("superset.utils.hashing.get_hash_algorithm", return_value="sha256"):
        digest = hash_from_dict({})
    # Presumably the serialized form "{}" is what gets hashed (TODO confirm);
    # only the result's type and length are checked here.
    assert isinstance(digest, str)
    assert len(digest) == 64
def test_unicode_string():
    """Verify non-ASCII text (CJK characters and an emoji) can be hashed."""
    text = "Hello 世界 🌍"
    with patch("superset.utils.hashing.get_hash_algorithm", return_value="sha256"):
        digest = hash_from_str(text)
    # Only the shape of the result is checked, not the exact digest.
    assert isinstance(digest, str)
    assert len(digest) == 64
def test_nested_dict():
    """Verify a dictionary containing nested dictionaries and a list can be hashed."""
    payload = {"outer": {"inner": {"deep": "value"}}, "list": [1, 2, 3]}
    with patch("superset.utils.hashing.get_hash_algorithm", return_value="sha256"):
        digest = hash_from_dict(payload)
    # Only the shape of the result is checked, not the exact digest.
    assert isinstance(digest, str)
    assert len(digest) == 64
def test_dict_with_nan():
    """Verify a dictionary holding a NaN value hashes when ``ignore_nan=True``."""
    import math

    payload = {"value": math.nan, "normal": 42}
    with patch("superset.utils.hashing.get_hash_algorithm", return_value="sha256"):
        # ignore_nan is passed so the NaN entry does not prevent hashing.
        digest = hash_from_dict(payload, ignore_nan=True)
    assert isinstance(digest, str)
    assert len(digest) == 64
def test_hash_consistency_across_runs():
    """Ten consecutive hashes of the same string must all be identical."""
    sample = "consistency_test"
    with patch("superset.utils.hashing.get_hash_algorithm", return_value="sha256"):
        # A set collapses equal digests, so one element means every call agreed.
        distinct_digests = {hash_from_str(sample) for _ in range(10)}
    assert len(distinct_digests) == 1
def test_md5_vs_sha256_different_outputs():
    """MD5 and SHA-256 must give different digests of different lengths."""
    sample = "compare"
    digests = {}
    # Hash the same input once per stubbed algorithm, MD5 first, then SHA-256.
    for algorithm in ("md5", "sha256"):
        with patch("superset.utils.hashing.get_hash_algorithm", return_value=algorithm):
            digests[algorithm] = hash_from_str(sample)
    assert digests["md5"] != digests["sha256"]
    # An MD5 hex digest is 32 characters; a SHA-256 hex digest is 64.
    assert len(digests["md5"]) == 32
    assert len(digests["sha256"]) == 64