# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
"""Unit tests for ``hash_from_str`` and ``hash_from_dict``.

Most tests patch ``superset.utils.hashing.get_hash_algorithm`` so that the
configured algorithm is fixed for the duration of the test.
"""

import math
from unittest.mock import patch

import pytest

from superset.utils.hashing import (
    hash_from_dict,
    hash_from_str,
)

# Dotted path of the function that the tests replace to pin the algorithm.
_GET_HASH_ALGORITHM = "superset.utils.hashing.get_hash_algorithm"

# Reference hex digests used as expected values.
_MD5_OF_TEST = "098f6bcd4621d373cade4e832627b4f6"
_SHA256_OF_TEST = "9f86d081884c7d659a2feaa0c55ad015a3bf4f1b2b0b822cd15d6c15b0f00a08"
_SHA256_OF_EMPTY = "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"

# Length of the hexadecimal digest for each algorithm.
_MD5_HEX_LENGTH = 32
_SHA256_HEX_LENGTH = 64


def _configured_algorithm(name: str):
    """Return a patcher making ``get_hash_algorithm`` return ``name``.

    The result is meant to be used as a context manager (``with ...:``).
    """
    return patch(_GET_HASH_ALGORITHM, return_value=name)


def test_hash_from_str_sha256() -> None:
    """Test SHA-256 hashing produces expected output."""
    with _configured_algorithm("sha256"):
        result = hash_from_str("test")

    assert result == _SHA256_OF_TEST


def test_hash_from_str_md5() -> None:
    """Test MD5 hashing for backward compatibility."""
    with _configured_algorithm("md5"):
        result = hash_from_str("test")

    assert result == _MD5_OF_TEST


def test_hash_from_dict_deterministic() -> None:
    """Test dictionary hashing is deterministic."""
    with _configured_algorithm("sha256"):
        obj = {"key": "value", "number": 42}
        first = hash_from_dict(obj)
        second = hash_from_dict(obj)

    assert first == second


def test_hash_from_dict_key_order_invariant() -> None:
    """Test dictionary hashing is invariant to key order."""
    with _configured_algorithm("sha256"):
        ordered = {"a": 1, "b": 2, "c": 3}
        shuffled = {"c": 3, "a": 1, "b": 2}

        assert hash_from_dict(ordered) == hash_from_dict(shuffled)


def test_hash_algorithm_override() -> None:
    """Test an explicit ``algorithm`` argument overrides the configured one."""
    # Config set to SHA-256
    with _configured_algorithm("sha256"):
        # Force MD5 via parameter
        assert hash_from_str("test", algorithm="md5") == _MD5_OF_TEST

        # Force SHA-256 via parameter (redundant but valid)
        assert hash_from_str("test", algorithm="sha256") == _SHA256_OF_TEST


def test_backward_compatibility_alias_md5() -> None:
    """Test ``hash_from_str`` returns the MD5 digest when MD5 is configured.

    NOTE(review): the test name refers to a legacy alias, but only the
    current ``hash_from_str`` API is called here -- confirm whether a legacy
    name still exists and should be exercised instead.
    """
    with _configured_algorithm("md5"):
        result = hash_from_str("test")

    assert result == _MD5_OF_TEST


def test_backward_compatibility_alias_sha256() -> None:
    """Test ``hash_from_str`` returns SHA-256, not MD5, when so configured.

    NOTE(review): the test name refers to a legacy alias, but only the
    current ``hash_from_str`` API is called here -- confirm whether a legacy
    name still exists and should be exercised instead.
    """
    with _configured_algorithm("sha256"):
        result = hash_from_str("test")

    # Should return SHA-256, not MD5
    assert len(result) == _SHA256_HEX_LENGTH
    assert result == _SHA256_OF_TEST


def test_backward_compatibility_dict_alias() -> None:
    """Test ``hash_from_dict`` yields a SHA-256-sized digest when configured.

    NOTE(review): the test name refers to a legacy alias, but only the
    current ``hash_from_dict`` API is called here -- confirm whether a legacy
    name still exists and should be exercised instead.
    """
    with _configured_algorithm("sha256"):
        result = hash_from_dict({"key": "value"})

    # Should use SHA-256
    assert len(result) == _SHA256_HEX_LENGTH


def test_invalid_algorithm_raises() -> None:
    """Test invalid algorithm raises ValueError."""
    with pytest.raises(ValueError, match="Unsupported hash algorithm"):
        hash_from_str("test", algorithm="sha1")


def test_empty_string() -> None:
    """Test hashing empty string."""
    with _configured_algorithm("sha256"):
        result = hash_from_str("")

    # SHA-256 of empty string
    assert result == _SHA256_OF_EMPTY


def test_empty_dict() -> None:
    """Test hashing empty dictionary."""
    with _configured_algorithm("sha256"):
        result = hash_from_dict({})

    # Only the shape of the digest is checked, not its exact value.
    assert isinstance(result, str)
    assert len(result) == _SHA256_HEX_LENGTH


def test_unicode_string() -> None:
    """Test hashing Unicode strings."""
    with _configured_algorithm("sha256"):
        # Mixes ASCII, CJK and an emoji to cover multi-byte code points.
        result = hash_from_str("Hello 世界 🌍")

    # Should handle Unicode correctly
    assert isinstance(result, str)
    assert len(result) == _SHA256_HEX_LENGTH


def test_nested_dict() -> None:
    """Test hashing nested dictionaries."""
    with _configured_algorithm("sha256"):
        obj = {"outer": {"inner": {"deep": "value"}}, "list": [1, 2, 3]}
        result = hash_from_dict(obj)

    assert isinstance(result, str)
    assert len(result) == _SHA256_HEX_LENGTH


def test_dict_with_nan() -> None:
    """Test hashing dictionary with NaN values."""
    with _configured_algorithm("sha256"):
        obj = {"value": math.nan, "normal": 42}
        # Should handle NaN with ignore_nan parameter
        result = hash_from_dict(obj, ignore_nan=True)

    assert isinstance(result, str)
    assert len(result) == _SHA256_HEX_LENGTH


def test_hash_consistency_across_runs() -> None:
    """Test that hashing is consistent across multiple invocations."""
    with _configured_algorithm("sha256"):
        test_string = "consistency_test"
        results = [hash_from_str(test_string) for _ in range(10)]

    # All results should be identical
    assert len(set(results)) == 1


def test_md5_vs_sha256_different_outputs() -> None:
    """Test that MD5 and SHA-256 produce different hashes."""
    test_string = "compare"

    with _configured_algorithm("md5"):
        md5_result = hash_from_str(test_string)

    with _configured_algorithm("sha256"):
        sha256_result = hash_from_str(test_string)

    # Hashes should be different
    assert md5_result != sha256_result
    # MD5 produces 32 character hex string
    assert len(md5_result) == _MD5_HEX_LENGTH
    # SHA-256 produces 64 character hex string
    assert len(sha256_result) == _SHA256_HEX_LENGTH