mirror of
https://github.com/apache/superset.git
synced 2026-04-19 08:04:53 +00:00
feat: add option for hash algorithms (#35621)
Co-authored-by: Ville Brofeldt <33317356+villebro@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
8d7c83419c
commit
bb22eb1ca8
@@ -19,7 +19,7 @@ import pytest
|
||||
from superset.utils.core import form_data_to_adhoc, simple_filter_to_adhoc
|
||||
|
||||
|
||||
def test_simple_filter_to_adhoc_generates_deterministic_values():
|
||||
def test_simple_filter_to_adhoc_generates_deterministic_values(app_context):
|
||||
input_1 = {
|
||||
"op": "IS NOT NULL",
|
||||
"col": "LATITUDE",
|
||||
@@ -30,13 +30,16 @@ def test_simple_filter_to_adhoc_generates_deterministic_values():
|
||||
|
||||
# The result is the same when given the same input
|
||||
assert simple_filter_to_adhoc(input_1) == simple_filter_to_adhoc(input_1)
|
||||
# SHA-256 filterOptionName hash with default HASH_ALGORITHM
|
||||
assert simple_filter_to_adhoc(input_1) == {
|
||||
"clause": "WHERE",
|
||||
"expressionType": "SIMPLE",
|
||||
"comparator": "",
|
||||
"operator": "IS NOT NULL",
|
||||
"subject": "LATITUDE",
|
||||
"filterOptionName": "6ac89d498115da22396f80a765cffc70",
|
||||
"filterOptionName": (
|
||||
"84ffe4dba1764c30568e19d4dbbf64717fbc514fad1a8a995debfc72b344aa76"
|
||||
),
|
||||
}
|
||||
|
||||
# The result is different when given different input
|
||||
@@ -47,22 +50,27 @@ def test_simple_filter_to_adhoc_generates_deterministic_values():
|
||||
"comparator": "",
|
||||
"operator": "IS NOT NULL",
|
||||
"subject": "LONGITUDE",
|
||||
"filterOptionName": "9c984bd3714883ca859948354ce26ab9",
|
||||
"filterOptionName": (
|
||||
"c5a54054b987350b5594ee73772fbe71e9651a475bfcb7ae740e0799f12c8ff7"
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
def test_form_data_to_adhoc_generates_deterministic_values():
|
||||
def test_form_data_to_adhoc_generates_deterministic_values(app_context):
|
||||
form_data = {"where": "1 = 1", "having": "count(*) > 1"}
|
||||
|
||||
# The result is the same when given the same input
|
||||
assert form_data_to_adhoc(form_data, "where") == form_data_to_adhoc(
|
||||
form_data, "where"
|
||||
)
|
||||
# SHA-256 filterOptionName hash with default HASH_ALGORITHM
|
||||
assert form_data_to_adhoc(form_data, "where") == {
|
||||
"clause": "WHERE",
|
||||
"expressionType": "SQL",
|
||||
"sqlExpression": "1 = 1",
|
||||
"filterOptionName": "99fe79985afbddea4492626dc6a87b74",
|
||||
"filterOptionName": (
|
||||
"11f7ef40818a0d614cc9a989d5d75ee969b5b3724e973dbf0194e3a339aa0544"
|
||||
),
|
||||
}
|
||||
|
||||
# The result is different when given different input
|
||||
@@ -73,11 +81,13 @@ def test_form_data_to_adhoc_generates_deterministic_values():
|
||||
"clause": "HAVING",
|
||||
"expressionType": "SQL",
|
||||
"sqlExpression": "count(*) > 1",
|
||||
"filterOptionName": "1da11f6b709c3190daeabb84f77fc8c2",
|
||||
"filterOptionName": (
|
||||
"8768cb92fa8a8629695dfe3a4010daefc5d7586934d1aa775f22fb03b46b5dcb"
|
||||
),
|
||||
}
|
||||
|
||||
|
||||
def test_form_data_to_adhoc_incorrect_clause_type():
|
||||
def test_form_data_to_adhoc_incorrect_clause_type(app_context):
|
||||
form_data = {"where": "1 = 1", "having": "count(*) > 1"}
|
||||
|
||||
with pytest.raises(ValueError): # noqa: PT011
|
||||
|
||||
@@ -17,80 +17,184 @@
|
||||
import datetime
|
||||
import math
|
||||
from typing import Any
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest # noqa: F401
|
||||
|
||||
from superset.utils.hashing import md5_sha_from_dict, md5_sha_from_str
|
||||
from superset.utils.hashing import hash_from_dict, hash_from_str
|
||||
|
||||
|
||||
def test_basic_md5_sha():
|
||||
obj = {
|
||||
"product": "Coffee",
|
||||
"company": "Gobias Industries",
|
||||
"price_in_cents": 4000,
|
||||
}
|
||||
"""Test basic hashing with MD5 (legacy mode)."""
|
||||
with patch("superset.utils.hashing.get_hash_algorithm", return_value="md5"):
|
||||
obj = {
|
||||
"product": "Coffee",
|
||||
"company": "Gobias Industries",
|
||||
"price_in_cents": 4000,
|
||||
}
|
||||
|
||||
serialized_obj = (
|
||||
'{"company": "Gobias Industries", "price_in_cents": 4000, "product": "Coffee"}'
|
||||
)
|
||||
serialized_obj = '{"company": "Gobias Industries", "price_in_cents": 4000, "product": "Coffee"}' # noqa: E501
|
||||
|
||||
assert md5_sha_from_str(serialized_obj) == md5_sha_from_dict(obj)
|
||||
assert md5_sha_from_str(serialized_obj) == "35f22273cd6a6798b04f8ddef51135e3"
|
||||
assert hash_from_str(serialized_obj) == hash_from_dict(obj)
|
||||
assert hash_from_str(serialized_obj) == "35f22273cd6a6798b04f8ddef51135e3"
|
||||
|
||||
|
||||
def test_basic_sha256():
    """Check that string and dict hashing agree when SHA-256 is selected.

    ``get_hash_algorithm`` is patched to return ``"sha256"`` so the result
    does not depend on the application's configured algorithm.
    """
    algorithm_patch = patch(
        "superset.utils.hashing.get_hash_algorithm", return_value="sha256"
    )
    with algorithm_patch:
        payload = {
            "product": "Coffee",
            "company": "Gobias Industries",
            "price_in_cents": 4000,
        }

        # The serialized form lists the keys in sorted order.
        payload_json = '{"company": "Gobias Industries", "price_in_cents": 4000, "product": "Coffee"}'  # noqa: E501

        # Hashing the dict must match hashing its serialized form.
        assert hash_from_str(payload_json) == hash_from_dict(payload)

        # Expected SHA-256 hex digest of the serialized object.
        expected_digest = (
            "77bc5927f828903888572ab91c4f3114b36609ca5fb92039bef380d622cef596"
        )
        assert hash_from_str(payload_json) == expected_digest
|
||||
|
||||
|
||||
def test_sort_order_md5_sha():
|
||||
obj_1 = {
|
||||
"product": "Coffee",
|
||||
"price_in_cents": 4000,
|
||||
"company": "Gobias Industries",
|
||||
}
|
||||
"""Test dictionary key order independence with MD5."""
|
||||
with patch("superset.utils.hashing.get_hash_algorithm", return_value="md5"):
|
||||
obj_1 = {
|
||||
"product": "Coffee",
|
||||
"price_in_cents": 4000,
|
||||
"company": "Gobias Industries",
|
||||
}
|
||||
|
||||
obj_2 = {
|
||||
"product": "Coffee",
|
||||
"company": "Gobias Industries",
|
||||
"price_in_cents": 4000,
|
||||
}
|
||||
obj_2 = {
|
||||
"product": "Coffee",
|
||||
"company": "Gobias Industries",
|
||||
"price_in_cents": 4000,
|
||||
}
|
||||
|
||||
assert md5_sha_from_dict(obj_1) == md5_sha_from_dict(obj_2)
|
||||
assert md5_sha_from_dict(obj_1) == "35f22273cd6a6798b04f8ddef51135e3"
|
||||
assert hash_from_dict(obj_1) == hash_from_dict(obj_2)
|
||||
assert hash_from_dict(obj_1) == "35f22273cd6a6798b04f8ddef51135e3"
|
||||
|
||||
|
||||
def test_sort_order_sha256():
    """Check that dict key order does not change the SHA-256 hash.

    Two dicts with the same items in a different insertion order must hash
    to the same value while ``get_hash_algorithm`` is patched to ``"sha256"``.
    """
    algorithm_patch = patch(
        "superset.utils.hashing.get_hash_algorithm", return_value="sha256"
    )
    with algorithm_patch:
        price_before_company = {
            "product": "Coffee",
            "price_in_cents": 4000,
            "company": "Gobias Industries",
        }
        company_before_price = {
            "product": "Coffee",
            "company": "Gobias Industries",
            "price_in_cents": 4000,
        }

        # Same content, different insertion order: the hashes must match.
        assert hash_from_dict(price_before_company) == hash_from_dict(
            company_before_price
        )

        expected_digest = (
            "77bc5927f828903888572ab91c4f3114b36609ca5fb92039bef380d622cef596"
        )
        assert hash_from_dict(price_before_company) == expected_digest
|
||||
|
||||
|
||||
def test_custom_default_md5_sha():
|
||||
def custom_datetime_serializer(obj: Any):
|
||||
if isinstance(obj, datetime.datetime):
|
||||
return "<datetime>"
|
||||
"""Test custom serializer with MD5."""
|
||||
with patch("superset.utils.hashing.get_hash_algorithm", return_value="md5"):
|
||||
|
||||
obj = {
|
||||
"product": "Coffee",
|
||||
"company": "Gobias Industries",
|
||||
"datetime": datetime.datetime.now(),
|
||||
}
|
||||
def custom_datetime_serializer(obj: Any):
|
||||
if isinstance(obj, datetime.datetime):
|
||||
return "<datetime>"
|
||||
|
||||
serialized_obj = '{"company": "Gobias Industries", "datetime": "<datetime>", "product": "Coffee"}' # noqa: E501
|
||||
obj = {
|
||||
"product": "Coffee",
|
||||
"company": "Gobias Industries",
|
||||
"datetime": datetime.datetime.now(),
|
||||
}
|
||||
|
||||
assert md5_sha_from_str(serialized_obj) == md5_sha_from_dict(
|
||||
obj, default=custom_datetime_serializer
|
||||
)
|
||||
assert md5_sha_from_str(serialized_obj) == "dc280121213aabcaeb8087aef268fd0d"
|
||||
serialized_obj = '{"company": "Gobias Industries", "datetime": "<datetime>", "product": "Coffee"}' # noqa: E501
|
||||
|
||||
assert hash_from_str(serialized_obj) == hash_from_dict(
|
||||
obj, default=custom_datetime_serializer
|
||||
)
|
||||
assert hash_from_str(serialized_obj) == "dc280121213aabcaeb8087aef268fd0d"
|
||||
|
||||
|
||||
def test_custom_default_sha256():
    """Check that a custom ``default`` serializer is applied under SHA-256.

    The dict holds a ``datetime`` value; the custom serializer replaces it
    with a fixed placeholder so the resulting hash is deterministic.
    """
    algorithm_patch = patch(
        "superset.utils.hashing.get_hash_algorithm", return_value="sha256"
    )
    with algorithm_patch:

        def placeholder_for_datetime(value: Any):
            # Only datetimes are mapped; any other type falls through and
            # implicitly returns None.
            if isinstance(value, datetime.datetime):
                return "<datetime>"

        payload = {
            "product": "Coffee",
            "company": "Gobias Industries",
            "datetime": datetime.datetime.now(),
        }

        # What the dict should serialize to once the placeholder is applied.
        payload_json = '{"company": "Gobias Industries", "datetime": "<datetime>", "product": "Coffee"}'  # noqa: E501

        assert hash_from_str(payload_json) == hash_from_dict(
            payload, default=placeholder_for_datetime
        )

        expected_digest = (
            "417b57b6f3979bdd0937286f2dc872089fcd5fdb7daad1d3dbcaae1e34cc564e"
        )
        assert hash_from_str(payload_json) == expected_digest
|
||||
|
||||
|
||||
def test_ignore_nan_md5_sha():
|
||||
obj = {
|
||||
"product": "Coffee",
|
||||
"company": "Gobias Industries",
|
||||
"price": math.nan,
|
||||
}
|
||||
"""Test NaN handling with MD5."""
|
||||
with patch("superset.utils.hashing.get_hash_algorithm", return_value="md5"):
|
||||
obj = {
|
||||
"product": "Coffee",
|
||||
"company": "Gobias Industries",
|
||||
"price": math.nan,
|
||||
}
|
||||
|
||||
serialized_obj = (
|
||||
'{"company": "Gobias Industries", "price": NaN, "product": "Coffee"}'
|
||||
)
|
||||
serialized_obj = (
|
||||
'{"company": "Gobias Industries", "price": NaN, "product": "Coffee"}'
|
||||
)
|
||||
|
||||
assert md5_sha_from_str(serialized_obj) == md5_sha_from_dict(obj)
|
||||
assert md5_sha_from_str(serialized_obj) == "5d129d1dffebc0bacc734366476d586d"
|
||||
assert hash_from_str(serialized_obj) == hash_from_dict(obj)
|
||||
assert hash_from_str(serialized_obj) == "5d129d1dffebc0bacc734366476d586d"
|
||||
|
||||
serialized_obj = (
|
||||
'{"company": "Gobias Industries", "price": null, "product": "Coffee"}'
|
||||
)
|
||||
serialized_obj = (
|
||||
'{"company": "Gobias Industries", "price": null, "product": "Coffee"}'
|
||||
)
|
||||
|
||||
assert md5_sha_from_str(serialized_obj) == md5_sha_from_dict(obj, ignore_nan=True)
|
||||
assert md5_sha_from_str(serialized_obj) == "40e87d61f6add03816bccdeac5713b9f"
|
||||
assert hash_from_str(serialized_obj) == hash_from_dict(obj, ignore_nan=True)
|
||||
assert hash_from_str(serialized_obj) == "40e87d61f6add03816bccdeac5713b9f"
|
||||
|
||||
|
||||
def test_ignore_nan_sha256():
    """Check NaN handling of ``hash_from_dict`` under SHA-256.

    Without ``ignore_nan`` the hash must match a serialization containing
    ``NaN``; with ``ignore_nan=True`` it must match one containing ``null``.
    """
    algorithm_patch = patch(
        "superset.utils.hashing.get_hash_algorithm", return_value="sha256"
    )
    with algorithm_patch:
        payload = {
            "product": "Coffee",
            "company": "Gobias Industries",
            "price": math.nan,
        }

        # Default behaviour: NaN is kept in the serialized form.
        json_with_nan = (
            '{"company": "Gobias Industries", "price": NaN, "product": "Coffee"}'
        )
        assert hash_from_str(json_with_nan) == hash_from_dict(payload)
        nan_digest = (
            "efff87146d137b2d0392eff94b74e7644c3a6b135b91563400029995b9236820"
        )
        assert hash_from_str(json_with_nan) == nan_digest

        # With ignore_nan=True the NaN value is serialized as null.
        json_with_null = (
            '{"company": "Gobias Industries", "price": null, "product": "Coffee"}'
        )
        assert hash_from_str(json_with_null) == hash_from_dict(
            payload, ignore_nan=True
        )
        null_digest = (
            "9b66e0af1cb74aa58c3ab08654c086ebfdada14b1e6312b4002edc854d99d24d"
        )
        assert hash_from_str(json_with_null) == null_digest
|
||||
|
||||
Reference in New Issue
Block a user