mirror of
https://github.com/apache/superset.git
synced 2026-05-25 09:45:18 +00:00
feat(semantic layers): cache
This commit is contained in:
191
tests/unit_tests/semantic_layers/cache_integration_test.py
Normal file
191
tests/unit_tests/semantic_layers/cache_integration_test.py
Normal file
@@ -0,0 +1,191 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
"""End-to-end test that exercises ``mapper.get_results`` with a live cache."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pandas as pd
|
||||
import pyarrow as pa
|
||||
import pytest
|
||||
from pytest_mock import MockerFixture
|
||||
from superset_core.semantic_layers.types import (
|
||||
Dimension,
|
||||
Metric,
|
||||
SemanticRequest,
|
||||
SemanticResult,
|
||||
)
|
||||
|
||||
from superset.semantic_layers import cache as cache_module
|
||||
from superset.semantic_layers.mapper import get_results, ValidatedQueryObject
|
||||
|
||||
|
||||
class _InMemoryCache:
|
||||
"""Minimal flask-caching compatible cache used to isolate tests."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._store: dict[str, Any] = {}
|
||||
|
||||
def get(self, key: str) -> Any:
|
||||
return self._store.get(key)
|
||||
|
||||
def set(self, key: str, value: Any, timeout: int | None = None) -> bool:
|
||||
self._store[key] = value
|
||||
return True
|
||||
|
||||
def delete(self, key: str) -> bool:
|
||||
return self._store.pop(key, None) is not None
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def fake_cache(mocker: MockerFixture) -> _InMemoryCache:
|
||||
fake = _InMemoryCache()
|
||||
mocker.patch.object(
|
||||
type(cache_module.cache_manager),
|
||||
"data_cache",
|
||||
property(lambda self: fake),
|
||||
)
|
||||
return fake
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def view_implementation() -> Any:
|
||||
"""SemanticView implementation stub with one metric and one dimension."""
|
||||
dim_a = Dimension(id="t.a", name="a", type=pa.int64())
|
||||
metric_x = Metric(id="t.x", name="x", type=pa.float64(), definition="sum(x)")
|
||||
|
||||
impl = MagicMock()
|
||||
impl.metrics = {metric_x}
|
||||
impl.dimensions = {dim_a}
|
||||
impl.features = frozenset()
|
||||
impl.get_metrics = MagicMock(return_value={metric_x})
|
||||
impl.get_dimensions = MagicMock(return_value={dim_a})
|
||||
return impl
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def datasource(view_implementation: Any) -> MagicMock:
|
||||
ds = MagicMock()
|
||||
ds.implementation = view_implementation
|
||||
ds.uuid = "view-uuid-stable"
|
||||
ds.changed_on = datetime(2026, 1, 1, 12, 0, 0)
|
||||
ds.cache_timeout = 60
|
||||
ds.fetch_values_predicate = None
|
||||
return ds
|
||||
|
||||
|
||||
def _result(rows: list[tuple[int, float]]) -> SemanticResult:
|
||||
df = pd.DataFrame(rows, columns=["a", "x"])
|
||||
return SemanticResult(
|
||||
requests=[SemanticRequest(type="SQL", definition="select a, x")],
|
||||
results=pa.Table.from_pandas(df, preserve_index=False),
|
||||
)
|
||||
|
||||
|
||||
def _qo(
|
||||
datasource: MagicMock,
|
||||
filter_op: str | None = None,
|
||||
filter_val: Any = None,
|
||||
limit: int | None = None,
|
||||
) -> ValidatedQueryObject:
|
||||
qo_filters: list[dict[str, Any]] = (
|
||||
[{"col": "a", "op": filter_op, "val": filter_val}] if filter_op else []
|
||||
)
|
||||
return ValidatedQueryObject(
|
||||
datasource=datasource,
|
||||
metrics=["x"],
|
||||
columns=["a"],
|
||||
filters=qo_filters, # type: ignore[arg-type]
|
||||
row_limit=limit,
|
||||
)
|
||||
|
||||
|
||||
def test_narrower_filter_reuses_cache(
|
||||
fake_cache: _InMemoryCache,
|
||||
view_implementation: Any,
|
||||
datasource: MagicMock,
|
||||
) -> None:
|
||||
# The dispatcher returns rows already filtered by `a > 1` (in production it
|
||||
# would; here we hand-feed the result). The second query (a > 2) is a subset
|
||||
# and must be served from the cached DataFrame.
|
||||
cached = _result([(2, 2.0), (3, 3.0), (5, 5.0)])
|
||||
view_implementation.get_table = MagicMock(return_value=cached)
|
||||
|
||||
first = get_results(_qo(datasource, ">", 1))
|
||||
assert view_implementation.get_table.call_count == 1
|
||||
assert sorted(first.df["a"].tolist()) == [2, 3, 5]
|
||||
|
||||
second = get_results(_qo(datasource, ">", 2))
|
||||
assert view_implementation.get_table.call_count == 1 # cache hit
|
||||
assert sorted(second.df["a"].tolist()) == [3, 5]
|
||||
|
||||
|
||||
def test_smaller_limit_reuses_cache(
|
||||
fake_cache: _InMemoryCache,
|
||||
view_implementation: Any,
|
||||
datasource: MagicMock,
|
||||
) -> None:
|
||||
# First call has no limit; second asks for 2 rows — should be served from cache.
|
||||
full = _result([(0, 1.0), (1, 2.0), (2, 3.0), (3, 4.0)])
|
||||
view_implementation.get_table = MagicMock(return_value=full)
|
||||
|
||||
get_results(_qo(datasource, limit=None))
|
||||
assert view_implementation.get_table.call_count == 1
|
||||
|
||||
result = get_results(_qo(datasource, limit=2))
|
||||
assert view_implementation.get_table.call_count == 1 # cache hit
|
||||
assert len(result.df) == 2
|
||||
|
||||
|
||||
def test_broader_filter_misses_cache(
|
||||
fake_cache: _InMemoryCache,
|
||||
view_implementation: Any,
|
||||
datasource: MagicMock,
|
||||
) -> None:
|
||||
view_implementation.get_table = MagicMock(
|
||||
side_effect=[
|
||||
_result([(2, 1.0), (3, 2.0)]),
|
||||
_result([(0, 1.0), (2, 2.0), (3, 3.0)]),
|
||||
]
|
||||
)
|
||||
|
||||
get_results(_qo(datasource, ">", 1))
|
||||
assert view_implementation.get_table.call_count == 1
|
||||
|
||||
# Broader filter — must re-execute.
|
||||
get_results(_qo(datasource, ">", 0))
|
||||
assert view_implementation.get_table.call_count == 2
|
||||
|
||||
|
||||
def test_changed_on_invalidates_cache(
|
||||
fake_cache: _InMemoryCache,
|
||||
view_implementation: Any,
|
||||
datasource: MagicMock,
|
||||
) -> None:
|
||||
view_implementation.get_table = MagicMock(return_value=_result([(2, 1.0)]))
|
||||
|
||||
get_results(_qo(datasource, ">", 1))
|
||||
assert view_implementation.get_table.call_count == 1
|
||||
|
||||
# Bumping changed_on yields a different shape key — cache misses.
|
||||
datasource.changed_on = datetime(2026, 2, 1, 0, 0, 0)
|
||||
get_results(_qo(datasource, ">", 1))
|
||||
assert view_implementation.get_table.call_count == 2
|
||||
396
tests/unit_tests/semantic_layers/cache_test.py
Normal file
396
tests/unit_tests/semantic_layers/cache_test.py
Normal file
@@ -0,0 +1,396 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
import pandas as pd
|
||||
import pyarrow as pa
|
||||
import pytest
|
||||
from superset_core.semantic_layers.types import (
|
||||
Dimension,
|
||||
Filter,
|
||||
Metric,
|
||||
Operator,
|
||||
OrderDirection,
|
||||
PredicateType,
|
||||
SemanticQuery,
|
||||
SemanticRequest,
|
||||
SemanticResult,
|
||||
)
|
||||
|
||||
from superset.semantic_layers.cache import (
|
||||
_apply_post_processing,
|
||||
_implies,
|
||||
CachedEntry,
|
||||
can_satisfy,
|
||||
shape_key,
|
||||
value_key,
|
||||
ViewMeta,
|
||||
)
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def dim(id_: str, name: str | None = None) -> Dimension:
|
||||
return Dimension(id=id_, name=name or id_, type=pa.utf8())
|
||||
|
||||
|
||||
def met(id_: str, name: str | None = None) -> Metric:
|
||||
return Metric(id=id_, name=name or id_, type=pa.float64(), definition="x")
|
||||
|
||||
|
||||
COL_A = dim("col.a", "a")
|
||||
COL_B = dim("col.b", "b")
|
||||
M_X = met("met.x", "x")
|
||||
M_Y = met("met.y", "y")
|
||||
|
||||
VIEW = ViewMeta(uuid="view-1", changed_on_iso="2026-05-01T00:00:00", cache_timeout=None)
|
||||
|
||||
|
||||
def where(column: Dimension | Metric | None, op: Operator, value: Any) -> Filter:
|
||||
return Filter(type=PredicateType.WHERE, column=column, operator=op, value=value)
|
||||
|
||||
|
||||
def having(column: Metric, op: Operator, value: Any) -> Filter:
|
||||
return Filter(type=PredicateType.HAVING, column=column, operator=op, value=value)
|
||||
|
||||
|
||||
def adhoc(definition: str, type_: PredicateType = PredicateType.WHERE) -> Filter:
|
||||
return Filter(type=type_, column=None, operator=Operator.ADHOC, value=definition)
|
||||
|
||||
|
||||
def query(
|
||||
filters: set[Filter] | None = None,
|
||||
limit: int | None = None,
|
||||
order: Any = None,
|
||||
dimensions: list[Dimension] | None = None,
|
||||
metrics: list[Metric] | None = None,
|
||||
) -> SemanticQuery:
|
||||
return SemanticQuery(
|
||||
metrics=metrics if metrics is not None else [M_X],
|
||||
dimensions=dimensions if dimensions is not None else [COL_A, COL_B],
|
||||
filters=filters,
|
||||
order=order,
|
||||
limit=limit,
|
||||
)
|
||||
|
||||
|
||||
def entry_from(q: SemanticQuery, value_key_: str = "vk") -> CachedEntry:
|
||||
from superset.semantic_layers.cache import _group_limit_key, _order_key
|
||||
|
||||
return CachedEntry(
|
||||
filters=frozenset(q.filters or set()),
|
||||
limit=q.limit,
|
||||
offset=q.offset or 0,
|
||||
order_key=_order_key(q.order),
|
||||
group_limit_key=_group_limit_key(q.group_limit),
|
||||
value_key=value_key_,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _implies: scalar range pairs
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"new_op,new_val,cached_op,cached_val,expected",
|
||||
[
|
||||
# narrower lower bound
|
||||
(Operator.GREATER_THAN, 20, Operator.GREATER_THAN, 10, True),
|
||||
(Operator.GREATER_THAN, 10, Operator.GREATER_THAN, 20, False),
|
||||
(Operator.GREATER_THAN_OR_EQUAL, 11, Operator.GREATER_THAN, 10, True),
|
||||
(Operator.GREATER_THAN_OR_EQUAL, 10, Operator.GREATER_THAN, 10, False),
|
||||
(Operator.GREATER_THAN, 10, Operator.GREATER_THAN_OR_EQUAL, 10, True),
|
||||
(Operator.GREATER_THAN, 9, Operator.GREATER_THAN_OR_EQUAL, 10, False),
|
||||
# narrower upper bound
|
||||
(Operator.LESS_THAN, 5, Operator.LESS_THAN, 10, True),
|
||||
(Operator.LESS_THAN_OR_EQUAL, 9, Operator.LESS_THAN, 10, True),
|
||||
(Operator.LESS_THAN_OR_EQUAL, 10, Operator.LESS_THAN, 10, False),
|
||||
# cross-direction — never implies
|
||||
(Operator.LESS_THAN, 5, Operator.GREATER_THAN, 10, False),
|
||||
(Operator.GREATER_THAN, 5, Operator.LESS_THAN, 10, False),
|
||||
# equals fits in range
|
||||
(Operator.EQUALS, 15, Operator.GREATER_THAN, 10, True),
|
||||
(Operator.EQUALS, 10, Operator.GREATER_THAN, 10, False),
|
||||
(Operator.EQUALS, 10, Operator.GREATER_THAN_OR_EQUAL, 10, True),
|
||||
],
|
||||
)
|
||||
def test_implies_range(
|
||||
new_op: Operator,
|
||||
new_val: Any,
|
||||
cached_op: Operator,
|
||||
cached_val: Any,
|
||||
expected: bool,
|
||||
) -> None:
|
||||
assert (
|
||||
_implies(where(COL_A, new_op, new_val), where(COL_A, cached_op, cached_val))
|
||||
is expected
|
||||
)
|
||||
|
||||
|
||||
def test_implies_in_subset() -> None:
|
||||
cached = where(COL_A, Operator.IN, frozenset({"a", "b", "c"}))
|
||||
assert _implies(where(COL_A, Operator.IN, frozenset({"a", "b"})), cached) is True
|
||||
assert _implies(where(COL_A, Operator.IN, frozenset({"a", "d"})), cached) is False
|
||||
# equals to a value in the cached IN set
|
||||
assert _implies(where(COL_A, Operator.EQUALS, "b"), cached) is True
|
||||
assert _implies(where(COL_A, Operator.EQUALS, "z"), cached) is False
|
||||
|
||||
|
||||
def test_implies_in_all_in_range() -> None:
|
||||
cached = where(COL_A, Operator.GREATER_THAN, 10)
|
||||
assert _implies(where(COL_A, Operator.IN, frozenset({11, 12})), cached) is True
|
||||
assert _implies(where(COL_A, Operator.IN, frozenset({10, 12})), cached) is False
|
||||
|
||||
|
||||
def test_implies_equals_exact() -> None:
|
||||
cached = where(COL_A, Operator.EQUALS, 5)
|
||||
assert _implies(where(COL_A, Operator.EQUALS, 5), cached) is True
|
||||
assert _implies(where(COL_A, Operator.EQUALS, 6), cached) is False
|
||||
|
||||
|
||||
def test_implies_is_not_null() -> None:
|
||||
cached = where(COL_A, Operator.IS_NOT_NULL, None)
|
||||
assert _implies(where(COL_A, Operator.GREATER_THAN, 0), cached) is True
|
||||
assert _implies(where(COL_A, Operator.IS_NOT_NULL, None), cached) is True
|
||||
assert _implies(where(COL_A, Operator.IS_NULL, None), cached) is False
|
||||
|
||||
|
||||
def test_implies_like_exact_match_only() -> None:
|
||||
a = where(COL_A, Operator.LIKE, "foo%")
|
||||
b = where(COL_A, Operator.LIKE, "foo%")
|
||||
c = where(COL_A, Operator.LIKE, "bar%")
|
||||
assert _implies(a, b) is True
|
||||
assert _implies(c, b) is False
|
||||
assert _implies(where(COL_A, Operator.EQUALS, "fooz"), b) is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# can_satisfy
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_can_satisfy_empty_cached_returns_all_as_leftovers() -> None:
|
||||
cached_q = query(filters=None)
|
||||
new_q = query(filters={where(COL_A, Operator.GREATER_THAN, 5)})
|
||||
ok, leftovers = can_satisfy(entry_from(cached_q), new_q)
|
||||
assert ok is True
|
||||
assert leftovers == {where(COL_A, Operator.GREATER_THAN, 5)}
|
||||
|
||||
|
||||
def test_can_satisfy_narrower_filter() -> None:
|
||||
cached_q = query(filters={where(COL_A, Operator.GREATER_THAN, 1)})
|
||||
new_q = query(filters={where(COL_A, Operator.GREATER_THAN, 2)})
|
||||
ok, leftovers = can_satisfy(entry_from(cached_q), new_q)
|
||||
assert ok is True
|
||||
assert leftovers == {where(COL_A, Operator.GREATER_THAN, 2)}
|
||||
|
||||
|
||||
def test_can_satisfy_broader_filter_fails() -> None:
|
||||
cached_q = query(filters={where(COL_A, Operator.GREATER_THAN, 2)})
|
||||
new_q = query(filters={where(COL_A, Operator.GREATER_THAN, 1)})
|
||||
ok, leftovers = can_satisfy(entry_from(cached_q), new_q)
|
||||
assert ok is False
|
||||
assert leftovers == set()
|
||||
|
||||
|
||||
def test_can_satisfy_missing_constraint_fails() -> None:
|
||||
cached_q = query(filters={where(COL_A, Operator.GREATER_THAN, 1)})
|
||||
new_q = query(filters=None)
|
||||
ok, _ = can_satisfy(entry_from(cached_q), new_q)
|
||||
assert ok is False
|
||||
|
||||
|
||||
def test_can_satisfy_new_filter_on_extra_column() -> None:
|
||||
cached_q = query(filters={where(COL_A, Operator.GREATER_THAN, 1)})
|
||||
new_q = query(
|
||||
filters={
|
||||
where(COL_A, Operator.GREATER_THAN, 2),
|
||||
where(COL_B, Operator.EQUALS, "x"),
|
||||
}
|
||||
)
|
||||
ok, leftovers = can_satisfy(entry_from(cached_q), new_q)
|
||||
assert ok is True
|
||||
assert leftovers == {
|
||||
where(COL_A, Operator.GREATER_THAN, 2),
|
||||
where(COL_B, Operator.EQUALS, "x"),
|
||||
}
|
||||
|
||||
|
||||
def test_can_satisfy_leftover_on_non_projected_column_fails() -> None:
|
||||
other = dim("col.other", "other")
|
||||
cached_q = query(filters=None)
|
||||
new_q = query(
|
||||
filters={where(other, Operator.EQUALS, "x")},
|
||||
dimensions=[COL_A, COL_B],
|
||||
)
|
||||
ok, _ = can_satisfy(entry_from(cached_q), new_q)
|
||||
assert ok is False
|
||||
|
||||
|
||||
def test_can_satisfy_having_requires_exact_set() -> None:
|
||||
cached_q = query(filters={having(M_X, Operator.GREATER_THAN, 100)})
|
||||
same = query(filters={having(M_X, Operator.GREATER_THAN, 100)})
|
||||
tighter = query(filters={having(M_X, Operator.GREATER_THAN, 200)})
|
||||
ok_same, _ = can_satisfy(entry_from(cached_q), same)
|
||||
ok_tight, _ = can_satisfy(entry_from(cached_q), tighter)
|
||||
assert ok_same is True
|
||||
assert ok_tight is False
|
||||
|
||||
|
||||
def test_can_satisfy_adhoc_requires_exact_set() -> None:
|
||||
cached_q = query(filters={adhoc("col_a > 1")})
|
||||
same = query(filters={adhoc("col_a > 1")})
|
||||
different = query(filters={adhoc("col_a > 2")})
|
||||
ok_same, _ = can_satisfy(entry_from(cached_q), same)
|
||||
ok_diff, _ = can_satisfy(entry_from(cached_q), different)
|
||||
assert ok_same is True
|
||||
assert ok_diff is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Limit / order / offset
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_can_satisfy_unlimited_cached_satisfies_any_limit() -> None:
|
||||
cached_q = query(filters=None, limit=None)
|
||||
new_q = query(filters=None, limit=10)
|
||||
ok, leftovers = can_satisfy(entry_from(cached_q), new_q)
|
||||
assert ok is True
|
||||
assert leftovers == set()
|
||||
|
||||
|
||||
def test_can_satisfy_smaller_limit_with_matching_order() -> None:
|
||||
order = [(M_X, OrderDirection.DESC)]
|
||||
cached_q = query(filters=None, limit=100, order=order)
|
||||
new_q = query(filters=None, limit=10, order=order)
|
||||
ok, _ = can_satisfy(entry_from(cached_q), new_q)
|
||||
assert ok is True
|
||||
|
||||
|
||||
def test_can_satisfy_smaller_limit_different_order_fails() -> None:
|
||||
cached_q = query(filters=None, limit=100, order=[(M_X, OrderDirection.DESC)])
|
||||
new_q = query(filters=None, limit=10, order=[(M_X, OrderDirection.ASC)])
|
||||
ok, _ = can_satisfy(entry_from(cached_q), new_q)
|
||||
assert ok is False
|
||||
|
||||
|
||||
def test_can_satisfy_larger_limit_fails() -> None:
|
||||
cached_q = query(filters=None, limit=10)
|
||||
new_q = query(filters=None, limit=100)
|
||||
ok, _ = can_satisfy(entry_from(cached_q), new_q)
|
||||
assert ok is False
|
||||
|
||||
|
||||
def test_can_satisfy_no_new_limit_when_cached_has_one_fails() -> None:
|
||||
cached_q = query(filters=None, limit=100)
|
||||
new_q = query(filters=None, limit=None)
|
||||
ok, _ = can_satisfy(entry_from(cached_q), new_q)
|
||||
assert ok is False
|
||||
|
||||
|
||||
def test_can_satisfy_offset_never_reused() -> None:
|
||||
cached_q = SemanticQuery(metrics=[M_X], dimensions=[COL_A], offset=5)
|
||||
new_q = SemanticQuery(metrics=[M_X], dimensions=[COL_A], offset=5)
|
||||
ok, _ = can_satisfy(entry_from(cached_q), new_q)
|
||||
assert ok is False
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Post-processing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_apply_post_processing_filters_and_limits() -> None:
|
||||
df = pd.DataFrame({"a": [1, 3, 5, 7, 9], "x": [10, 20, 30, 40, 50]})
|
||||
cached = SemanticResult(
|
||||
requests=[SemanticRequest(type="SQL", definition="select ...")],
|
||||
results=pa.Table.from_pandas(df, preserve_index=False),
|
||||
)
|
||||
new_q = query(
|
||||
filters={where(COL_A, Operator.GREATER_THAN, 2)},
|
||||
limit=2,
|
||||
)
|
||||
result = _apply_post_processing(
|
||||
cached, new_q, {where(COL_A, Operator.GREATER_THAN, 2)}
|
||||
)
|
||||
result_df = result.results.to_pandas()
|
||||
assert list(result_df["a"]) == [3, 5]
|
||||
# the cache annotates the requests with a marker
|
||||
assert any(req.type == "cache" for req in result.requests)
|
||||
|
||||
|
||||
def test_apply_post_processing_no_leftovers_no_limit_returns_original() -> None:
|
||||
df = pd.DataFrame({"a": [1, 2]})
|
||||
cached = SemanticResult(
|
||||
requests=[], results=pa.Table.from_pandas(df, preserve_index=False)
|
||||
)
|
||||
new_q = query(filters=None, limit=None)
|
||||
out = _apply_post_processing(cached, new_q, set())
|
||||
# same object reference is OK; we explicitly return the input
|
||||
assert out is cached
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Hash stability
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_value_key_stable_across_metric_order() -> None:
|
||||
q1 = SemanticQuery(metrics=[M_X, M_Y], dimensions=[COL_A])
|
||||
q2 = SemanticQuery(metrics=[M_Y, M_X], dimensions=[COL_A])
|
||||
assert value_key(VIEW, q1) == value_key(VIEW, q2)
|
||||
|
||||
|
||||
def test_shape_key_stable_across_dimension_order() -> None:
|
||||
q1 = SemanticQuery(metrics=[M_X], dimensions=[COL_A, COL_B])
|
||||
q2 = SemanticQuery(metrics=[M_X], dimensions=[COL_B, COL_A])
|
||||
assert shape_key(VIEW, q1) == shape_key(VIEW, q2)
|
||||
|
||||
|
||||
def test_shape_key_changes_with_changed_on() -> None:
|
||||
q = SemanticQuery(metrics=[M_X], dimensions=[COL_A])
|
||||
other = ViewMeta(uuid=VIEW.uuid, changed_on_iso="2099-01-01", cache_timeout=None)
|
||||
assert shape_key(VIEW, q) != shape_key(other, q)
|
||||
|
||||
|
||||
def test_value_key_changes_with_filter_value() -> None:
|
||||
q1 = SemanticQuery(
|
||||
metrics=[M_X],
|
||||
dimensions=[COL_A],
|
||||
filters={where(COL_A, Operator.GREATER_THAN, 1)},
|
||||
)
|
||||
q2 = SemanticQuery(
|
||||
metrics=[M_X],
|
||||
dimensions=[COL_A],
|
||||
filters={where(COL_A, Operator.GREATER_THAN, 2)},
|
||||
)
|
||||
assert value_key(VIEW, q1) != value_key(VIEW, q2)
|
||||
|
||||
|
||||
def test_value_key_with_datetime_filter() -> None:
|
||||
f = where(COL_A, Operator.GREATER_THAN_OR_EQUAL, datetime(2025, 1, 1))
|
||||
q = SemanticQuery(metrics=[M_X], dimensions=[COL_A], filters={f})
|
||||
# should not raise
|
||||
assert value_key(VIEW, q).startswith("sv:val:")
|
||||
Reference in New Issue
Block a user