mirror of
https://github.com/apache/superset.git
synced 2026-07-01 12:25:32 +00:00
Adds a structured per-field change log alongside the foundational
shadow tables. Each save flush emits zero or more ``version_changes``
rows describing what changed relative to the previous version, with
shape ``[{kind, path, from_value, to_value, sequence}]`` keyed to
``version_transaction.id`` (FR-016 .. FR-021).
**Schema** — ``version_changes`` table, FK to ``version_transaction``
with ``ON DELETE CASCADE`` so retention drops dependent records
without explicit cleanup. Composite unique index on
``(transaction_id, entity_kind, entity_id, sequence)`` so the
listener can write monotonically and downstream readers see a
deterministic order.
**Diff engine** (``superset/versioning/diff.py``) — pure-function
diffing of pre-/post-state pairs:
- ``diff_scalar_fields`` for ordinary columns; emits one record per
changed field with JSON-safe ``from_value`` / ``to_value``.
- ``diff_json_field`` for ``json_metadata`` and ``params``, walking
the parsed structure and emitting per-sub-key records. Honours
an ``exclude_keys`` set
(``_DASHBOARD_JSON_METADATA_AUDIT_KEYS``: ``chart_configuration``,
``global_chart_configuration``, ``map_label_colors``,
``show_chart_timestamps``, ``color_namespace``;
``_CHART_PARAMS_AUDIT_KEYS``) so frontend-stamped sub-keys that
mutate on every save don't dominate the change log (FR-022).
- ``diff_dashboard_layout`` walks ``position_json`` structurally
and emits ``[verb, kind, id]`` records (verbs ``add``, ``remove``,
``move``, ``edit``; kinds from a ``CHART``/``ROW``/``COLUMN``/etc.
→ english map) so a UI can render "Added chart 'Foo'" without
re-parsing JSON. ``HEADER_ID`` is suppressed because it duplicates
the ``dashboard_title`` scalar record.
- ``fold_dashboard_layout_with_chart_changes`` deduplicates layout
records against M2M / chart-membership records by UUID so an
add-and-attach doesn't appear twice.
- ``_values_equivalent`` treats ``None`` and ``""`` as equal; this
matches the save path's habit of normalising nullable strings to
the empty string.
**Listener** — ``superset/versioning/changes.py`` registers a
``before_flush`` listener that captures pre-state for each dirty
entity and an ``after_flush`` listener that runs the diff engine
against the post-state and writes ``version_changes`` rows under
the resolved ``transaction_id``. Tracks processed transaction ids
on ``session.info`` so re-firings within a single transaction
(autoflush triggered by mid-commit queries) don't double-insert and
trip the unique constraint. Reads child rows via raw SELECT against
``table_columns`` / ``sql_metrics`` rather than ``dataset.columns``
because the live collection is stale during the restore path's raw
DELETE+INSERT cycle.
**Endpoint surface** — ``VersionDAO.list_change_records_batch``
batches the lookup across multiple transactions with a single
``WHERE transaction_id IN (...)`` query so the version-list
endpoint avoids N+1 round-trips. ``list_versions`` / ``get_version``
return entries with a populated ``changes`` array (empty for
``operation_type=0`` baseline rows).
**Tests** — ``test_diff.py`` covers the diff engine shape (39
unit cases across scalar, JSON, layout, child-collection, and
fold paths). ``change_records_tests.py`` exercises the listener
end-to-end with realistic save flows. ``perf_validation_tests.py``
is the T044 harness for SC-002/3/4 (list endpoint p95 < 1s,
restore < 3s, save overhead < 50ms).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1085 lines
35 KiB
Python
1085 lines
35 KiB
Python
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
"""Unit tests for ``superset.versioning.diff`` (T051).
|
|
|
|
Pure-function tests — no app context, no DB. Covers:
|
|
|
|
- (a) scalar field change
|
|
- (b) filter added / removed / modified (Slice params)
|
|
- (c) metric added / removed (Slice params + dataset SqlMetric)
|
|
- (d) column added / removed / type-changed (dataset TableColumn)
|
|
- (e) ``dashboard_slices`` added / removed
|
|
- (f) replay round-trip — applying records in order reconstructs post-state (SC-008)
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from copy import deepcopy
|
|
from typing import Any
|
|
|
|
from superset.utils import json as _json
|
|
from superset.versioning.diff import (
|
|
ChangeRecord,
|
|
diff_dashboard,
|
|
diff_dashboard_slices,
|
|
diff_dataset,
|
|
diff_dataset_columns,
|
|
diff_dataset_metrics,
|
|
diff_json_field,
|
|
diff_scalar_fields,
|
|
diff_slice,
|
|
diff_slice_params,
|
|
scalar_fields_for,
|
|
)
|
|
|
|
# Field universes used by tests. In production the listener passes the
|
|
# result of ``scalar_fields_for(ModelClass, special=...)``; in tests we
|
|
# pass explicit sets so assertions remain stable even if a contributor
|
|
# later adds or renames a column on the real model.
|
|
|
|
_SLICE_TEST_FIELDS: frozenset[str] = frozenset(
|
|
{
|
|
"slice_name",
|
|
"datasource_type",
|
|
"datasource_id",
|
|
"viz_type",
|
|
"description",
|
|
"cache_timeout",
|
|
"external_url",
|
|
"is_managed_externally",
|
|
"certified_by",
|
|
"certification_details",
|
|
}
|
|
)
|
|
|
|
_DASHBOARD_TEST_FIELDS: frozenset[str] = frozenset(
|
|
{
|
|
"dashboard_title",
|
|
"position_json",
|
|
"json_metadata",
|
|
"slug",
|
|
"css",
|
|
"external_url",
|
|
"is_managed_externally",
|
|
"certified_by",
|
|
"certification_details",
|
|
"published",
|
|
}
|
|
)
|
|
|
|
_DATASET_TEST_FIELDS: frozenset[str] = frozenset(
|
|
{
|
|
"table_name",
|
|
"sql",
|
|
"description",
|
|
"cache_timeout",
|
|
"template_params",
|
|
"extra",
|
|
"main_dttm_col",
|
|
"default_endpoint",
|
|
"offset",
|
|
"schema",
|
|
"catalog",
|
|
"filter_select_enabled",
|
|
"fetch_values_predicate",
|
|
"is_sqllab_view",
|
|
"is_managed_externally",
|
|
"external_url",
|
|
"normalize_columns",
|
|
"always_filter_main_dttm",
|
|
}
|
|
)
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# (a) Scalar field change
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_slice_scalar_rename() -> None:
|
|
pre = {"slice_name": "Sales Report"}
|
|
post = {"slice_name": "Sales Report Q1"}
|
|
records = diff_slice(pre, post, fields=_SLICE_TEST_FIELDS)
|
|
assert records == [
|
|
ChangeRecord(
|
|
kind="field",
|
|
path=["slice_name"],
|
|
from_value="Sales Report",
|
|
to_value="Sales Report Q1",
|
|
)
|
|
]
|
|
|
|
|
|
def test_slice_scalar_unchanged_emits_nothing() -> None:
|
|
pre = {"slice_name": "Sales Report", "description": "x"}
|
|
post = {"slice_name": "Sales Report", "description": "x"}
|
|
assert diff_slice(pre, post, fields=_SLICE_TEST_FIELDS) == []
|
|
|
|
|
|
def test_dashboard_scalar_change_falls_through_to_field() -> None:
|
|
pre = {"dashboard_title": "Old", "position_json": '{"a":1}'}
|
|
post = {"dashboard_title": "New", "position_json": '{"a":2}'}
|
|
records = diff_dashboard(pre, post, fields=_DASHBOARD_TEST_FIELDS)
|
|
assert len(records) == 2
|
|
kinds = {r.kind for r in records}
|
|
assert kinds == {"field"}
|
|
paths = {tuple(r.path) for r in records}
|
|
assert paths == {("dashboard_title",), ("position_json",)}
|
|
|
|
|
|
def test_dataset_scalar_change_falls_through_to_field() -> None:
|
|
pre = {"sql": "SELECT 1", "description": "old"}
|
|
post = {"sql": "SELECT 2", "description": "new"}
|
|
records = diff_dataset(pre, post, fields=_DATASET_TEST_FIELDS)
|
|
kinds = {r.kind for r in records}
|
|
paths = {tuple(r.path) for r in records}
|
|
assert kinds == {"field"}
|
|
assert paths == {("sql",), ("description",)}
|
|
|
|
|
|
def test_unknown_fields_are_ignored() -> None:
|
|
# Fields outside the known scalar set are silently skipped — we
|
|
# don't emit spurious ``field`` records for ORM-internal columns.
|
|
pre = {"__unmapped__": "x"}
|
|
post = {"__unmapped__": "y"}
|
|
assert diff_slice(pre, post, fields=_SLICE_TEST_FIELDS) == []
|
|
assert diff_dashboard(pre, post, fields=_DASHBOARD_TEST_FIELDS) == []
|
|
assert diff_dataset(pre, post, fields=_DATASET_TEST_FIELDS) == []
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# scalar_fields_for — model reflection
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class _FakeColumn:
|
|
"""Stand-in for a SQLAlchemy ``Column`` that exposes just ``.name``."""
|
|
|
|
def __init__(self, name: str) -> None:
|
|
self.name = name
|
|
|
|
|
|
class _FakeTable:
|
|
"""Stand-in for ``Model.__table__`` that exposes an iterable ``columns``."""
|
|
|
|
def __init__(self, column_names: list[str]) -> None:
|
|
self.columns = [_FakeColumn(n) for n in column_names]
|
|
|
|
|
|
def test_scalar_fields_for_strips_audit_and_excludes() -> None:
|
|
"""Reflection excludes __versioned__.exclude + audit fields + special."""
|
|
|
|
class _Model:
|
|
__table__ = _FakeTable(
|
|
[
|
|
"id",
|
|
"uuid",
|
|
"name",
|
|
"description",
|
|
"secret_field",
|
|
"created_on",
|
|
"changed_on",
|
|
"created_by_fk",
|
|
"changed_by_fk",
|
|
"params",
|
|
]
|
|
)
|
|
__versioned__ = {"exclude": ["secret_field"]}
|
|
|
|
result = scalar_fields_for(_Model, special=frozenset({"params"}))
|
|
assert result == frozenset({"name", "description"})
|
|
|
|
|
|
def test_scalar_fields_for_no_versioned_attr() -> None:
|
|
"""Models without ``__versioned__`` work — exclude defaults to empty."""
|
|
|
|
class _Model:
|
|
__table__ = _FakeTable(["id", "name", "created_on"])
|
|
|
|
result = scalar_fields_for(_Model)
|
|
assert result == frozenset({"name"})
|
|
|
|
|
|
def test_scalar_fields_for_empty_versioned_dict() -> None:
|
|
"""``__versioned__ = {}`` is treated as no additional exclusions."""
|
|
|
|
class _Model:
|
|
__table__ = _FakeTable(["id", "name"])
|
|
__versioned__: dict[str, Any] = {}
|
|
|
|
result = scalar_fields_for(_Model)
|
|
assert result == frozenset({"name"})
|
|
|
|
|
|
def test_scalar_fields_for_no_table_returns_empty() -> None:
|
|
"""Objects without ``__table__`` produce an empty set, not an error."""
|
|
|
|
class _NotAModel:
|
|
pass
|
|
|
|
assert scalar_fields_for(_NotAModel) == frozenset()
|
|
|
|
|
|
def test_scalar_fields_for_custom_field_in_derivative() -> None:
|
|
"""Derivatives get custom scalar fields without editing ``diff.py``."""
|
|
|
|
class _DerivedSlice:
|
|
"""Simulates a downstream fork that added ``preset_embedded_config``."""
|
|
|
|
__table__ = _FakeTable(
|
|
[
|
|
"id",
|
|
"uuid",
|
|
"slice_name",
|
|
"params",
|
|
"preset_embedded_config", # downstream addition
|
|
"created_on",
|
|
"changed_on",
|
|
"created_by_fk",
|
|
"changed_by_fk",
|
|
]
|
|
)
|
|
__versioned__ = {"exclude": ["query_context"]}
|
|
|
|
result = scalar_fields_for(_DerivedSlice, special=frozenset({"params"}))
|
|
# Core and downstream fields both appear — zero maintenance in diff.py.
|
|
assert "slice_name" in result
|
|
assert "preset_embedded_config" in result
|
|
assert "params" not in result # handled specially
|
|
assert "id" not in result # audit
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# diff_scalar_fields — generic primitive used by all entity types
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_diff_scalar_fields_only_emits_changed_fields() -> None:
|
|
pre = {"a": 1, "b": "x", "c": True}
|
|
post = {"a": 2, "b": "x", "c": False}
|
|
records = diff_scalar_fields(pre, post, fields={"a", "b", "c"})
|
|
paths = {tuple(r.path) for r in records}
|
|
assert paths == {("a",), ("c",)}
|
|
assert {r.kind for r in records} == {"field"}
|
|
|
|
|
|
def test_diff_scalar_fields_ignores_fields_outside_universe() -> None:
|
|
# ``extra`` differs, but isn't in the fields set → no record.
|
|
pre = {"a": 1, "extra": 100}
|
|
post = {"a": 2, "extra": 200}
|
|
records = diff_scalar_fields(pre, post, fields={"a"})
|
|
assert len(records) == 1
|
|
assert records[0].path == ["a"]
|
|
|
|
|
|
def test_null_to_empty_string_is_not_a_change() -> None:
|
|
"""Superset's save path normalises nullable strings (``css``,
|
|
``certified_by``, ``certification_details``) to ``""`` on first
|
|
write. The transition ``null → ""`` carries no user-authored
|
|
signal and must not produce a record. Same for the reverse.
|
|
"""
|
|
# Both directions silently pass.
|
|
pre = {"css": None, "certified_by": "", "title": "Old"}
|
|
post = {"css": "", "certified_by": None, "title": "New"}
|
|
records = diff_scalar_fields(pre, post, fields={"css", "certified_by", "title"})
|
|
paths = [r.path for r in records]
|
|
assert ["css"] not in paths
|
|
assert ["certified_by"] not in paths
|
|
# Real change still emits.
|
|
assert ["title"] in paths
|
|
|
|
|
|
def test_real_string_change_still_emits() -> None:
|
|
"""Sanity: the null/"" filter must not swallow genuine edits."""
|
|
pre = {"description": ""}
|
|
post = {"description": "non-empty"}
|
|
records = diff_scalar_fields(pre, post, fields={"description"})
|
|
assert len(records) == 1
|
|
assert records[0].from_value == ""
|
|
assert records[0].to_value == "non-empty"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# (b) Chart params — filters
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
FILTER_COUNTRY = {
|
|
"subject": "country",
|
|
"operator": "==",
|
|
"comparator": "Canada",
|
|
"expressionType": "SIMPLE",
|
|
}
|
|
FILTER_COUNTRY_REGION = {
|
|
"subject": "country",
|
|
"operator": "==",
|
|
"comparator": "Canada/Quebec",
|
|
"expressionType": "SIMPLE",
|
|
}
|
|
FILTER_DATE = {
|
|
"subject": "order_date",
|
|
"operator": ">",
|
|
"comparator": "2020-01-01",
|
|
"expressionType": "SIMPLE",
|
|
}
|
|
|
|
|
|
def _params_json(**kwargs: Any) -> str:
|
|
return _json.dumps(kwargs)
|
|
|
|
|
|
def test_filter_added() -> None:
|
|
records = diff_slice_params(
|
|
_params_json(adhoc_filters=[]),
|
|
_params_json(adhoc_filters=[FILTER_COUNTRY]),
|
|
)
|
|
assert len(records) == 1
|
|
r = records[0]
|
|
assert r.kind == "filter"
|
|
assert r.path == ["params", "adhoc_filters", "country"]
|
|
assert r.from_value is None
|
|
assert r.to_value == FILTER_COUNTRY
|
|
|
|
|
|
def test_filter_removed() -> None:
|
|
records = diff_slice_params(
|
|
_params_json(adhoc_filters=[FILTER_COUNTRY, FILTER_DATE]),
|
|
_params_json(adhoc_filters=[FILTER_DATE]),
|
|
)
|
|
assert len(records) == 1
|
|
r = records[0]
|
|
assert r.kind == "filter"
|
|
assert r.path == ["params", "adhoc_filters", "country"]
|
|
assert r.from_value == FILTER_COUNTRY
|
|
assert r.to_value is None
|
|
|
|
|
|
def test_filter_modified_same_subject() -> None:
|
|
records = diff_slice_params(
|
|
_params_json(adhoc_filters=[FILTER_COUNTRY]),
|
|
_params_json(adhoc_filters=[FILTER_COUNTRY_REGION]),
|
|
)
|
|
assert len(records) == 1
|
|
r = records[0]
|
|
assert r.kind == "filter"
|
|
assert r.path == ["params", "adhoc_filters", "country"]
|
|
assert r.from_value == FILTER_COUNTRY
|
|
assert r.to_value == FILTER_COUNTRY_REGION
|
|
|
|
|
|
def test_filter_insert_in_middle_is_still_one_record() -> None:
|
|
# Position-based diffing would emit three records for this case.
|
|
# Natural-key diffing emits exactly one.
|
|
records = diff_slice_params(
|
|
_params_json(adhoc_filters=[FILTER_COUNTRY, FILTER_DATE]),
|
|
_params_json(
|
|
adhoc_filters=[
|
|
FILTER_COUNTRY,
|
|
{"subject": "city", "operator": "in", "comparator": ["Montreal"]},
|
|
FILTER_DATE,
|
|
]
|
|
),
|
|
)
|
|
assert len(records) == 1
|
|
assert records[0].path == ["params", "adhoc_filters", "city"]
|
|
assert records[0].from_value is None
|
|
assert records[0].to_value["subject"] == "city"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# (b-continued) Chart params — scalar first-class kinds
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_time_range_change() -> None:
|
|
records = diff_slice_params(
|
|
_params_json(time_range="Last week"),
|
|
_params_json(time_range="Last month"),
|
|
)
|
|
assert records == [
|
|
ChangeRecord(
|
|
kind="time_range",
|
|
path=["params", "time_range"],
|
|
from_value="Last week",
|
|
to_value="Last month",
|
|
)
|
|
]
|
|
|
|
|
|
def test_time_range_added_from_null() -> None:
|
|
records = diff_slice_params(
|
|
_params_json(),
|
|
_params_json(time_range="Last week"),
|
|
)
|
|
assert records == [
|
|
ChangeRecord(
|
|
kind="time_range",
|
|
path=["params", "time_range"],
|
|
from_value=None,
|
|
to_value="Last week",
|
|
)
|
|
]
|
|
|
|
|
|
def test_color_palette_change() -> None:
|
|
records = diff_slice_params(
|
|
_params_json(color_scheme="supersetColors"),
|
|
_params_json(color_scheme="presetColors"),
|
|
)
|
|
assert records[0].kind == "color_palette"
|
|
assert records[0].path == ["params", "color_scheme"]
|
|
|
|
|
|
def test_unknown_params_sub_key_falls_through_to_field() -> None:
|
|
records = diff_slice_params(
|
|
_params_json(something_custom="x"),
|
|
_params_json(something_custom="y"),
|
|
)
|
|
assert records == [
|
|
ChangeRecord(
|
|
kind="field",
|
|
path=["params", "something_custom"],
|
|
from_value="x",
|
|
to_value="y",
|
|
)
|
|
]
|
|
|
|
|
|
def test_params_audit_keys_are_excluded() -> None:
|
|
"""``params.slice_id`` is a machine-stamped self-reference and must
|
|
not produce a record. Superset's save paths add or refresh it on
|
|
every save (see ``superset/views/core.py``), so without this filter
|
|
every chart save would emit a spurious ``["params", "slice_id"]``
|
|
record on the first save after the key was missing.
|
|
"""
|
|
# slice_id added (null → 104): no record.
|
|
assert diff_slice_params(_params_json(), _params_json(slice_id=104)) == []
|
|
# slice_id changed (101 → 104): no record.
|
|
assert (
|
|
diff_slice_params(_params_json(slice_id=101), _params_json(slice_id=104)) == []
|
|
)
|
|
# slice_id alongside a real edit: only the real edit is emitted.
|
|
records = diff_slice_params(
|
|
_params_json(slice_id=104, time_range="Last week"),
|
|
_params_json(slice_id=104, time_range="Last month"),
|
|
)
|
|
assert records == [
|
|
ChangeRecord(
|
|
kind="time_range",
|
|
path=["params", "time_range"],
|
|
from_value="Last week",
|
|
to_value="Last month",
|
|
)
|
|
]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# (c) Chart params — metrics
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
METRIC_SUM_SALES = {
|
|
"label": "SUM(sales)",
|
|
"aggregate": "SUM",
|
|
"column": {"column_name": "sales"},
|
|
"expressionType": "SIMPLE",
|
|
}
|
|
METRIC_COUNT_ORDERS = {
|
|
"label": "COUNT(orders)",
|
|
"aggregate": "COUNT",
|
|
"column": {"column_name": "orders"},
|
|
"expressionType": "SIMPLE",
|
|
}
|
|
|
|
|
|
def test_chart_metric_added() -> None:
|
|
records = diff_slice_params(
|
|
_params_json(metrics=[]),
|
|
_params_json(metrics=[METRIC_SUM_SALES]),
|
|
)
|
|
assert records == [
|
|
ChangeRecord(
|
|
kind="metric",
|
|
path=["params", "metrics", "SUM(sales)"],
|
|
from_value=None,
|
|
to_value=METRIC_SUM_SALES,
|
|
)
|
|
]
|
|
|
|
|
|
def test_chart_metric_removed() -> None:
|
|
records = diff_slice_params(
|
|
_params_json(metrics=[METRIC_SUM_SALES, METRIC_COUNT_ORDERS]),
|
|
_params_json(metrics=[METRIC_COUNT_ORDERS]),
|
|
)
|
|
assert records == [
|
|
ChangeRecord(
|
|
kind="metric",
|
|
path=["params", "metrics", "SUM(sales)"],
|
|
from_value=METRIC_SUM_SALES,
|
|
to_value=None,
|
|
)
|
|
]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# (c-continued) Chart params — dimensions
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_dimension_added() -> None:
|
|
records = diff_slice_params(
|
|
_params_json(groupby=["country"]),
|
|
_params_json(groupby=["country", "city"]),
|
|
)
|
|
assert records == [
|
|
ChangeRecord(
|
|
kind="dimension",
|
|
path=["params", "groupby", "city"],
|
|
from_value=None,
|
|
to_value="city",
|
|
)
|
|
]
|
|
|
|
|
|
def test_dimension_removed() -> None:
|
|
records = diff_slice_params(
|
|
_params_json(groupby=["country", "city"]),
|
|
_params_json(groupby=["country"]),
|
|
)
|
|
assert records == [
|
|
ChangeRecord(
|
|
kind="dimension",
|
|
path=["params", "groupby", "city"],
|
|
from_value="city",
|
|
to_value=None,
|
|
)
|
|
]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# (d) Dataset columns
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
COLUMN_COUNTRY = {"column_name": "country", "type": "VARCHAR(255)", "is_dttm": False}
|
|
COLUMN_COUNTRY_TEXT = {"column_name": "country", "type": "TEXT", "is_dttm": False}
|
|
COLUMN_DATE = {"column_name": "order_date", "type": "DATE", "is_dttm": True}
|
|
|
|
|
|
def test_column_added() -> None:
|
|
records = diff_dataset_columns([], [COLUMN_COUNTRY])
|
|
assert records == [
|
|
ChangeRecord(
|
|
kind="column",
|
|
path=["columns", "country"],
|
|
from_value=None,
|
|
to_value=COLUMN_COUNTRY,
|
|
)
|
|
]
|
|
|
|
|
|
def test_column_removed() -> None:
|
|
records = diff_dataset_columns([COLUMN_COUNTRY, COLUMN_DATE], [COLUMN_DATE])
|
|
assert records == [
|
|
ChangeRecord(
|
|
kind="column",
|
|
path=["columns", "country"],
|
|
from_value=COLUMN_COUNTRY,
|
|
to_value=None,
|
|
)
|
|
]
|
|
|
|
|
|
def test_column_type_changed() -> None:
|
|
records = diff_dataset_columns([COLUMN_COUNTRY], [COLUMN_COUNTRY_TEXT])
|
|
assert records == [
|
|
ChangeRecord(
|
|
kind="column",
|
|
path=["columns", "country"],
|
|
from_value=COLUMN_COUNTRY,
|
|
to_value=COLUMN_COUNTRY_TEXT,
|
|
)
|
|
]
|
|
|
|
|
|
def test_column_unchanged_emits_nothing() -> None:
|
|
assert diff_dataset_columns([COLUMN_COUNTRY], [COLUMN_COUNTRY]) == []
|
|
|
|
|
|
def test_column_audit_only_change_is_ignored() -> None:
|
|
"""Refreshed ``changed_on`` alone must not produce a record.
|
|
|
|
Reproduces the dataset-editor scenario where adding one calculated
|
|
column refreshes ``changed_on`` on every other column as a
|
|
side-effect of the save. Before the audit-field strip, each
|
|
untouched column produced a spurious 'changed' record.
|
|
"""
|
|
pre = {
|
|
"column_name": "country",
|
|
"type": "VARCHAR",
|
|
"id": 1226,
|
|
"table_id": 17,
|
|
"changed_on": "2026-04-24T18:49:07.368009",
|
|
"created_on": "2026-04-24T18:49:07.368008",
|
|
"changed_by_fk": 1,
|
|
"created_by_fk": 1,
|
|
}
|
|
post = dict(pre, changed_on="2026-04-24T18:49:07.502720")
|
|
assert diff_dataset_columns([pre], [post]) == []
|
|
|
|
|
|
def test_column_id_change_with_same_content_is_ignored() -> None:
|
|
"""``override_columns`` re-insert gives new ids; don't fire a record.
|
|
|
|
Under DatasetDAO.update_columns' override_columns pattern a
|
|
column's row can be deleted and re-inserted with the same natural
|
|
key (``column_name``) and content but a new auto-increment id.
|
|
The natural key matches, so we don't emit add+remove; the id-only
|
|
difference must be filtered so we don't emit a spurious 'changed'.
|
|
"""
|
|
pre = {"column_name": "country", "type": "VARCHAR", "id": 1226, "table_id": 17}
|
|
post = dict(pre, id=1234)
|
|
assert diff_dataset_columns([pre], [post]) == []
|
|
|
|
|
|
def test_column_real_content_change_still_emits() -> None:
|
|
"""After stripping audit fields, a genuine content change still fires."""
|
|
pre = {
|
|
"column_name": "country",
|
|
"type": "VARCHAR",
|
|
"id": 1226,
|
|
"changed_on": "2026-04-24T18:49:07.368009",
|
|
}
|
|
post = dict(pre, type="TEXT", changed_on="2026-04-24T18:49:07.502720")
|
|
records = diff_dataset_columns([pre], [post])
|
|
assert len(records) == 1
|
|
# Stripped values reach the renderer — no audit noise in the record.
|
|
assert "changed_on" not in records[0].from_value
|
|
assert "changed_on" not in records[0].to_value
|
|
assert records[0].from_value["type"] == "VARCHAR"
|
|
assert records[0].to_value["type"] == "TEXT"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# (d-continued) Dataset metrics
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
DATASET_METRIC_SUM = {"metric_name": "sum_sales", "expression": "SUM(sales)"}
|
|
DATASET_METRIC_AVG = {"metric_name": "avg_sales", "expression": "AVG(sales)"}
|
|
|
|
|
|
def test_dataset_metric_added() -> None:
|
|
records = diff_dataset_metrics([], [DATASET_METRIC_SUM])
|
|
assert records == [
|
|
ChangeRecord(
|
|
kind="metric",
|
|
path=["metrics", "sum_sales"],
|
|
from_value=None,
|
|
to_value=DATASET_METRIC_SUM,
|
|
)
|
|
]
|
|
|
|
|
|
def test_dataset_metric_removed() -> None:
|
|
records = diff_dataset_metrics(
|
|
[DATASET_METRIC_SUM, DATASET_METRIC_AVG], [DATASET_METRIC_AVG]
|
|
)
|
|
assert records == [
|
|
ChangeRecord(
|
|
kind="metric",
|
|
path=["metrics", "sum_sales"],
|
|
from_value=DATASET_METRIC_SUM,
|
|
to_value=None,
|
|
)
|
|
]
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# (e) Dashboard slices (chart membership)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_dashboard_chart_added() -> None:
|
|
records = diff_dashboard_slices(["u-1"], ["u-1", "u-2"])
|
|
assert records == [
|
|
ChangeRecord(
|
|
kind="chart",
|
|
path=["slices", "u-2"],
|
|
from_value=None,
|
|
to_value="u-2",
|
|
)
|
|
]
|
|
|
|
|
|
def test_dashboard_chart_removed() -> None:
|
|
records = diff_dashboard_slices(["u-1", "u-2"], ["u-1"])
|
|
assert records == [
|
|
ChangeRecord(
|
|
kind="chart",
|
|
path=["slices", "u-2"],
|
|
from_value="u-2",
|
|
to_value=None,
|
|
)
|
|
]
|
|
|
|
|
|
def test_dashboard_chart_no_change() -> None:
|
|
assert diff_dashboard_slices(["u-1"], ["u-1"]) == []
|
|
|
|
|
|
def test_dashboard_chart_swap_emits_add_plus_remove() -> None:
|
|
records = diff_dashboard_slices(["u-1"], ["u-2"])
|
|
kinds = {r.kind for r in records}
|
|
tos = {r.to_value for r in records}
|
|
froms = {r.from_value for r in records}
|
|
assert kinds == {"chart"}
|
|
assert tos == {"u-2", None}
|
|
assert froms == {"u-1", None}
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# (e2) Dashboard JSON-blob fields (json_metadata, position_json)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_diff_json_field_emits_per_changed_top_level_key() -> None:
|
|
"""Each changed top-level key produces a separate record.
|
|
|
|
Mirrors the behaviour of ``diff_slice_params`` for chart params:
|
|
walking the parsed JSON dict means a save that only adds
|
|
``map_label_colors`` doesn't also re-emit the entire blob — only
|
|
one record for that key.
|
|
"""
|
|
pre = _json.dumps({"color_scheme": "", "label_colors": {}, "refresh_frequency": 0})
|
|
post = _json.dumps(
|
|
{
|
|
"color_scheme": "", # unchanged
|
|
"label_colors": {}, # unchanged
|
|
"refresh_frequency": 30, # changed
|
|
"map_label_colors": {"x": "#fff"}, # added
|
|
}
|
|
)
|
|
records = diff_json_field("json_metadata", pre, post)
|
|
paths = {tuple(r.path) for r in records}
|
|
assert paths == {
|
|
("json_metadata", "refresh_frequency"),
|
|
("json_metadata", "map_label_colors"),
|
|
}
|
|
assert {r.kind for r in records} == {"field"}
|
|
|
|
|
|
def test_diff_json_field_treats_null_and_empty_string_as_equivalent() -> None:
|
|
"""A key that flips from missing/null/"" to "" produces no record."""
|
|
pre = _json.dumps({"color_scheme": None, "label_colors": {}})
|
|
post = _json.dumps({"color_scheme": "", "label_colors": {}})
|
|
assert diff_json_field("json_metadata", pre, post) == []
|
|
|
|
|
|
def test_diff_json_field_handles_invalid_or_null_input() -> None:
|
|
"""Malformed JSON / None / non-string values must not crash —
|
|
both sides degrade to the empty dict, so no records are emitted.
|
|
"""
|
|
assert diff_json_field("json_metadata", None, None) == []
|
|
assert diff_json_field("json_metadata", "not-json", "{}") == []
|
|
assert diff_json_field("position_json", "{}", None) == []
|
|
|
|
|
|
def test_diff_dashboard_walks_json_blobs_structurally() -> None:
|
|
"""Full dashboard diff: scalar edit + json_metadata edit produce
|
|
one record each, keyed by sub-path. The json_metadata blob is
|
|
NOT emitted as a single opaque ``["json_metadata"]`` record.
|
|
"""
|
|
pre = {
|
|
"dashboard_title": "Old",
|
|
"json_metadata": _json.dumps({"refresh_frequency": 0}),
|
|
"position_json": _json.dumps({"GRID_ID": {"type": "GRID"}}),
|
|
}
|
|
post = {
|
|
"dashboard_title": "New",
|
|
"json_metadata": _json.dumps({"refresh_frequency": 30}),
|
|
"position_json": _json.dumps({"GRID_ID": {"type": "GRID"}}),
|
|
}
|
|
records = diff_dashboard(pre, post, fields={"dashboard_title"})
|
|
paths = {tuple(r.path) for r in records}
|
|
assert paths == {
|
|
("dashboard_title",),
|
|
("json_metadata", "refresh_frequency"),
|
|
}
|
|
# Confirm the full json_metadata string is NOT in any record's
|
|
# from/to_value — the structural walk replaced opaque-blob storage.
|
|
for r in records:
|
|
assert "refresh_frequency" not in str(r.from_value or "") or (
|
|
r.path == ["json_metadata", "refresh_frequency"]
|
|
)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# (f) Replay round-trip — SC-008
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def _apply_field(state: dict[str, Any], path: list[Any], value: Any) -> None:
|
|
"""Generic set-by-path for ``kind="field"`` records."""
|
|
cursor = state
|
|
for seg in path[:-1]:
|
|
cursor = cursor.setdefault(seg, {})
|
|
cursor[path[-1]] = value
|
|
|
|
|
|
def _replay(pre: dict[str, Any], records: list[ChangeRecord]) -> dict[str, Any]:
|
|
"""Apply change records to the pre-state.
|
|
|
|
Dispatches on ``kind`` because named kinds use natural-key paths
|
|
(e.g. ``["columns", "country"]``) that are not valid JSON Pointer
|
|
locations — the replay function has to understand the semantics
|
|
of each kind.
|
|
"""
|
|
state = deepcopy(pre)
|
|
for r in records:
|
|
if r.kind == "field":
|
|
_apply_field(state, r.path, r.to_value)
|
|
elif r.kind == "filter":
|
|
_apply_list_by_key(state, r, list_key="adhoc_filters", id_key="subject")
|
|
elif r.kind == "metric" and r.path[:2] == ["params", "metrics"]:
|
|
_apply_list_by_key(state, r, list_key="metrics", id_key="label")
|
|
elif r.kind == "metric" and r.path[:1] == ["metrics"]:
|
|
_apply_dataset_list_by_key(
|
|
state, r, list_key="metrics", id_key="metric_name"
|
|
)
|
|
elif r.kind == "column":
|
|
_apply_dataset_list_by_key(
|
|
state, r, list_key="columns", id_key="column_name"
|
|
)
|
|
elif r.kind == "dimension":
|
|
_apply_scalar_list_by_key(state, r)
|
|
elif r.kind in ("time_range", "color_palette"):
|
|
params = _coerce_params_in_state(state)
|
|
params[r.path[-1]] = r.to_value
|
|
state["params"] = _json.dumps(params)
|
|
elif r.kind == "chart":
|
|
_apply_chart_membership(state, r)
|
|
else:
|
|
raise AssertionError(f"replay: unknown kind {r.kind!r}")
|
|
return state
|
|
|
|
|
|
def _coerce_params_in_state(state: dict[str, Any]) -> dict[str, Any]:
|
|
raw = state.get("params")
|
|
if raw is None:
|
|
return {}
|
|
if isinstance(raw, str):
|
|
return _json.loads(raw) if raw else {}
|
|
return raw
|
|
|
|
|
|
def _apply_list_by_key(
|
|
state: dict[str, Any], r: ChangeRecord, list_key: str, id_key: str
|
|
) -> None:
|
|
"""Apply a record to a ``params.<list_key>`` natural-keyed list."""
|
|
params = _coerce_params_in_state(state)
|
|
items = list(params.get(list_key, []))
|
|
natural_key = r.path[-1]
|
|
idx = next(
|
|
(i for i, item in enumerate(items) if item.get(id_key) == natural_key), None
|
|
)
|
|
if r.to_value is None:
|
|
# removal
|
|
if idx is not None:
|
|
items.pop(idx)
|
|
elif idx is not None:
|
|
# modify in place
|
|
items[idx] = r.to_value
|
|
else:
|
|
items.append(r.to_value)
|
|
params[list_key] = items
|
|
state["params"] = _json.dumps(params)
|
|
|
|
|
|
def _apply_scalar_list_by_key(state: dict[str, Any], r: ChangeRecord) -> None:
|
|
"""Dimension-style: groupby/columns are lists of strings."""
|
|
params = _coerce_params_in_state(state)
|
|
list_key = r.path[1] # "groupby" or "columns"
|
|
items = list(params.get(list_key, []))
|
|
natural_key = r.path[-1]
|
|
if r.to_value is None:
|
|
items = [x for x in items if x != natural_key]
|
|
elif natural_key not in items:
|
|
items.append(r.to_value)
|
|
params[list_key] = items
|
|
state["params"] = _json.dumps(params)
|
|
|
|
|
|
def _apply_dataset_list_by_key(
|
|
state: dict[str, Any], r: ChangeRecord, list_key: str, id_key: str
|
|
) -> None:
|
|
"""Dataset children live at top level, not inside ``params``."""
|
|
items = list(state.get(list_key, []))
|
|
natural_key = r.path[-1]
|
|
idx = next(
|
|
(i for i, item in enumerate(items) if item.get(id_key) == natural_key), None
|
|
)
|
|
if r.to_value is None:
|
|
if idx is not None:
|
|
items.pop(idx)
|
|
elif idx is not None:
|
|
items[idx] = r.to_value
|
|
else:
|
|
items.append(r.to_value)
|
|
state[list_key] = items
|
|
|
|
|
|
def _apply_chart_membership(state: dict[str, Any], r: ChangeRecord) -> None:
|
|
items = list(state.get("slice_uuids", []))
|
|
target = r.path[-1]
|
|
if r.to_value is None:
|
|
items = [u for u in items if u != target]
|
|
elif target not in items:
|
|
items.append(r.to_value)
|
|
state["slice_uuids"] = items
|
|
|
|
|
|
def test_replay_slice_scalar_roundtrip() -> None:
|
|
pre = {"slice_name": "Old", "description": None, "params": _params_json()}
|
|
post = {
|
|
"slice_name": "New",
|
|
"description": "added",
|
|
"params": _params_json(),
|
|
}
|
|
records = diff_slice(pre, post, fields=_SLICE_TEST_FIELDS)
|
|
assert _replay(pre, records)["slice_name"] == post["slice_name"]
|
|
assert _replay(pre, records)["description"] == post["description"]
|
|
|
|
|
|
def test_replay_slice_params_roundtrip_filter_added() -> None:
|
|
pre = {"slice_name": "x", "params": _params_json(adhoc_filters=[])}
|
|
post = {
|
|
"slice_name": "x",
|
|
"params": _params_json(adhoc_filters=[FILTER_COUNTRY]),
|
|
}
|
|
records = diff_slice(pre, post, fields=_SLICE_TEST_FIELDS)
|
|
result = _replay(pre, records)
|
|
assert _json.loads(result["params"]) == _json.loads(post["params"])
|
|
|
|
|
|
def test_replay_slice_params_roundtrip_filter_removed() -> None:
|
|
pre = {
|
|
"slice_name": "x",
|
|
"params": _params_json(adhoc_filters=[FILTER_COUNTRY, FILTER_DATE]),
|
|
}
|
|
post = {
|
|
"slice_name": "x",
|
|
"params": _params_json(adhoc_filters=[FILTER_DATE]),
|
|
}
|
|
records = diff_slice(pre, post, fields=_SLICE_TEST_FIELDS)
|
|
result = _replay(pre, records)
|
|
assert _json.loads(result["params"]) == _json.loads(post["params"])
|
|
|
|
|
|
def test_replay_time_range_and_color_palette() -> None:
|
|
pre = {
|
|
"slice_name": "x",
|
|
"params": _params_json(time_range="Last week", color_scheme="supersetColors"),
|
|
}
|
|
post = {
|
|
"slice_name": "x",
|
|
"params": _params_json(time_range="Last month", color_scheme="presetColors"),
|
|
}
|
|
records = diff_slice(pre, post, fields=_SLICE_TEST_FIELDS)
|
|
result = _replay(pre, records)
|
|
assert _json.loads(result["params"]) == _json.loads(post["params"])
|
|
|
|
|
|
def test_replay_dataset_columns_roundtrip() -> None:
|
|
pre = {"columns": [COLUMN_COUNTRY, COLUMN_DATE]}
|
|
post = {"columns": [COLUMN_COUNTRY_TEXT, COLUMN_DATE]} # type-changed
|
|
records = diff_dataset_columns(pre["columns"], post["columns"])
|
|
assert _replay(pre, records)["columns"] == post["columns"]
|
|
|
|
|
|
def test_replay_dataset_metrics_roundtrip() -> None:
|
|
pre = {"metrics": [DATASET_METRIC_SUM]}
|
|
post = {"metrics": [DATASET_METRIC_AVG]} # add avg, remove sum
|
|
records = diff_dataset_metrics(pre["metrics"], post["metrics"])
|
|
result_metrics = _replay(pre, records)["metrics"]
|
|
# order-insensitive comparison
|
|
assert sorted(result_metrics, key=lambda m: m["metric_name"]) == sorted(
|
|
post["metrics"], key=lambda m: m["metric_name"]
|
|
)
|
|
|
|
|
|
def test_replay_dashboard_slices_roundtrip() -> None:
|
|
pre = {"slice_uuids": ["u-1", "u-2"]}
|
|
post = {"slice_uuids": ["u-2", "u-3"]} # remove u-1, add u-3
|
|
records = diff_dashboard_slices(pre["slice_uuids"], post["slice_uuids"])
|
|
result = _replay(pre, records)
|
|
assert sorted(result["slice_uuids"]) == sorted(post["slice_uuids"])
|
|
|
|
|
|
def test_replay_dashboard_scalar_roundtrip() -> None:
|
|
pre = {"dashboard_title": "Old", "position_json": '{"a":1}'}
|
|
post = {"dashboard_title": "New", "position_json": '{"a":2}'}
|
|
records = diff_dashboard(pre, post, fields=_DASHBOARD_TEST_FIELDS)
|
|
assert _replay(pre, records) == {
|
|
"dashboard_title": "New",
|
|
"position_json": '{"a":2}',
|
|
}
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Edge cases
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_malformed_params_string_is_treated_as_empty() -> None:
|
|
# If ``params`` is not valid JSON, ``diff_slice_params`` degrades
|
|
# to "no params recorded" rather than crashing the save path.
|
|
records = diff_slice_params("not json", _params_json(time_range="Last week"))
|
|
assert records == [
|
|
ChangeRecord(
|
|
kind="time_range",
|
|
path=["params", "time_range"],
|
|
from_value=None,
|
|
to_value="Last week",
|
|
)
|
|
]
|
|
|
|
|
|
def test_none_params_on_both_sides() -> None:
|
|
assert diff_slice_params(None, None) == []
|
|
|
|
|
|
def test_filter_without_subject_falls_back_to_position() -> None:
|
|
# Keyless filters should not crash; they fall back to the list index.
|
|
filter_no_subject = {"operator": "==", "comparator": "x"}
|
|
records = diff_slice_params(
|
|
_params_json(adhoc_filters=[]),
|
|
_params_json(adhoc_filters=[filter_no_subject]),
|
|
)
|
|
assert len(records) == 1
|
|
assert records[0].kind == "filter"
|
|
assert records[0].to_value == filter_no_subject
|
|
|
|
|
|
def test_empty_state_emits_nothing() -> None:
|
|
assert diff_slice({}, {}, fields=_SLICE_TEST_FIELDS) == []
|
|
assert diff_dashboard({}, {}, fields=_DASHBOARD_TEST_FIELDS) == []
|
|
assert diff_dataset({}, {}, fields=_DATASET_TEST_FIELDS) == []
|
|
assert diff_dataset_columns([], []) == []
|
|
assert diff_dataset_metrics([], []) == []
|
|
assert diff_dashboard_slices([], []) == []
|