fix: Update dataset's last modified date from column/metric update (#33626)

Author: Vitor Avila
Date: 2025-06-03 12:20:38 -03:00
Committer: GitHub
Parent: fa0c5891bf
Commit: cacf1e06d6

6 changed files with 149 additions and 38 deletions

View File

@@ -49,7 +49,6 @@ from sqlalchemy import (
     String,
     Table as DBTable,
     Text,
-    update,
 )
 from sqlalchemy.engine.base import Connection
 from sqlalchemy.ext.declarative import declared_attr
@@ -2023,21 +2022,6 @@ class SqlaTable(
         target.load_database()
         security_manager.dataset_before_update(mapper, connection, target)
 
-    @staticmethod
-    def update_column(  # pylint: disable=unused-argument
-        mapper: Mapper, connection: Connection, target: SqlMetric | TableColumn
-    ) -> None:
-        """
-        :param mapper: Unused.
-        :param connection: Unused.
-        :param target: The metric or column that was updated.
-        """
-        session = inspect(target).session  # pylint: disable=disallowed-name
-
-        # Forces an update to the table's changed_on value when a metric or column on the  # noqa: E501
-        # table is updated. This busts the cache key for all charts that use the table.
-        session.execute(update(SqlaTable).where(SqlaTable.id == target.table.id))
-
     @staticmethod
     def after_insert(
         mapper: Mapper,
@@ -2073,8 +2057,6 @@ class SqlaTable(
 sa.event.listen(SqlaTable, "before_update", SqlaTable.before_update)
 sa.event.listen(SqlaTable, "after_insert", SqlaTable.after_insert)
 sa.event.listen(SqlaTable, "after_delete", SqlaTable.after_delete)
-sa.event.listen(SqlMetric, "after_update", SqlaTable.update_column)
-sa.event.listen(TableColumn, "after_update", SqlaTable.update_column)
 
 RLSFilterRoles = DBTable(
     "rls_filter_roles",

View File

@@ -17,7 +17,7 @@
 from __future__ import annotations
 
 import logging
-from datetime import datetime
+from datetime import datetime, timezone
 from typing import Any
 
 import dateutil.parser
@@ -184,15 +184,21 @@ class DatasetDAO(BaseDAO[SqlaTable]):
         """
         if item and attributes:
+            force_update: bool = False
             if "columns" in attributes:
                 cls.update_columns(
                     item,
                     attributes.pop("columns"),
                     override_columns=bool(attributes.get("override_columns")),
                 )
+                force_update = True
 
             if "metrics" in attributes:
                 cls.update_metrics(item, attributes.pop("metrics"))
+                force_update = True
 
+            if force_update:
+                attributes["changed_on"] = datetime.now(tz=timezone.utc)
+
         return super().update(item, attributes)
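The control flow above is small but easy to misread in diff form, so here is a hedged, dependency-free restatement of it; plain dicts stand in for the ORM model and for the `update_columns`/`update_metrics` helpers, which are not reproduced here.

    from datetime import datetime, timezone
    from typing import Any


    def update_dataset(item: dict[str, Any], attributes: dict[str, Any]) -> dict[str, Any]:
        # Simplified stand-in for DatasetDAO.update: nested payloads are popped
        # and handled first; if any were present, changed_on is stamped explicitly.
        force_update = False
        if "columns" in attributes:
            item["columns"] = attributes.pop("columns")  # stand-in for update_columns()
            force_update = True
        if "metrics" in attributes:
            item["metrics"] = attributes.pop("metrics")  # stand-in for update_metrics()
            force_update = True
        if force_update:
            # Bumping the parent timestamp invalidates caches keyed on changed_on.
            attributes["changed_on"] = datetime.now(tz=timezone.utc)
        item.update(attributes)  # stand-in for super().update(item, attributes)
        return item


    # A columns-only change still refreshes the dataset's changed_on:
    dataset = {"id": 1, "changed_on": None}
    update_dataset(dataset, {"columns": [{"column_name": "new_col"}]})
    assert dataset["changed_on"] is not None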

View File

@@ -17,6 +17,7 @@
 from __future__ import annotations
 
 import unittest
+from datetime import timedelta
 from io import BytesIO
 from unittest.mock import ANY, patch
 from zipfile import is_zipfile, ZipFile
@@ -24,6 +25,7 @@ from zipfile import is_zipfile, ZipFile
 import prison
 import pytest
 import yaml
+from freezegun import freeze_time
 from sqlalchemy import inspect
 from sqlalchemy.exc import SQLAlchemyError
 from sqlalchemy.orm import joinedload
@@ -1144,9 +1146,9 @@ class TestDatasetApi(SupersetTestCase):
         """
         Dataset API: Test update dataset create column
         """
-        # create example dataset by Command
         dataset = self.insert_default_dataset()
+        current_changed_on = dataset.changed_on
 
         new_column_data = {
             "column_name": "new_col",
@@ -1188,13 +1190,16 @@
             metric.pop("type_generic", None)
         data["result"]["metrics"].append(new_metric_data)
 
-        rv = self.client.put(
-            uri,
-            json={
-                "columns": data["result"]["columns"],
-                "metrics": data["result"]["metrics"],
-            },
-        )
+        with freeze_time() as frozen:
+            frozen.tick(delta=timedelta(seconds=3))
+            rv = self.client.put(
+                uri,
+                json={
+                    "columns": data["result"]["columns"],
+                    "metrics": data["result"]["metrics"],
+                },
+            )
+
         assert rv.status_code == 200
@@ -1233,6 +1238,10 @@
         assert metrics[1].warning_text == new_metric_data["warning_text"]
         assert str(metrics[1].uuid) == new_metric_data["uuid"]
 
+        # Validate that the changed_on is updated
+        updated_dataset = db.session.query(SqlaTable).filter_by(id=dataset.id).first()
+        assert updated_dataset.changed_on > current_changed_on
+
         self.items_to_delete = [dataset]
 
     def test_update_dataset_delete_column(self):
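The test above wraps the PUT inside freezegun's `freeze_time()` and ticks the frozen clock forward three seconds, so the strict `changed_on > current_changed_on` assertion cannot collapse into equality on backends that store timestamps at second granularity. A minimal sketch of that freezegun pattern:

    from datetime import datetime, timedelta

    from freezegun import freeze_time

    before = datetime.now()
    with freeze_time() as frozen:
        # Jump the (frozen) clock forward so later writes get strictly
        # greater timestamps, regardless of how fast the test runs.
        frozen.tick(delta=timedelta(seconds=3))
        after = datetime.now()

    assert after > before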

View File

@@ -14,6 +14,7 @@
 # KIND, either express or implied. See the License for the
 # specific language governing permissions and limitations
 # under the License.
+import copy
 import re
 import time
 from typing import Any
@@ -30,7 +31,7 @@ from superset.common.chart_data import ChartDataResultFormat, ChartDataResultType
 from superset.common.query_context import QueryContext
 from superset.common.query_context_factory import QueryContextFactory
 from superset.common.query_object import QueryObject
-from superset.connectors.sqla.models import SqlMetric
+from superset.daos.dataset import DatasetDAO
 from superset.daos.datasource import DatasourceDAO
 from superset.extensions import cache_manager
 from superset.superset_typing import AdhocColumn
@@ -47,6 +48,7 @@ from tests.integration_tests.conftest import (
     only_sqlite,
     with_feature_flags,
 )
+from tests.integration_tests.constants import ADMIN_USERNAME
 from tests.integration_tests.fixtures.birth_names_dashboard import (
     load_birth_names_dashboard_with_slices,  # noqa: F401
     load_birth_names_data,  # noqa: F401
@@ -171,17 +173,28 @@ class TestQueryContext(SupersetTestCase):
         assert cache_key_original != cache_key_new
 
     def test_query_cache_key_changes_when_metric_is_updated(self):
+        """
+        Test that the query cache key changes when a metric is updated.
+        """
+        self.login(ADMIN_USERNAME)
         payload = get_query_context("birth_names")
 
-        # make temporary change and revert it to refresh the changed_on property
-        datasource = DatasourceDAO.get_datasource(
-            datasource_type=DatasourceType(payload["datasource"]["type"]),
-            datasource_id=payload["datasource"]["id"],
-        )
-
-        datasource.metrics.append(SqlMetric(metric_name="foo", expression="select 1;"))
+        dataset = DatasetDAO.find_by_id(payload["datasource"]["id"])
+        dataset_payload = {
+            "metrics": [
+                {
+                    "metric_name": "foo",
+                    "expression": "select 1;",
+                }
+            ]
+        }
+        DatasetDAO.update(dataset, copy.deepcopy(dataset_payload))
         db.session.commit()
 
+        # Add metric ID to the payload for future update
+        updated_dataset = DatasetDAO.find_by_id(dataset.id)
+        dataset_payload["metrics"][0]["id"] = updated_dataset.metrics[0].id
+
         # construct baseline query_cache_key
         query_context = ChartDataQueryContextSchema().load(payload)
         query_object = query_context.queries[0]
@@ -190,7 +203,8 @@
         # wait a second since mysql records timestamps in second granularity
         time.sleep(1)
 
-        datasource.metrics[0].expression = "select 2;"
+        dataset_payload["metrics"][0]["expression"] = "select 2;"
+        DatasetDAO.update(updated_dataset, copy.deepcopy(dataset_payload))
         db.session.commit()
# create new QueryContext with unchanged attributes, extract new query_cache_key
@@ -198,7 +212,8 @@ class TestQueryContext(SupersetTestCase):
query_object = query_context.queries[0]
cache_key_new = query_context.query_cache_key(query_object)
datasource.metrics = []
dataset_payload["metrics"] = []
DatasetDAO.update(updated_dataset, dataset_payload)
db.session.commit()
# the new cache_key should be different due to updated datasource
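The reason these assertions hold is that the query cache key folds the datasource's change timestamp into the hashed key material, so bumping `changed_on` produces a different key for every chart built on the dataset. The sketch below is an assumed reduction of that idea, not Superset's actual `query_cache_key` implementation:

    import hashlib
    import json
    from datetime import datetime, timezone


    def query_cache_key(datasource_id: int, changed_on: datetime, query: dict) -> str:
        # Hash everything that should invalidate the cache when it changes.
        material = json.dumps(
            {
                "datasource": datasource_id,
                "changed_on": changed_on.isoformat(),
                "query": query,
            },
            sort_keys=True,
        )
        return hashlib.md5(material.encode()).hexdigest()


    query = {"metrics": ["count"], "groupby": ["gender"]}
    t1 = datetime(2025, 1, 1, tzinfo=timezone.utc)
    t2 = datetime(2025, 1, 1, 0, 0, 3, tzinfo=timezone.utc)  # after a metric edit
    assert query_cache_key(1, t1, query) != query_cache_key(1, t2, query)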

View File

@@ -95,7 +95,7 @@ def test_update_dataset_forbidden(mocker: MockerFixture) -> None:
         ),
     ],
 )
-def test_update_validation_errors(
+def test_update_dataset_validation_errors(
     payload: dict[str, Any],
     exception: Exception,
     error_msg: str,

View File

@@ -15,8 +15,16 @@
 # specific language governing permissions and limitations
 # under the License.
+import copy
+from datetime import datetime, timezone
+from typing import Any
+from unittest.mock import MagicMock, patch
+
+import pytest
+from freezegun import freeze_time
 from sqlalchemy.orm.session import Session
 
+from superset.daos.base import BaseDAO
 from superset.daos.dataset import DatasetDAO
 from superset.sql_parse import Table
@@ -77,3 +85,94 @@ def test_validate_update_uniqueness(session: Session) -> None:
         )
         is True
     )
+
+
+@freeze_time("2025-01-01 00:00:00")
+@patch.object(DatasetDAO, "update_columns")
+@patch.object(DatasetDAO, "update_metrics")
+@patch.object(BaseDAO, "update")
+@pytest.mark.parametrize(
+    "attributes,expected_attributes",
+    [
+        (
+            {
+                "columns": [{"id": 1, "name": "col1"}],
+                "metrics": [{"id": 1, "name": "metric1"}],
+            },
+            {"changed_on": datetime(2025, 1, 1, 0, 0, 0, tzinfo=timezone.utc)},
+        ),
+        (
+            {
+                "columns": [{"id": 1, "name": "col1"}],
+                "metrics": [{"id": 1, "name": "metric1"}],
+                "description": "test description",
+            },
+            {
+                "description": "test description",
+                "changed_on": datetime(2025, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
+            },
+        ),
+        (
+            {
+                "columns": [{"id": 1, "name": "col1"}],
+            },
+            {"changed_on": datetime(2025, 1, 1, 0, 0, 0, tzinfo=timezone.utc)},
+        ),
+        (
+            {
+                "columns": [{"id": 1, "name": "col1"}],
+                "description": "test description",
+            },
+            {
+                "description": "test description",
+                "changed_on": datetime(2025, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
+            },
+        ),
+        (
+            {
+                "metrics": [{"id": 1, "name": "metric1"}],
+            },
+            {"changed_on": datetime(2025, 1, 1, 0, 0, 0, tzinfo=timezone.utc)},
+        ),
+        (
+            {
+                "metrics": [{"id": 1, "name": "metric1"}],
+                "description": "test description",
+            },
+            {
+                "description": "test description",
+                "changed_on": datetime(2025, 1, 1, 0, 0, 0, tzinfo=timezone.utc),
+            },
+        ),
+        (
+            {"description": "test description"},
+            {"description": "test description"},
+        ),
+    ],
+)
+def test_update_dataset_related_metadata_updates_changed_on(
+    base_update_mock: MagicMock,
+    update_metrics_mock: MagicMock,
+    update_columns_mock: MagicMock,
+    attributes: dict[str, Any],
+    expected_attributes: dict[str, Any],
+) -> None:
+    """
+    Test that the changed_on property is updated when a metric or column is updated.
+    """
+    item = MagicMock()
+    DatasetDAO.update(item, copy.deepcopy(attributes))
+
+    if "columns" in attributes:
+        update_columns_mock.assert_called_once_with(
+            item, attributes["columns"], override_columns=False
+        )
+    else:
+        update_columns_mock.assert_not_called()
+
+    if "metrics" in attributes:
+        update_metrics_mock.assert_called_once_with(item, attributes["metrics"])
+    else:
+        update_metrics_mock.assert_not_called()
+
+    base_update_mock.assert_called_once_with(item, expected_attributes)
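Two details in the new unit test are easy to trip over. First, stacked `@patch.object` decorators inject mocks bottom-up, so `base_update_mock` (the first parameter) belongs to the bottom-most `@patch.object(BaseDAO, "update")`. Second, `expected_attributes` never contains `columns` or `metrics`, because `DatasetDAO.update` pops them before delegating; only the remaining attributes, plus the stamped `changed_on`, reach `BaseDAO.update`. A tiny standalone demonstration of the decorator ordering:

    from unittest.mock import MagicMock, patch


    class Dao:
        @staticmethod
        def first() -> None: ...

        @staticmethod
        def second() -> None: ...


    @patch.object(Dao, "first")   # outermost: supplies the SECOND argument
    @patch.object(Dao, "second")  # innermost: supplies the FIRST argument
    def demo(second_mock: MagicMock, first_mock: MagicMock) -> None:
        Dao.first()
        first_mock.assert_called_once()
        second_mock.assert_not_called()


    demo()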