Files
superset2/tests/unit_tests/datasets/commands/export_test.py
2026-05-14 11:08:23 -07:00

359 lines
10 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=import-outside-toplevel, unused-argument, unused-import
from uuid import UUID
import yaml
from sqlalchemy.orm.session import Session
from superset import db
from superset.utils import json
def test_export(session: Session) -> None:
    """
    Test exporting a dataset.

    Builds a dataset with five columns, one metric and a two-level folder
    tree on a ``sqlite://`` database, runs ``ExportDatasetsCommand._export``
    on it, and compares the rendered ``(path, content)`` pairs with the
    expected dataset YAML followed by the expected database YAML.

    The ``session`` fixture is not referenced directly; all database work
    goes through ``db.session``.
    """
    from superset.commands.dataset.export import ExportDatasetsCommand
    from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn
    from superset.models.core import Database

    engine = db.session.get_bind()
    SqlaTable.metadata.create_all(engine)  # pylint: disable=no-member

    database = Database(database_name="my_database", sqlalchemy_uri="sqlite://")
    db.session.add(database)
    db.session.flush()

    columns = [
        TableColumn(column_name="ds", is_dttm=1, type="TIMESTAMP"),
        TableColumn(column_name="user_id", type="INTEGER"),
        TableColumn(column_name="revenue", type="INTEGER"),
        TableColumn(column_name="expenses", type="INTEGER"),
        TableColumn(
            column_name="profit",
            type="INTEGER",
            expression="revenue-expenses",
            extra=json.dumps({"certified_by": "User"}),
            # Fixed UUID so the "profit" folder entry below can reference it.
            uuid=UUID("00000000-0000-0000-0000-000000000005"),
        ),
    ]
    metrics = [
        SqlMetric(
            metric_name="cnt",
            expression="COUNT(*)",
            extra=json.dumps({"warning_markdown": None}),
            # Fixed UUID so the "cnt" folder entry below can reference it.
            uuid=UUID("00000000-0000-0000-0000-000000000004"),
        ),
    ]

    sqla_table = SqlaTable(
        table_name="my_table",
        columns=columns,
        metrics=metrics,
        folders=[
            {
                "uuid": "00000000-0000-0000-0000-000000000000",
                "type": "folder",
                "name": "Engineering",
                "children": [
                    {
                        "uuid": "00000000-0000-0000-0000-000000000001",
                        "type": "folder",
                        "name": "Core",
                        "children": [
                            {
                                "uuid": "00000000-0000-0000-0000-000000000004",
                                "type": "metric",
                                "name": "cnt",
                            },
                        ],
                    },
                ],
            },
            {
                "uuid": "00000000-0000-0000-0000-000000000002",
                "type": "folder",
                "name": "Sales",
                "children": [
                    {
                        "uuid": "00000000-0000-0000-0000-000000000003",
                        "type": "folder",
                        "name": "Core",
                        "children": [
                            {
                                "uuid": "00000000-0000-0000-0000-000000000005",
                                "type": "column",
                                "name": "profit",
                            },
                        ],
                    },
                ],
            },
        ],
        main_dttm_col="ds",
        database=database,
        offset=-8,
        description="This is the description",
        is_featured=1,
        cache_timeout=3600,
        catalog="public",
        schema="my_schema",
        sql=None,
        params=json.dumps(
            {
                "remote_id": 64,
                "database_name": "examples",
                "import_time": 1606677834,
            }
        ),
        perm=None,
        filter_select_enabled=1,
        fetch_values_predicate="foo IN (1, 2)",
        is_sqllab_view=0,  # no longer used?
        template_params=json.dumps({"answer": "42"}),
        schema_perm=None,
        normalize_columns=False,
        always_filter_main_dttm=False,
        extra=json.dumps({"warning_markdown": "*WARNING*"}),
    )

    # Add the table to the session and flush to get an ID
    db.session.add(sqla_table)
    db.session.flush()

    # `_export` yields (path, content_callable) pairs; call each callable to
    # render the file content.
    export = [
        (path, render())
        for path, render in ExportDatasetsCommand._export(  # pylint: disable=protected-access
            sqla_table
        )
    ]

    # Only used to look up the dataset's generated UUID for the expected YAML.
    payload = sqla_table.export_to_dict(
        recursive=True,
        include_parent_ref=False,
        include_defaults=True,
        export_uuids=True,
    )

    # The expected documents are nested YAML: mappings indent their children
    # by two spaces and list items start at the column of their parent key.
    # NOTE(review): the nesting mirrors the fixture data built above; for the
    # database's default `extra` keys, confirm the nesting against the
    # exporter's actual output.
    assert export == [
        (
            f"datasets/my_database/my_table_{sqla_table.id}.yaml",
            f"""table_name: my_table
main_dttm_col: ds
currency_code_column: null
description: This is the description
default_endpoint: null
offset: -8
cache_timeout: 3600
catalog: public
schema: my_schema
sql: null
params:
  remote_id: 64
  database_name: examples
  import_time: 1606677834
template_params:
  answer: '42'
filter_select_enabled: 1
fetch_values_predicate: foo IN (1, 2)
extra:
  warning_markdown: '*WARNING*'
normalize_columns: false
always_filter_main_dttm: false
folders:
- uuid: 00000000-0000-0000-0000-000000000000
  type: folder
  name: Engineering
  children:
  - uuid: 00000000-0000-0000-0000-000000000001
    type: folder
    name: Core
    children:
    - uuid: 00000000-0000-0000-0000-000000000004
      type: metric
      name: cnt
- uuid: 00000000-0000-0000-0000-000000000002
  type: folder
  name: Sales
  children:
  - uuid: 00000000-0000-0000-0000-000000000003
    type: folder
    name: Core
    children:
    - uuid: 00000000-0000-0000-0000-000000000005
      type: column
      name: profit
uuid: {payload["uuid"]}
metrics:
- metric_name: cnt
  verbose_name: null
  metric_type: null
  expression: COUNT(*)
  description: null
  d3format: null
  currency: null
  extra:
    warning_markdown: null
  warning_text: null
columns:
- column_name: profit
  verbose_name: null
  is_dttm: false
  is_active: true
  type: INTEGER
  advanced_data_type: null
  groupby: true
  filterable: true
  expression: revenue-expenses
  description: null
  python_date_format: null
  datetime_format: null
  extra:
    certified_by: User
- column_name: ds
  verbose_name: null
  is_dttm: 1
  is_active: true
  type: TIMESTAMP
  advanced_data_type: null
  groupby: true
  filterable: true
  expression: null
  description: null
  python_date_format: null
  datetime_format: null
  extra: null
- column_name: user_id
  verbose_name: null
  is_dttm: false
  is_active: true
  type: INTEGER
  advanced_data_type: null
  groupby: true
  filterable: true
  expression: null
  description: null
  python_date_format: null
  datetime_format: null
  extra: null
- column_name: expenses
  verbose_name: null
  is_dttm: false
  is_active: true
  type: INTEGER
  advanced_data_type: null
  groupby: true
  filterable: true
  expression: null
  description: null
  python_date_format: null
  datetime_format: null
  extra: null
- column_name: revenue
  verbose_name: null
  is_dttm: false
  is_active: true
  type: INTEGER
  advanced_data_type: null
  groupby: true
  filterable: true
  expression: null
  description: null
  python_date_format: null
  datetime_format: null
  extra: null
version: 1.0.0
database_uuid: {database.uuid}
""",
        ),
        (
            "databases/my_database.yaml",
            f"""database_name: my_database
sqlalchemy_uri: sqlite://
cache_timeout: null
expose_in_sqllab: true
allow_run_async: false
allow_ctas: false
allow_cvas: false
allow_dml: false
allow_file_upload: false
extra:
  metadata_params: {{}}
  engine_params: {{}}
  metadata_cache_timeout: {{}}
  schemas_allowed_for_file_upload: []
impersonate_user: false
configuration_method: sqlalchemy_form
uuid: {database.uuid}
version: 1.0.0
""",
        ),
    ]
def test_export_two_datasets_same_table_name_different_schema(
    session: Session,
) -> None:
    """
    Guard against the filename collision reported in GitHub issue #16141.

    Two datasets named ``users`` are created on the same database, one in
    the ``prod`` schema and one in the ``dev`` schema, and each is exported
    on its own with ``export_related=False``. The test requires that every
    export path is unique and that the exported YAML documents between them
    carry both schemas, so this behavior cannot silently regress.
    """
    from superset.commands.dataset.export import ExportDatasetsCommand
    from superset.connectors.sqla.models import SqlaTable
    from superset.models.core import Database

    bind = db.session.get_bind()
    SqlaTable.metadata.create_all(bind)  # pylint: disable=no-member

    database = Database(database_name="my_database", sqlalchemy_uri="sqlite://")
    db.session.add(database)
    db.session.flush()

    # Same table name in two schemas; "prod" first, then "dev".
    datasets = [
        SqlaTable(table_name="users", schema=schema_name, database=database)
        for schema_name in ("prod", "dev")
    ]
    db.session.add_all(datasets)
    db.session.flush()

    # Collect every (path, rendered content) pair, dataset by dataset.
    exported: list[tuple[str, str]] = []
    for dataset in datasets:
        exported.extend(
            (path, render())
            for path, render in ExportDatasetsCommand._export(  # pylint: disable=protected-access
                dataset, export_related=False
            )
        )
    paths: list[str] = [path for path, _ in exported]
    contents: list[str] = [content for _, content in exported]

    # A duplicate path would mean one dataset's file shadows the other's.
    assert len(paths) == len(set(paths)), (
        f"Export filenames collided for same-table-name datasets: {paths}"
    )

    # Each payload must report its own schema rather than being merged
    # with, or overwritten by, the other dataset's payload.
    schemas_in_yaml = {yaml.safe_load(c)["schema"] for c in contents}
    assert schemas_in_yaml == {"prod", "dev"}, (
        f"Expected both prod and dev schemas in export, got {schemas_in_yaml}"
    )