Files
superset2/tests/unit_tests/examples/data_loading_test.py
2026-02-12 14:12:12 -08:00

205 lines
7.0 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Tests for data_loading.py UUID extraction functionality."""
from pathlib import Path
from tempfile import TemporaryDirectory
from unittest.mock import patch
import yaml
def test_get_dataset_config_from_yaml_extracts_uuid():
    """A ``uuid`` key in dataset.yaml is surfaced in the returned config.

    The ``table_name`` and ``schema`` values written to the file are also
    expected to come back unchanged.
    """
    from superset.examples.data_loading import get_dataset_config_from_yaml

    expected_uuid = "12345678-1234-1234-1234-123456789012"
    yaml_payload = {
        "table_name": "test_table",
        "uuid": expected_uuid,
        "schema": "public",
    }

    with TemporaryDirectory() as tmpdir:
        example_dir = Path(tmpdir)
        (example_dir / "dataset.yaml").write_text(yaml.dump(yaml_payload))

        config = get_dataset_config_from_yaml(example_dir)

        assert config["uuid"] == expected_uuid
        assert config["table_name"] == "test_table"
        assert config["schema"] == "public"
def test_get_dataset_config_from_yaml_without_uuid():
    """A dataset.yaml lacking a ``uuid`` key yields ``config["uuid"] is None``.

    The ``table_name`` from the file must still be returned.
    """
    from superset.examples.data_loading import get_dataset_config_from_yaml

    # Deliberately no "uuid" entry in the payload.
    yaml_payload = {
        "table_name": "test_table",
        "schema": "public",
    }

    with TemporaryDirectory() as tmpdir:
        example_dir = Path(tmpdir)
        (example_dir / "dataset.yaml").write_text(yaml.dump(yaml_payload))

        config = get_dataset_config_from_yaml(example_dir)

        assert config["uuid"] is None
        assert config["table_name"] == "test_table"
def test_get_dataset_config_from_yaml_no_file():
    """With no dataset.yaml present, uuid/table_name/schema are all None."""
    from superset.examples.data_loading import get_dataset_config_from_yaml

    with TemporaryDirectory() as tmpdir:
        # The directory is left empty on purpose: nothing to read from.
        config = get_dataset_config_from_yaml(Path(tmpdir))

        assert config["uuid"] is None
        assert config["table_name"] is None
        assert config["schema"] is None
def test_get_dataset_config_from_yaml_treats_main_schema_as_none():
    """A ``schema`` of "main" in dataset.yaml is reported as ``None``."""
    from superset.examples.data_loading import get_dataset_config_from_yaml

    yaml_payload = {
        "table_name": "test_table",
        "schema": "main",  # SQLite default schema
    }

    with TemporaryDirectory() as tmpdir:
        example_dir = Path(tmpdir)
        (example_dir / "dataset.yaml").write_text(yaml.dump(yaml_payload))

        config = get_dataset_config_from_yaml(example_dir)

        assert config["schema"] is None
def test_get_multi_dataset_config_extracts_uuid():
    """``uuid`` and ``table_name`` are read from datasets/{name}.yaml.

    The YAML's ``table_name`` differs from the dataset name passed in, so the
    assertion confirms the value from the file is the one returned.
    """
    from superset.examples.data_loading import _get_multi_dataset_config

    dataset_name = "test_dataset"
    expected_uuid = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"
    yaml_payload = {
        "table_name": "custom_table_name",
        "uuid": expected_uuid,
        "schema": "public",
    }

    with TemporaryDirectory() as tmpdir:
        example_dir = Path(tmpdir)
        datasets_dir = example_dir / "datasets"
        datasets_dir.mkdir()
        (datasets_dir / f"{dataset_name}.yaml").write_text(yaml.dump(yaml_payload))

        # Only the path is handed over; the parquet file is never created.
        data_file = example_dir / "data" / f"{dataset_name}.parquet"
        config = _get_multi_dataset_config(example_dir, dataset_name, data_file)

        assert config["uuid"] == expected_uuid
        assert config["table_name"] == "custom_table_name"
def test_get_multi_dataset_config_without_yaml():
    """Without datasets/{name}.yaml, uuid is absent/None and the dataset name
    passed in is used as ``table_name``.
    """
    from superset.examples.data_loading import _get_multi_dataset_config

    dataset_name = "test_dataset"

    with TemporaryDirectory() as tmpdir:
        example_dir = Path(tmpdir)
        # No "datasets" directory is created, so there is no YAML to consult.
        data_file = example_dir / "data" / f"{dataset_name}.parquet"

        config = _get_multi_dataset_config(example_dir, dataset_name, data_file)

        # .get() tolerates the key being missing as well as being None.
        assert config.get("uuid") is None
        assert config["table_name"] == "test_dataset"
def test_get_multi_dataset_config_treats_main_schema_as_none():
    """A ``schema`` of "main" in datasets/{name}.yaml is reported as ``None``."""
    from superset.examples.data_loading import _get_multi_dataset_config

    dataset_name = "test_dataset"
    yaml_payload = {
        "table_name": "test_table",
        "schema": "main",
    }

    with TemporaryDirectory() as tmpdir:
        example_dir = Path(tmpdir)
        datasets_dir = example_dir / "datasets"
        datasets_dir.mkdir()
        (datasets_dir / f"{dataset_name}.yaml").write_text(yaml.dump(yaml_payload))

        data_file = example_dir / "data" / f"{dataset_name}.parquet"
        config = _get_multi_dataset_config(example_dir, dataset_name, data_file)

        assert config["schema"] is None
def test_discover_datasets_passes_uuid_to_loader():
    """``discover_datasets`` forwards the YAML ``uuid`` to ``create_generic_loader``.

    A single example directory containing ``data.parquet`` and ``dataset.yaml``
    is set up; the loader factory is mocked and must be called exactly once
    with a ``uuid`` keyword argument equal to the value in the YAML.
    """
    from superset.examples.data_loading import discover_datasets

    expected_uuid = "12345678-1234-1234-1234-123456789012"
    yaml_payload = {
        "table_name": "test_table",
        "uuid": expected_uuid,
    }

    with TemporaryDirectory() as tmpdir:
        examples_dir = Path(tmpdir)

        # Create a simple example with data.parquet and dataset.yaml
        example_dir = examples_dir / "test_example"
        example_dir.mkdir()
        (example_dir / "data.parquet").touch()
        (example_dir / "dataset.yaml").write_text(yaml.dump(yaml_payload))

        # Point discovery at the temp tree and intercept loader creation.
        directory_patch = patch(
            "superset.examples.data_loading.get_examples_directory",
            return_value=examples_dir,
        )
        loader_patch = patch(
            "superset.examples.data_loading.create_generic_loader",
            return_value=lambda: None,
        )
        with directory_patch, loader_patch as mock_create:
            discover_datasets()

            mock_create.assert_called_once()
            call_kwargs = mock_create.call_args.kwargs
            assert call_kwargs["uuid"] == expected_uuid