mirror of
https://github.com/apache/superset.git
synced 2026-04-07 18:35:15 +00:00
205 lines
7.0 KiB
Python
205 lines
7.0 KiB
Python
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
"""Tests for data_loading.py UUID extraction functionality."""
|
|
|
|
from pathlib import Path
|
|
from tempfile import TemporaryDirectory
|
|
from unittest.mock import patch
|
|
|
|
import yaml
|
|
|
|
|
|
def test_get_dataset_config_from_yaml_extracts_uuid():
    """Check that the uuid written to dataset.yaml comes back in the config."""
    from superset.examples.data_loading import get_dataset_config_from_yaml

    expected_uuid = "12345678-1234-1234-1234-123456789012"
    yaml_payload = {
        "table_name": "test_table",
        "uuid": expected_uuid,
        "schema": "public",
    }

    with TemporaryDirectory() as tmpdir:
        example_dir = Path(tmpdir)
        # The function under test receives the directory, not the file path.
        (example_dir / "dataset.yaml").write_text(yaml.dump(yaml_payload))

        config = get_dataset_config_from_yaml(example_dir)

        assert config["uuid"] == expected_uuid
        assert config["table_name"] == "test_table"
        assert config["schema"] == "public"
|
|
|
|
|
|
def test_get_dataset_config_from_yaml_without_uuid():
    """Check that a dataset.yaml lacking a uuid key yields ``uuid`` of None."""
    from superset.examples.data_loading import get_dataset_config_from_yaml

    # Deliberately no "uuid" entry in the payload.
    yaml_payload = {
        "table_name": "test_table",
        "schema": "public",
    }

    with TemporaryDirectory() as tmpdir:
        example_dir = Path(tmpdir)
        (example_dir / "dataset.yaml").write_text(yaml.dump(yaml_payload))

        config = get_dataset_config_from_yaml(example_dir)

        assert config["uuid"] is None
        assert config["table_name"] == "test_table"
|
|
|
|
|
|
def test_get_dataset_config_from_yaml_no_file():
    """Check the config returned for a directory that has no dataset.yaml."""
    from superset.examples.data_loading import get_dataset_config_from_yaml

    with TemporaryDirectory() as tmpdir:
        # The directory is left empty on purpose: no dataset.yaml is created.
        config = get_dataset_config_from_yaml(Path(tmpdir))

        # Each of these keys must be present and set to None.
        for key in ("uuid", "table_name", "schema"):
            assert config[key] is None, key
|
|
|
|
|
|
def test_get_dataset_config_from_yaml_treats_main_schema_as_none():
    """Check that a schema of 'main' (SQLite's default) is reported as None."""
    from superset.examples.data_loading import get_dataset_config_from_yaml

    yaml_payload = {
        "table_name": "test_table",
        "schema": "main",  # SQLite default schema
    }

    with TemporaryDirectory() as tmpdir:
        example_dir = Path(tmpdir)
        (example_dir / "dataset.yaml").write_text(yaml.dump(yaml_payload))

        config = get_dataset_config_from_yaml(example_dir)

        assert config["schema"] is None
|
|
|
|
|
|
def test_get_multi_dataset_config_extracts_uuid():
    """Check that the uuid in datasets/{name}.yaml comes back in the config."""
    from superset.examples.data_loading import _get_multi_dataset_config

    expected_uuid = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"
    yaml_payload = {
        "table_name": "custom_table_name",
        "uuid": expected_uuid,
        "schema": "public",
    }

    with TemporaryDirectory() as tmpdir:
        example_dir = Path(tmpdir)
        datasets_dir = example_dir / "datasets"
        datasets_dir.mkdir()
        (datasets_dir / "test_dataset.yaml").write_text(yaml.dump(yaml_payload))

        # Only the path is passed; the parquet file is never created here.
        data_file = example_dir / "data" / "test_dataset.parquet"
        config = _get_multi_dataset_config(example_dir, "test_dataset", data_file)

        assert config["uuid"] == expected_uuid
        assert config["table_name"] == "custom_table_name"
|
|
|
|
|
|
def test_get_multi_dataset_config_without_yaml():
    """Check the config returned when datasets/{name}.yaml is absent."""
    from superset.examples.data_loading import _get_multi_dataset_config

    dataset_name = "test_dataset"

    with TemporaryDirectory() as tmpdir:
        example_dir = Path(tmpdir)
        # No datasets/ directory and no YAML file are created for this case.
        data_file = example_dir / "data" / "test_dataset.parquet"

        config = _get_multi_dataset_config(example_dir, dataset_name, data_file)

        # .get() is used because the "uuid" key may be missing entirely.
        assert config.get("uuid") is None
        assert config["table_name"] == "test_dataset"
|
|
|
|
|
|
def test_get_multi_dataset_config_treats_main_schema_as_none():
    """Check that a 'main' schema in datasets/{name}.yaml is reported as None."""
    from superset.examples.data_loading import _get_multi_dataset_config

    yaml_payload = {
        "table_name": "test_table",
        "schema": "main",
    }

    with TemporaryDirectory() as tmpdir:
        example_dir = Path(tmpdir)
        datasets_dir = example_dir / "datasets"
        datasets_dir.mkdir()
        (datasets_dir / "test_dataset.yaml").write_text(yaml.dump(yaml_payload))

        # Only the path is passed; the parquet file is never created here.
        data_file = example_dir / "data" / "test_dataset.parquet"
        config = _get_multi_dataset_config(example_dir, "test_dataset", data_file)

        assert config["schema"] is None
|
|
|
|
|
|
def test_discover_datasets_passes_uuid_to_loader():
    """Check that discover_datasets hands the YAML uuid to create_generic_loader."""
    from superset.examples.data_loading import discover_datasets

    expected_uuid = "12345678-1234-1234-1234-123456789012"
    yaml_payload = {
        "table_name": "test_table",
        "uuid": expected_uuid,
    }

    with TemporaryDirectory() as tmpdir:
        examples_dir = Path(tmpdir)

        # Lay out a minimal example: an empty data.parquet plus a dataset.yaml.
        example_dir = examples_dir / "test_example"
        example_dir.mkdir()
        (example_dir / "data.parquet").touch()
        (example_dir / "dataset.yaml").write_text(yaml.dump(yaml_payload))

        # Point discovery at the temp directory and intercept loader creation.
        directory_patch = patch(
            "superset.examples.data_loading.get_examples_directory",
            return_value=examples_dir,
        )
        loader_patch = patch("superset.examples.data_loading.create_generic_loader")

        with directory_patch, loader_patch as loader_factory:
            loader_factory.return_value = lambda: None

            discover_datasets()

            loader_factory.assert_called_once()
            passed_kwargs = loader_factory.call_args.kwargs
            assert passed_kwargs["uuid"] == expected_uuid
|