# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
"""Tests for data_loading.py UUID extraction functionality."""

from pathlib import Path
from tempfile import TemporaryDirectory
from unittest.mock import patch

import yaml


def _write_yaml(target: Path, payload: dict) -> None:
    """Serialize *payload* with ``yaml.dump`` and write the text to *target*."""
    target.write_text(yaml.dump(payload))


def test_get_dataset_config_from_yaml_extracts_uuid() -> None:
    """The uuid, table name and schema written to dataset.yaml are returned."""
    from superset.examples.data_loading import get_dataset_config_from_yaml

    expected_uuid = "12345678-1234-1234-1234-123456789012"

    with TemporaryDirectory() as scratch:
        root = Path(scratch)
        _write_yaml(
            root / "dataset.yaml",
            {
                "table_name": "test_table",
                "uuid": expected_uuid,
                "schema": "public",
            },
        )

        loaded = get_dataset_config_from_yaml(root)

        assert loaded["uuid"] == expected_uuid
        assert loaded["table_name"] == "test_table"
        assert loaded["schema"] == "public"


def test_get_dataset_config_from_yaml_without_uuid() -> None:
    """A dataset.yaml with no ``uuid`` key yields ``uuid`` of None."""
    from superset.examples.data_loading import get_dataset_config_from_yaml

    with TemporaryDirectory() as scratch:
        root = Path(scratch)
        _write_yaml(
            root / "dataset.yaml",
            {
                "table_name": "test_table",
                "schema": "public",
            },
        )

        loaded = get_dataset_config_from_yaml(root)

        assert loaded["uuid"] is None
        assert loaded["table_name"] == "test_table"


def test_get_dataset_config_from_yaml_no_file() -> None:
    """With no dataset.yaml present, uuid, table_name and schema are all None."""
    from superset.examples.data_loading import get_dataset_config_from_yaml

    with TemporaryDirectory() as scratch:
        # The directory is intentionally left empty.
        loaded = get_dataset_config_from_yaml(Path(scratch))

        assert loaded["uuid"] is None
        assert loaded["table_name"] is None
        assert loaded["schema"] is None


def test_get_dataset_config_from_yaml_treats_main_schema_as_none() -> None:
    """A schema of 'main' in dataset.yaml comes back as None."""
    from superset.examples.data_loading import get_dataset_config_from_yaml

    with TemporaryDirectory() as scratch:
        root = Path(scratch)
        _write_yaml(
            root / "dataset.yaml",
            {
                "table_name": "test_table",
                # "main" is SQLite's default schema.
                "schema": "main",
            },
        )

        loaded = get_dataset_config_from_yaml(root)

        assert loaded["schema"] is None


def test_get_multi_dataset_config_extracts_uuid() -> None:
    """The uuid and table name in datasets/{name}.yaml are returned."""
    from superset.examples.data_loading import _get_multi_dataset_config

    expected_uuid = "aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee"

    with TemporaryDirectory() as scratch:
        root = Path(scratch)
        yaml_dir = root / "datasets"
        yaml_dir.mkdir()
        _write_yaml(
            yaml_dir / "test_dataset.yaml",
            {
                "table_name": "custom_table_name",
                "uuid": expected_uuid,
                "schema": "public",
            },
        )

        # Only the path is handed over; the parquet file itself is never created.
        parquet_path = root / "data" / "test_dataset.parquet"
        loaded = _get_multi_dataset_config(root, "test_dataset", parquet_path)

        assert loaded["uuid"] == expected_uuid
        assert loaded["table_name"] == "custom_table_name"


def test_get_multi_dataset_config_without_yaml() -> None:
    """Without datasets/{name}.yaml, no uuid is set and the name is the table."""
    from superset.examples.data_loading import _get_multi_dataset_config

    with TemporaryDirectory() as scratch:
        root = Path(scratch)
        parquet_path = root / "data" / "test_dataset.parquet"

        loaded = _get_multi_dataset_config(root, "test_dataset", parquet_path)

        # ``.get`` is used so the check holds whether the key is absent or None.
        assert loaded.get("uuid") is None
        assert loaded["table_name"] == "test_dataset"


def test_get_multi_dataset_config_treats_main_schema_as_none() -> None:
    """A schema of 'main' in datasets/{name}.yaml comes back as None."""
    from superset.examples.data_loading import _get_multi_dataset_config

    with TemporaryDirectory() as scratch:
        root = Path(scratch)
        yaml_dir = root / "datasets"
        yaml_dir.mkdir()
        _write_yaml(
            yaml_dir / "test_dataset.yaml",
            {
                "table_name": "test_table",
                "schema": "main",
            },
        )

        parquet_path = root / "data" / "test_dataset.parquet"
        loaded = _get_multi_dataset_config(root, "test_dataset", parquet_path)

        assert loaded["schema"] is None


def test_discover_datasets_passes_uuid_to_loader() -> None:
    """discover_datasets calls create_generic_loader once with the YAML uuid."""
    from superset.examples.data_loading import discover_datasets

    expected_uuid = "12345678-1234-1234-1234-123456789012"

    with TemporaryDirectory() as scratch:
        examples_root = Path(scratch)

        # Lay out one simple example: data.parquet next to dataset.yaml.
        example = examples_root / "test_example"
        example.mkdir()
        (example / "data.parquet").touch()
        _write_yaml(
            example / "dataset.yaml",
            {
                "table_name": "test_table",
                "uuid": expected_uuid,
            },
        )

        directory_patch = patch(
            "superset.examples.data_loading.get_examples_directory",
            return_value=examples_root,
        )
        loader_patch = patch("superset.examples.data_loading.create_generic_loader")
        with directory_patch, loader_patch as loader_factory:
            loader_factory.return_value = lambda: None

            discover_datasets()

            loader_factory.assert_called_once()
            assert loader_factory.call_args.kwargs["uuid"] == expected_uuid