Compare commits

...

28 Commits

Author SHA1 Message Date
Maxime Beauchemin
375fe42a68 pointing link to master 2025-07-29 12:01:05 -07:00
Maxime Beauchemin
e6e0c3c47e docs 2025-07-29 11:19:58 -07:00
Maxime Beauchemin
1d6617d809 improve startup script 2025-07-29 11:19:58 -07:00
Maxime Beauchemin
4ff2a85b11 gh 2025-07-29 11:19:58 -07:00
Maxime Beauchemin
f1a3bdd878 tweak utilities 2025-07-29 11:19:58 -07:00
Maxime Beauchemin
4b5dbf3dcf public port 2025-07-29 11:19:58 -07:00
Maxime Beauchemin
458db68929 tmux 2025-07-29 11:19:58 -07:00
Maxime Beauchemin
d4463078ad only 9001 2025-07-29 11:19:57 -07:00
Maxime Beauchemin
7ad10ac1a9 ssh 2025-07-29 11:19:57 -07:00
Maxime Beauchemin
f580f6159e ok 2025-07-29 11:19:57 -07:00
Maxime Beauchemin
a26e0ea0fe fix: Use Python 3.11 Bookworm image to match current standard
- Switch to pre-built Python 3.11 image (no compilation)
- Bookworm base matches Superset Docker images
- Python 3.11 is the current tested standard
- Faster startup, no building from source
2025-07-29 11:19:57 -07:00
Maxime Beauchemin
4eef7a65c1 fix: Remove Python feature to avoid building from source
- Ubuntu 24.04 already includes Python 3.12
- No need to build Python from source (saves ~10min)
- System Python is sufficient for host environment
- Actual Superset Python runs in Docker containers
2025-07-29 11:19:57 -07:00
Maxime Beauchemin
ba3388bf94 feat: Add Claude Code CLI to devcontainer setup
- Install Claude Code for AI-assisted development
- Perfect for using 'claude --yes' safely in Codespaces
- No risk to local machine when running automated commands
2025-07-29 11:19:57 -07:00
Maxime Beauchemin
ca57bbc1e2 feat: Add uv package installer to devcontainer setup
- Install uv via official installer script
- Provides 10-100x faster Python package operations
- Matches what CI uses for package installation
2025-07-29 11:19:56 -07:00
Maxime Beauchemin
19f414b217 fix: Update Node version to 20 to match package.json requirements
- package.json specifies Node ^20.18.1
- Update devcontainer to use Node 20 instead of 18
2025-07-29 11:19:56 -07:00
Maxime Beauchemin
bc604d54e4 fix: Use Ubuntu 24.04 base to match CI with Python 3.11
- Switch to ubuntu-24.04 to match CI environment
- Add Python 3.11 explicitly
- Keep lean setup with only needed features
2025-07-29 11:19:56 -07:00
Maxime Beauchemin
e922e51e6b fix: Use lean Python base image instead of bloated universal
- Switch from 10GB universal to ~2GB Python base
- Add only needed features: Docker, Node, Git
- Much faster Codespace startup
- Same functionality, less bloat
2025-07-29 11:19:56 -07:00
Maxime Beauchemin
8bf2e4ea3a fix: Simplify devcontainer to avoid docker-compose conflicts
- Remove all features (universal image has everything)
- Simplified config to just image + scripts
- No dockerComposeFile reference
- Plain container that runs docker-compose internally
2025-07-29 11:19:56 -07:00
Maxime Beauchemin
cf8183b67e fix: Force rebuild with clean devcontainer config 2025-07-29 11:19:56 -07:00
Maxime Beauchemin
02f90f4321 feat: Use devcontainers/universal image for better tooling
- Switch to universal:2 image which includes vim, curl, jq, tmux, etc.
- Remove redundant features (already in universal image)
- Simplify setup script - only install Superset-specific libs
- Keeps SSH feature for remote access
2025-07-29 11:19:55 -07:00
Maxime Beauchemin
a007b3020d fix: Refactor devcontainer to use base Ubuntu with Docker-in-Docker
- Switch from docker-compose service to base Ubuntu container
- Add Docker-in-Docker to run docker-compose inside Codespace
- This provides git access and full dev environment
- Superset services run via docker-compose from within the container
2025-07-29 11:19:55 -07:00
Maxime Beauchemin
26e5e637f9 feat: Add SSH support to Codespaces configuration 2025-07-29 11:19:55 -07:00
Maxime Beauchemin
8de420ec8e fix: Correct workspace paths for Codespaces
- Use /workspaces instead of /app for Codespaces compatibility
- Fix postCreateCommand and postStartCommand paths
- Make startup script more flexible with directory detection
2025-07-29 11:19:55 -07:00
Maxime Beauchemin
fd51cc65a2 feat: Add GitHub Codespaces support with docker-compose-light
## Summary

Adds full GitHub Codespaces development environment configuration leveraging the new `docker-compose-light.yml` for efficient cloud development.

## Key Features

- **Lightweight Setup**: Uses `docker-compose-light.yml` which removes Redis/nginx for faster startup and lower resource usage
- **Multi-Instance Support**: Each Codespace gets isolated database volumes, perfect for testing multiple branches
- **Auto-Configuration**: Includes VS Code extensions, Python/TypeScript settings, and auto-start script
- **Developer Friendly**: Comprehensive README with SSH, VS Code, and browser connection instructions

## Implementation Details

### Files Added
- `.devcontainer/devcontainer.json` - Main configuration with:
  - Docker-in-Docker support for compose
  - Optimized VS Code extensions for Superset development
  - Smart port forwarding (9001 for frontend, 8088 for API)
  - 4-core/8GB recommended resources

- `.devcontainer/start-superset.sh` - Auto-start script that:
  - Uses unique project names per Codespace
  - Handles Docker daemon startup
  - Shows clear status and credentials

- `.devcontainer/README.md` - Developer guide covering:
  - Multiple connection methods (SSH, VS Code, browser)
  - Port forwarding instructions
  - Cost optimization tips
  - Integration with `claude --yes` workflows

## Benefits

1. **Isolated Development**: No risk to local machine when using `claude --yes`
2. **Resource Efficiency**: Laptop stays cool, Codespaces handles the load
3. **Parallel Testing**: Spin up multiple instances for different features
4. **Quick Pause/Resume**: Auto-stops when idle, resumes in ~30 seconds

## Testing

Push to fork and create a Codespace to test. The environment auto-starts Superset and forwards port 9001 with HTTPS.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-07-29 11:19:55 -07:00
Maxime Beauchemin
16db999067 fix: rate limiting issues with example data hosted on github.com (#34381) 2025-07-29 11:19:29 -07:00
Beto Dealmeida
972be15dda feat: focus on text input when modal opens (#34379) 2025-07-29 14:01:10 -04:00
Maxime Beauchemin
c9e06714f8 fix: prevent theme initialization errors during fresh installs (#34339)
Co-authored-by: Claude <noreply@anthropic.com>
2025-07-29 09:32:53 -07:00
Beto Dealmeida
32626ab707 fix: use catalog name on generated queries (#34360) 2025-07-29 12:30:46 -04:00
36 changed files with 443 additions and 44 deletions

.devcontainer/README.md

@@ -0,0 +1,5 @@
# Superset Development with GitHub Codespaces
For complete documentation on using GitHub Codespaces with Apache Superset, please see:
**[Setting up a Development Environment - GitHub Codespaces](https://superset.apache.org/docs/contributing/development#github-codespaces-cloud-development)**

.devcontainer/devcontainer.json

@@ -0,0 +1,52 @@
{
  "name": "Apache Superset Development",
  // Keep this in sync with the base image in Dockerfile (ARG PY_VER)
  // Using the same base as Dockerfile, but non-slim for dev tools
  "image": "python:3.11.13-bookworm",
  "features": {
    "ghcr.io/devcontainers/features/docker-in-docker:2": {
      "moby": true,
      "dockerDashComposeVersion": "v2"
    },
    "ghcr.io/devcontainers/features/node:1": {
      "version": "20"
    },
    "ghcr.io/devcontainers/features/git:1": {},
    "ghcr.io/devcontainers/features/common-utils:2": {
      "configureZshAsDefaultShell": true
    },
    "ghcr.io/devcontainers/features/sshd:1": {
      "version": "latest"
    }
  },
  // Forward ports for development
  "forwardPorts": [9001],
  "portsAttributes": {
    "9001": {
      "label": "Superset (via Webpack Dev Server)",
      "onAutoForward": "notify",
      "visibility": "public"
    }
  },
  // Run commands after container is created
  "postCreateCommand": "chmod +x .devcontainer/setup-dev.sh && .devcontainer/setup-dev.sh",
  // Auto-start Superset on Codespace resume
  "postStartCommand": ".devcontainer/start-superset.sh",
  // VS Code customizations
  "customizations": {
    "vscode": {
      "extensions": [
        "ms-python.python",
        "ms-python.vscode-pylance",
        "charliermarsh.ruff",
        "dbaeumer.vscode-eslint",
        "esbenp.prettier-vscode"
      ]
    }
  }
}

.devcontainer/setup-dev.sh (executable)

@@ -0,0 +1,32 @@
#!/bin/bash
# Setup script for Superset Codespaces development environment
echo "🔧 Setting up Superset development environment..."
# The universal image has most tools, just need Superset-specific libs
echo "📦 Installing Superset-specific dependencies..."
sudo apt-get update
sudo apt-get install -y \
    libsasl2-dev \
    libldap2-dev \
    libpq-dev \
    tmux \
    gh
# Install uv for fast Python package management
echo "📦 Installing uv..."
curl -LsSf https://astral.sh/uv/install.sh | sh
# Add cargo/bin to PATH for uv
echo 'export PATH="$HOME/.cargo/bin:$PATH"' >> ~/.bashrc
echo 'export PATH="$HOME/.cargo/bin:$PATH"' >> ~/.zshrc
# Install Claude Code CLI via npm
echo "🤖 Installing Claude Code..."
npm install -g @anthropic-ai/claude-code
# Make the start script executable
chmod +x .devcontainer/start-superset.sh
echo "✅ Development environment setup complete!"
echo "🚀 Run '.devcontainer/start-superset.sh' to start Superset"

.devcontainer/start-superset.sh (executable)

@@ -0,0 +1,59 @@
#!/bin/bash
# Startup script for Superset in Codespaces
echo "🚀 Starting Superset in Codespaces..."
echo "🌐 Frontend will be available at port 9001"
# Find the workspace directory (Codespaces clones as 'superset', not 'superset-2')
WORKSPACE_DIR=$(find /workspaces -maxdepth 1 -name "superset*" -type d | head -1)
if [ -n "$WORKSPACE_DIR" ]; then
    cd "$WORKSPACE_DIR"
    echo "📁 Working in: $WORKSPACE_DIR"
else
    echo "📁 Using current directory: $(pwd)"
fi
# Check if docker is running
if ! docker info > /dev/null 2>&1; then
    echo "⏳ Waiting for Docker to start..."
    sleep 5
fi
# Clean up any existing containers
echo "🧹 Cleaning up existing containers..."
docker-compose -f docker-compose-light.yml down
# Start services
echo "🏗️ Building and starting services..."
echo ""
echo "📝 Once started, login with:"
echo " Username: admin"
echo " Password: admin"
echo ""
echo "📋 Running in foreground with live logs (Ctrl+C to stop)..."
# Run docker-compose and capture exit code
docker-compose -f docker-compose-light.yml up
EXIT_CODE=$?
# If it failed, provide helpful instructions
if [ $EXIT_CODE -ne 0 ] && [ $EXIT_CODE -ne 130 ]; then # 130 is Ctrl+C
    echo ""
    echo "❌ Superset startup failed (exit code: $EXIT_CODE)"
    echo ""
    echo "🔄 To restart Superset, run:"
    echo " .devcontainer/start-superset.sh"
    echo ""
    echo "🔧 For troubleshooting:"
    echo " # View logs:"
    echo " docker-compose -f docker-compose-light.yml logs"
    echo ""
    echo " # Clean restart (removes volumes):"
    echo " docker-compose -f docker-compose-light.yml down -v"
    echo " .devcontainer/start-superset.sh"
    echo ""
    echo " # Common issues:"
    echo " - Network timeouts: Just retry, often transient"
    echo " - Port conflicts: Check 'docker ps'"
    echo " - Database issues: Try clean restart with -v"
fi


@@ -120,6 +120,78 @@ docker volume rm superset_db_home
docker-compose up
```
## GitHub Codespaces (Cloud Development)
GitHub Codespaces provides a complete, pre-configured development environment in the cloud. This is ideal for:
- Quick contributions without local setup
- Consistent development environments across team members
- Working from devices that can't run Docker locally
- Safe experimentation in isolated environments
:::info
We're grateful to GitHub for providing this excellent cloud development service that makes
contributing to Apache Superset more accessible to developers worldwide.
:::
### Getting Started with Codespaces
1. **Create a Codespace**: Use this pre-configured link that sets up everything you need:
[**Launch Superset Codespace →**](https://github.com/codespaces/new?skip_quickstart=true&machine=standardLinux32gb&repo=39464018&ref=master&geo=UsWest&devcontainer_path=.devcontainer%2Fdevcontainer.json)
:::caution
**Important**: You must select at least the **4 CPU / 16GB RAM** machine type (pre-selected in the link above).
Smaller instances will not have sufficient resources to run Superset effectively.
:::
2. **Wait for Setup**: The initial setup takes several minutes. The Codespace will:
- Build the development container
- Install all dependencies
- Start all required services (PostgreSQL, Redis, etc.)
- Initialize the database with example data
3. **Access Superset**: Once ready, check the **PORTS** tab in VS Code for port `9001`.
Click the globe icon to open Superset in your browser.
- Default credentials: `admin` / `admin`
### Key Features
- **Auto-reload**: Both Python and TypeScript files auto-refresh on save
- **Pre-installed Extensions**: VS Code extensions for Python, TypeScript, and database tools
- **Multiple Instances**: Run multiple Codespaces for different branches/features
- **SSH Access**: Connect via terminal using `gh cs ssh` or through the GitHub web UI
- **VS Code Integration**: Works seamlessly with VS Code desktop app
### Managing Codespaces
- **List active Codespaces**: `gh cs list`
- **SSH into a Codespace**: `gh cs ssh`
- **Stop a Codespace**: Via GitHub UI or `gh cs stop`
- **Delete a Codespace**: Via GitHub UI or `gh cs delete`
### Debugging and Logs
Since Codespaces uses `docker-compose-light.yml`, you can monitor all services:
```bash
# Stream logs from all services
docker compose -f docker-compose-light.yml logs -f
# Stream logs from a specific service
docker compose -f docker-compose-light.yml logs -f superset
# View last 100 lines and follow
docker compose -f docker-compose-light.yml logs --tail=100 -f
# List all running services
docker compose -f docker-compose-light.yml ps
```
:::tip
Codespaces automatically stop after 30 minutes of inactivity to save resources.
Your work is preserved and you can restart anytime.
:::
## Installing Development Tools
:::note


@@ -41,6 +41,9 @@ import {
import TableChartPlugin from '../../../../../plugins/plugin-chart-table/src';
import VizTypeControl, { VIZ_TYPE_CONTROL_TEST_ID } from './index';
+// Mock scrollIntoView to avoid errors in test environment
+jest.mock('scroll-into-view-if-needed', () => jest.fn());
jest.useFakeTimers();
class MainPreset extends Preset {
@@ -256,4 +259,22 @@ describe('VizTypeControl', () => {
expect(defaultProps.onChange).toHaveBeenCalledWith(VizType.Line);
});
+  it('Search input is focused when modal opens', async () => {
+    // Mock the focus method to track if it was called
+    const focusSpy = jest.fn();
+    const originalFocus = HTMLInputElement.prototype.focus;
+    HTMLInputElement.prototype.focus = focusSpy;
+    await waitForRenderWrapper();
+    const searchInput = screen.getByTestId(getTestId('search-input'));
+    // Verify that focus() was called on the search input
+    expect(focusSpy).toHaveBeenCalled();
+    expect(searchInput).toBeInTheDocument();
+    // Restore the original focus method
+    HTMLInputElement.prototype.focus = originalFocus;
+  });
});


@@ -575,6 +575,13 @@ export default function VizTypeGallery(props: VizTypeGalleryProps) {
setIsSearchFocused(true);
}, []);
+  // Auto-focus the search input when the modal opens
+  useEffect(() => {
+    if (searchInputRef.current) {
+      searchInputRef.current.focus();
+    }
+  }, []);
const changeSearch: ChangeEventHandler<HTMLInputElement> = useCallback(
event => setSearchInputValue(event.target.value),
[],


@@ -199,6 +199,11 @@ def load_data(data_uri: str, dataset: SqlaTable, database: Database) -> None:
:raises DatasetUnAllowedDataURI: If a dataset is trying
to load data from a URI that is not allowed.
"""
+    from superset.examples.helpers import normalize_example_data_url
+    # Convert example URLs to align with configuration
+    data_uri = normalize_example_data_url(data_uri)
validate_data_uri(data_uri)
logger.info("Downloading data from %s", data_uri)
data = request.urlopen(data_uri) # pylint: disable=consider-using-with # noqa: S310


@@ -190,6 +190,12 @@ def load_configs(
db_ssh_tunnel_priv_key_passws[config["uuid"]]
)
+            # Normalize example data URLs before schema validation
+            if prefix == "datasets" and "data" in config:
+                from superset.examples.helpers import normalize_example_data_url
+                config["data"] = normalize_example_data_url(config["data"])
schema.load(config)
configs[file_name] = config
except ValidationError as exc:


@@ -1368,10 +1368,23 @@ class SqlaTable(
return get_template_processor(table=self, database=self.database, **kwargs)
def get_sqla_table(self) -> TableClause:
-        tbl = table(self.table_name)
+        # For databases that support cross-catalog queries (like BigQuery),
+        # include the catalog in the table identifier to generate
+        # project.dataset.table format
+        if self.catalog and self.database.db_engine_spec.supports_cross_catalog_queries:
+            # SQLAlchemy doesn't have built-in catalog support for TableClause,
+            # so we need to construct the full identifier manually
+            if self.schema:
+                full_name = f"{self.catalog}.{self.schema}.{self.table_name}"
+            else:
+                full_name = f"{self.catalog}.{self.table_name}"
+            return table(full_name)
         if self.schema:
-            tbl.schema = self.schema
-        return tbl
+            return table(self.table_name, schema=self.schema)
+        return table(self.table_name)
def get_from_clause(
self,

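The branching in `get_sqla_table` above boils down to a small pure function; this standalone sketch (a hypothetical `qualify` helper, not Superset's API) reproduces the identifier rules:

```python
def qualify(table_name, catalog=None, schema=None, supports_cross_catalog=False):
    """Return (name, schema) the way get_sqla_table qualifies tables:
    engines that support cross-catalog queries (e.g. BigQuery) get a dotted
    catalog[.schema].table name; others keep schema separate and the catalog
    is ignored."""
    if catalog and supports_cross_catalog:
        parts = [catalog] + ([schema] if schema else []) + [table_name]
        return ".".join(parts), None  # full dotted name, no separate schema
    return table_name, schema
```

These cases line up with the parametrized expectations in the unit test added by this PR.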

@@ -38,7 +38,7 @@ def load_bart_lines(only_metadata: bool = False, force: bool = False) -> None:
if not only_metadata and (not table_exists or force):
df = read_example_data(
-    "bart-lines.json.gz", encoding="latin-1", compression="gzip"
+    "examples://bart-lines.json.gz", encoding="latin-1", compression="gzip"
)
df["path_json"] = df.path.map(json.dumps)
df["polyline"] = df.path.map(polyline.encode)


@@ -57,7 +57,7 @@ def gen_filter(
def load_data(tbl_name: str, database: Database, sample: bool = False) -> None:
-pdf = read_example_data("birth_names2.json.gz", compression="gzip")
+pdf = read_example_data("examples://birth_names2.json.gz", compression="gzip")
# TODO(bkyryliuk): move load examples data into the pytest fixture
if database.backend == "presto":
@@ -584,8 +584,8 @@ def create_dashboard(slices: list[Slice]) -> Dashboard:
}
}"""
)
-# pylint: disable=echarts_timeseries_line-too-long
-pos = json.loads(
+# pylint: disable=line-too-long
+pos = json.loads(  # noqa: TID251
textwrap.dedent(
"""\
{
@@ -859,11 +859,11 @@ def create_dashboard(slices: list[Slice]) -> Dashboard:
""" # noqa: E501
)
)
-# pylint: enable=echarts_timeseries_line-too-long
+# pylint: enable=line-too-long
# dashboard v2 doesn't allow add markup slice
dash.slices = [slc for slc in slices if slc.viz_type != "markup"]
update_slice_ids(pos)
dash.dashboard_title = "USA Births Names"
-dash.position_json = json.dumps(pos, indent=4)
+dash.position_json = json.dumps(pos, indent=4)  # noqa: TID251
dash.slug = "births"
return dash


@@ -1490,4 +1490,4 @@ columns:
python_date_format: null
version: 1.0.0
database_uuid: a2dc77af-e654-49bb-b321-40f6b559a1ee
-data: https://github.com/apache-superset/examples-data/raw/master/datasets/examples/fcc_survey_2018.csv.gz
+data: examples://datasets/examples/fcc_survey_2018.csv.gz


@@ -60,4 +60,4 @@ columns:
python_date_format: null
version: 1.0.0
database_uuid: a2dc77af-e654-49bb-b321-40f6b559a1ee
-data: https://raw.githubusercontent.com/apache-superset/examples-data/master/datasets/examples/slack/channel_members.csv
+data: examples://datasets/examples/slack/channel_members.csv


@@ -360,4 +360,4 @@ columns:
python_date_format: null
version: 1.0.0
database_uuid: a2dc77af-e654-49bb-b321-40f6b559a1ee
-data: https://raw.githubusercontent.com/apache-superset/examples-data/master/datasets/examples/slack/channels.csv
+data: examples://datasets/examples/slack/channels.csv


@@ -344,4 +344,4 @@ columns:
extra: null
version: 1.0.0
database_uuid: a2dc77af-e654-49bb-b321-40f6b559a1ee
-data: https://raw.githubusercontent.com/apache-superset/examples-data/lowercase_columns_examples/datasets/examples/sales.csv
+data: examples://datasets/examples/sales.csv


@@ -204,4 +204,4 @@ columns:
python_date_format: null
version: 1.0.0
database_uuid: a2dc77af-e654-49bb-b321-40f6b559a1ee
-data: https://raw.githubusercontent.com/apache-superset/examples-data/lowercase_columns_examples/datasets/examples/covid_vaccines.csv
+data: examples://datasets/examples/covid_vaccines.csv


@@ -260,4 +260,4 @@ columns:
python_date_format: null
version: 1.0.0
database_uuid: a2dc77af-e654-49bb-b321-40f6b559a1ee
-data: https://raw.githubusercontent.com/apache-superset/examples-data/master/datasets/examples/slack/exported_stats.csv
+data: examples://datasets/examples/slack/exported_stats.csv


@@ -480,4 +480,4 @@ columns:
python_date_format: null
version: 1.0.0
database_uuid: a2dc77af-e654-49bb-b321-40f6b559a1ee
-data: https://raw.githubusercontent.com/apache-superset/examples-data/master/datasets/examples/slack/messages.csv
+data: examples://datasets/examples/slack/messages.csv


@@ -180,4 +180,4 @@ columns:
python_date_format: null
version: 1.0.0
database_uuid: a2dc77af-e654-49bb-b321-40f6b559a1ee
-data: https://raw.githubusercontent.com/apache-superset/examples-data/master/datasets/examples/slack/threads.csv
+data: examples://datasets/examples/slack/threads.csv


@@ -90,4 +90,4 @@ columns:
python_date_format: null
version: 1.0.0
database_uuid: a2dc77af-e654-49bb-b321-40f6b559a1ee
-data: https://raw.githubusercontent.com/apache-superset/examples-data/master/datasets/examples/unicode_test.csv
+data: examples://datasets/examples/unicode_test.csv


@@ -220,4 +220,4 @@ columns:
python_date_format: null
version: 1.0.0
database_uuid: a2dc77af-e654-49bb-b321-40f6b559a1ee
-data: https://raw.githubusercontent.com/apache-superset/examples-data/master/datasets/examples/slack/users.csv
+data: examples://datasets/examples/slack/users.csv


@@ -60,4 +60,4 @@ columns:
python_date_format: null
version: 1.0.0
database_uuid: a2dc77af-e654-49bb-b321-40f6b559a1ee
-data: https://raw.githubusercontent.com/apache-superset/examples-data/master/datasets/examples/slack/users_channels.csv
+data: examples://datasets/examples/slack/users_channels.csv


@@ -153,4 +153,4 @@ columns:
python_date_format: null
version: 1.0.0
database_uuid: a2dc77af-e654-49bb-b321-40f6b559a1ee
-data: https://github.com/apache-superset/examples-data/raw/lowercase_columns_examples/datasets/examples/video_game_sales.csv
+data: examples://datasets/examples/video_game_sales.csv


@@ -49,7 +49,7 @@ def load_country_map_data(only_metadata: bool = False, force: bool = False) -> N
if not only_metadata and (not table_exists or force):
data = read_example_data(
-    "birth_france_data_for_country_map.csv", encoding="utf-8"
+    "examples://birth_france_data_for_country_map.csv", encoding="utf-8"
)
data["dttm"] = datetime.datetime.now().date()
data.to_sql(


@@ -50,7 +50,7 @@ def load_energy(
table_exists = database.has_table(Table(tbl_name, schema))
if not only_metadata and (not table_exists or force):
-pdf = read_example_data("energy.json.gz", compression="gzip")
+pdf = read_example_data("examples://energy.json.gz", compression="gzip")
pdf = pdf.head(100) if sample else pdf
pdf.to_sql(
tbl_name,


@@ -38,12 +38,12 @@ def load_flights(only_metadata: bool = False, force: bool = False) -> None:
if not only_metadata and (not table_exists or force):
pdf = read_example_data(
-    "flight_data.csv.gz", encoding="latin-1", compression="gzip"
+    "examples://flight_data.csv.gz", encoding="latin-1", compression="gzip"
)
# Loading airports info to join and get lat/long
airports = read_example_data(
-    "airports.csv.gz", encoding="latin-1", compression="gzip"
+    "examples://airports.csv.gz", encoding="latin-1", compression="gzip"
)
airports = airports.set_index("IATA_CODE")


@@ -54,6 +54,8 @@ from superset.connectors.sqla.models import SqlaTable
from superset.models.slice import Slice
from superset.utils import json
+EXAMPLES_PROTOCOL = "examples://"
# ---------------------------------------------------------------------------
# Public sampledata mirror configuration
# ---------------------------------------------------------------------------
@@ -125,6 +127,20 @@ def get_example_url(filepath: str) -> str:
return f"{BASE_URL}{filepath}"
+def normalize_example_data_url(url: str) -> str:
+    """Convert example data URLs to use the configured CDN.
+    Transforms examples:// URLs to the configured CDN URL.
+    Non-example URLs are returned unchanged.
+    """
+    if url.startswith(EXAMPLES_PROTOCOL):
+        relative_path = url[len(EXAMPLES_PROTOCOL) :]
+        return get_example_url(relative_path)
+    # Not an examples URL, return unchanged
+    return url
def read_example_data(
filepath: str,
max_attempts: int = 5,
@@ -132,9 +148,7 @@ def read_example_data(
**kwargs: Any,
) -> pd.DataFrame:
"""Load CSV or JSON from example data mirror with retry/backoff."""
-    from superset.examples.helpers import get_example_url
-    url = get_example_url(filepath)
+    url = normalize_example_data_url(filepath)
is_json = filepath.endswith(".json") or filepath.endswith(".json.gz")
for attempt in range(1, max_attempts + 1):

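The `examples://` handling above can be exercised standalone; `BASE_URL` below is a placeholder for whatever mirror a deployment configures, not the value Superset ships:

```python
EXAMPLES_PROTOCOL = "examples://"
BASE_URL = "https://example-mirror.invalid/"  # placeholder for the configured CDN


def normalize_example_data_url(url: str) -> str:
    """Rewrite examples:// URLs onto the configured base; pass others through."""
    if url.startswith(EXAMPLES_PROTOCOL):
        return BASE_URL + url[len(EXAMPLES_PROTOCOL):]
    return url
```

Dataset YAML files and `read_example_data` callers can then use the scheme uniformly, and only this one function decides where the bytes actually come from.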

@@ -48,7 +48,7 @@ def load_long_lat_data(only_metadata: bool = False, force: bool = False) -> None
if not only_metadata and (not table_exists or force):
pdf = read_example_data(
-    "san_francisco.csv.gz", encoding="utf-8", compression="gzip"
+    "examples://san_francisco.csv.gz", encoding="utf-8", compression="gzip"
)
start = datetime.datetime.now().replace(
hour=0, minute=0, second=0, microsecond=0


@@ -49,7 +49,7 @@ def load_multiformat_time_series( # pylint: disable=too-many-locals
if not only_metadata and (not table_exists or force):
pdf = read_example_data(
-    "multiformat_time_series.json.gz", compression="gzip"
+    "examples://multiformat_time_series.json.gz", compression="gzip"
)
# TODO(bkyryliuk): move load examples data into the pytest fixture


@@ -37,7 +37,7 @@ def load_paris_iris_geojson(only_metadata: bool = False, force: bool = False) ->
table_exists = database.has_table(Table(tbl_name, schema))
if not only_metadata and (not table_exists or force):
-df = read_example_data("paris_iris.json.gz", compression="gzip")
+df = read_example_data("examples://paris_iris.json.gz", compression="gzip")
df["features"] = df.features.map(json.dumps)
df.to_sql(


@@ -46,7 +46,9 @@ def load_random_time_series_data(
table_exists = database.has_table(Table(tbl_name, schema))
if not only_metadata and (not table_exists or force):
-pdf = read_example_data("random_time_series.json.gz", compression="gzip")
+pdf = read_example_data(
+    "examples://random_time_series.json.gz", compression="gzip"
+)
if database.backend == "presto":
pdf.ds = pd.to_datetime(pdf.ds, unit="s")
pdf.ds = pdf.ds.dt.strftime("%Y-%m-%d %H:%M%:%S")


@@ -39,7 +39,9 @@ def load_sf_population_polygons(
table_exists = database.has_table(Table(tbl_name, schema))
if not only_metadata and (not table_exists or force):
-df = read_example_data("sf_population.json.gz", compression="gzip")
+df = read_example_data(
+    "examples://sf_population.json.gz", compression="gzip"
+)
df["contour"] = df.contour.map(json.dumps)
df.to_sql(


@@ -55,7 +55,7 @@ def load_world_bank_health_n_pop( # pylint: disable=too-many-locals
table_exists = database.has_table(Table(tbl_name, schema))
if not only_metadata and (not table_exists or force):
-pdf = read_example_data("countries.json.gz", compression="gzip")
+pdf = read_example_data("examples://countries.json.gz", compression="gzip")
pdf.columns = [col.replace(".", "_") for col in pdf.columns]
if database.backend == "presto":
pdf.year = pd.to_datetime(pdf.year)


@@ -34,6 +34,7 @@ from flask_appbuilder.utils.base import get_safe_redirect
from flask_babel import lazy_gettext as _, refresh
from flask_compress import Compress
from flask_session import Session
+from sqlalchemy import inspect
from werkzeug.middleware.proxy_fix import ProxyFix
from superset.constants import CHANGE_ME_SECRET_KEY
@@ -470,6 +471,31 @@ class SupersetAppInitializer: # pylint: disable=too-many-public-methods
icon="fa-lock",
)
+    def _init_database_dependent_features(self) -> None:
+        """
+        Initialize features that require database tables to exist.
+        This is called during app initialization but checks table existence
+        to handle cases where the app starts before database migration.
+        """
+        inspector = inspect(db.engine)
+        # Check if core tables exist (use 'dashboards' as proxy for Superset tables)
+        if not inspector.has_table("dashboards"):
+            logger.debug(
+                "Superset tables not yet created. Skipping database-dependent "
+                "initialization. These features will be initialized after migration."
+            )
+            return
+        # Register SQLA event listeners for tagging system
+        if feature_flag_manager.is_feature_enabled("TAGGING_SYSTEM"):
+            register_sqla_event_listeners()
+        # Seed system themes from configuration
+        from superset.commands.theme.seed import SeedSystemThemesCommand
+        SeedSystemThemesCommand().run()
def init_app_in_ctx(self) -> None:
"""
Runs init logic in the context of the app
@@ -487,16 +513,8 @@ class SupersetAppInitializer: # pylint: disable=too-many-public-methods
if flask_app_mutator := self.config["FLASK_APP_MUTATOR"]:
flask_app_mutator(self.superset_app)
-        if feature_flag_manager.is_feature_enabled("TAGGING_SYSTEM"):
-            register_sqla_event_listeners()
-        # Seed system themes from configuration
-        try:
-            from superset.commands.theme.seed import SeedSystemThemesCommand
-            SeedSystemThemesCommand().run()
-        except Exception:
-            logger.exception("Failed to seed system themes")
+        # Initialize database-dependent features only if database is ready
+        self._init_database_dependent_features()
self.init_views()

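The guard's core idea (probe for a sentinel table before running database-dependent init) can be demonstrated with nothing but the standard library; this sqlite3 sketch uses the same `dashboards`-as-proxy convention, though Superset itself goes through SQLAlchemy's inspector rather than raw SQL:

```python
import sqlite3


def tables_ready(conn: sqlite3.Connection, sentinel: str = "dashboards") -> bool:
    """True once the sentinel table exists, i.e. migrations have run."""
    row = conn.execute(
        "SELECT name FROM sqlite_master WHERE type='table' AND name=?", (sentinel,)
    ).fetchone()
    return row is not None


conn = sqlite3.connect(":memory:")
print(tables_ready(conn))  # fresh install: skip database-dependent init
conn.execute("CREATE TABLE dashboards (id INTEGER PRIMARY KEY)")
print(tables_ready(conn))  # after migration: safe to initialize
```

On a fresh database the check fails and initialization is quietly skipped; once migration creates the table, the same call path proceeds normally.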

@@ -605,3 +605,94 @@ def test_fetch_metadata_empty_comment_field_handling(mocker: MockerFixture) -> N
# Valid comment should be set
assert columns_by_name["col_with_valid_comment"].description == "Valid comment"
@pytest.mark.parametrize(
"supports_cross_catalog,table_name,catalog,schema,expected_name,expected_schema",
[
# Database supports cross-catalog queries (like BigQuery)
(
True,
"test_table",
"test_project",
"test_dataset",
"test_project.test_dataset.test_table",
None,
),
# Database supports cross-catalog queries, catalog only (no schema)
(
True,
"test_table",
"test_project",
None,
"test_project.test_table",
None,
),
# Database supports cross-catalog queries, schema only (no catalog)
(
True,
"test_table",
None,
"test_schema",
"test_table",
"test_schema",
),
# Database supports cross-catalog queries, no catalog or schema
(
True,
"test_table",
None,
None,
"test_table",
None,
),
# Database doesn't support cross-catalog queries, catalog ignored
(
False,
"test_table",
"test_catalog",
"test_schema",
"test_table",
"test_schema",
),
# Database doesn't support cross-catalog queries, no schema
(
False,
"test_table",
"test_catalog",
None,
"test_table",
None,
),
],
)
def test_get_sqla_table_with_catalog(
mocker: MockerFixture,
supports_cross_catalog: bool,
table_name: str,
catalog: str | None,
schema: str | None,
expected_name: str,
expected_schema: str | None,
) -> None:
"""Test that get_sqla_table handles catalog inclusion correctly based on
database cross-catalog support
"""
# Mock database with specified cross-catalog support
database = mocker.MagicMock()
database.db_engine_spec.supports_cross_catalog_queries = supports_cross_catalog
# Create table with specified parameters
table = SqlaTable(
table_name=table_name,
database=database,
schema=schema,
catalog=catalog,
)
# Get the SQLAlchemy table representation
sqla_table = table.get_sqla_table()
# Verify expected table name and schema
assert sqla_table.name == expected_name
assert sqla_table.schema == expected_schema