mirror of
https://github.com/apache/superset.git
synced 2026-04-13 13:18:25 +00:00
fix: loading examples from raw.githubusercontent.com fails with 429 errors (#33354)
This commit is contained in:
committed by
GitHub
parent
7791674f24
commit
f045a73e2d
@@ -14,6 +14,34 @@
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
"""Helpers for loading Superset example datasets.
|
||||
|
||||
All Superset example data files (CSV, JSON, etc.) are fetched via the
|
||||
jsDelivr CDN instead of raw.githubusercontent.com to avoid GitHub API
|
||||
rate limits (60 anonymous requests/hour/IP).
|
||||
|
||||
jsDelivr is a multi‑CDN front for public GitHub repos and supports
|
||||
arbitrary paths including nested folders. It doesn’t use the GitHub REST API
|
||||
and advertises unlimited bandwidth for open-source use.
|
||||
|
||||
Example URL::
|
||||
|
||||
https://cdn.jsdelivr.net/gh/apache-superset/examples-data@master/datasets/examples/slack/messages.csv
|
||||
|
||||
Environment knobs
|
||||
-----------------
|
||||
``SUPERSET_EXAMPLES_DATA_REF`` (default: ``master``)
|
||||
Tag / branch / SHA to pin so builds remain reproducible.
|
||||
|
||||
``SUPERSET_EXAMPLES_BASE_URL``
|
||||
Override the base completely if you want to host the files elsewhere
|
||||
(internal mirror, S3 bucket, ASF downloads, …). **Include any query
|
||||
string required by your hosting (e.g. ``?raw=true`` if you point back
|
||||
to a GitHub *blob* URL).**
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import Any
|
||||
|
||||
@@ -22,27 +50,41 @@ from superset.connectors.sqla.models import SqlaTable
|
||||
from superset.models.slice import Slice
|
||||
from superset.utils import json
|
||||
|
||||
BASE_URL = "https://github.com/apache-superset/examples-data/blob/master/"
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public sample‑data mirror configuration
|
||||
# ---------------------------------------------------------------------------
|
||||
BASE_COMMIT: str = os.getenv("SUPERSET_EXAMPLES_DATA_REF", "master")
|
||||
BASE_URL: str = os.getenv(
|
||||
"SUPERSET_EXAMPLES_BASE_URL",
|
||||
f"https://cdn.jsdelivr.net/gh/apache-superset/examples-data@{BASE_COMMIT}/",
|
||||
)
|
||||
|
||||
misc_dash_slices: set[str] = set() # slices assembled in a 'Misc Chart' dashboard
|
||||
# Slices assembled into a 'Misc Chart' dashboard
|
||||
misc_dash_slices: set[str] = set()
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Utility functions
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def get_table_connector_registry() -> Any:
|
||||
"""Return the SqlaTable registry so we can mock it in unit tests."""
|
||||
return SqlaTable
|
||||
|
||||
|
||||
def get_examples_folder() -> str:
|
||||
"""Return local path to the examples folder (when vendored)."""
|
||||
return os.path.join(app.config["BASE_DIR"], "examples")
|
||||
|
||||
|
||||
def update_slice_ids(pos: dict[Any, Any]) -> list[Slice]:
|
||||
"""Update slice ids in position_json and return the slices found."""
|
||||
"""Update slice ids in ``position_json`` and return the slices found."""
|
||||
slice_components = [
|
||||
component
|
||||
for component in pos.values()
|
||||
if isinstance(component, dict) and component.get("type") == "CHART"
|
||||
]
|
||||
slices = {}
|
||||
slices: dict[str, Slice] = {}
|
||||
for name in {component["meta"]["sliceName"] for component in slice_components}:
|
||||
slc = db.session.query(Slice).filter_by(slice_name=name).first()
|
||||
if slc:
|
||||
@@ -56,17 +98,24 @@ def update_slice_ids(pos: dict[Any, Any]) -> list[Slice]:
|
||||
|
||||
|
||||
def merge_slice(slc: Slice) -> None:
|
||||
o = db.session.query(Slice).filter_by(slice_name=slc.slice_name).first()
|
||||
if o:
|
||||
db.session.delete(o)
|
||||
"""Upsert a Slice by name."""
|
||||
existing = db.session.query(Slice).filter_by(slice_name=slc.slice_name).first()
|
||||
if existing:
|
||||
db.session.delete(existing)
|
||||
db.session.add(slc)
|
||||
|
||||
|
||||
def get_slice_json(defaults: dict[Any, Any], **kwargs: Any) -> str:
|
||||
"""Return JSON string for a chart definition, merging extra kwargs."""
|
||||
defaults_copy = defaults.copy()
|
||||
defaults_copy.update(kwargs)
|
||||
return json.dumps(defaults_copy, indent=4, sort_keys=True)
|
||||
|
||||
|
||||
def get_example_url(filepath: str) -> str:
|
||||
return f"{BASE_URL}{filepath}?raw=true"
|
||||
"""Return an absolute URL to *filepath* under the examples‑data repo.
|
||||
|
||||
All calls are routed through jsDelivr unless overridden. Supports nested
|
||||
paths like ``datasets/examples/slack/messages.csv``.
|
||||
"""
|
||||
return f"{BASE_URL}{filepath}"
|
||||
|
||||
@@ -21,7 +21,7 @@ from tests.unit_tests.migrations.viz.utils import migrate_and_assert
|
||||
|
||||
SOURCE_FORM_DATA: dict[str, Any] = {
|
||||
"granularity_sqla": "ds",
|
||||
"time_range": "100 years ago : today",
|
||||
"time_range": "1925-04-24 : 2025-04-24",
|
||||
"viz_type": "pivot_table",
|
||||
}
|
||||
|
||||
@@ -29,7 +29,7 @@ TARGET_FORM_DATA: dict[str, Any] = {
|
||||
"form_data_bak": SOURCE_FORM_DATA,
|
||||
"granularity_sqla": "ds",
|
||||
"rowOrder": "value_z_to_a",
|
||||
"time_range": "100 years ago : today",
|
||||
"time_range": "1925-04-24 : 2025-04-24",
|
||||
"viz_type": "pivot_table_v2",
|
||||
}
|
||||
|
||||
@@ -40,7 +40,7 @@ def test_migration() -> None:
|
||||
target["adhoc_filters"] = [
|
||||
{
|
||||
"clause": "WHERE",
|
||||
"comparator": "100 years ago : today",
|
||||
"comparator": "1925-04-24 : 2025-04-24",
|
||||
"expressionType": "SIMPLE",
|
||||
"operator": "TEMPORAL_RANGE",
|
||||
"subject": "ds",
|
||||
|
||||
Reference in New Issue
Block a user