mirror of
https://github.com/apache/superset.git
synced 2026-04-19 08:04:53 +00:00
fix: loading examples from raw.githubusercontent.com fails with 429 errors (#33354)
This commit is contained in:
committed by
GitHub
parent
7791674f24
commit
f045a73e2d
@@ -14,6 +14,34 @@
|
|||||||
# KIND, either express or implied. See the License for the
|
# KIND, either express or implied. See the License for the
|
||||||
# specific language governing permissions and limitations
|
# specific language governing permissions and limitations
|
||||||
# under the License.
|
# under the License.
|
||||||
|
"""Helpers for loading Superset example datasets.
|
||||||
|
|
||||||
|
All Superset example data files (CSV, JSON, etc.) are fetched via the
|
||||||
|
jsDelivr CDN instead of raw.githubusercontent.com to avoid GitHub API
|
||||||
|
rate limits (60 anonymous requests/hour/IP).
|
||||||
|
|
||||||
|
jsDelivr is a multi‑CDN front for public GitHub repos and supports
|
||||||
|
arbitrary paths including nested folders. It doesn’t use the GitHub REST API
|
||||||
|
and advertises unlimited bandwidth for open-source use.
|
||||||
|
|
||||||
|
Example URL::
|
||||||
|
|
||||||
|
https://cdn.jsdelivr.net/gh/apache-superset/examples-data@master/datasets/examples/slack/messages.csv
|
||||||
|
|
||||||
|
Environment knobs
|
||||||
|
-----------------
|
||||||
|
``SUPERSET_EXAMPLES_DATA_REF`` (default: ``master``)
|
||||||
|
Tag / branch / SHA to pin so builds remain reproducible.
|
||||||
|
|
||||||
|
``SUPERSET_EXAMPLES_BASE_URL``
|
||||||
|
Override the base completely if you want to host the files elsewhere
|
||||||
|
(internal mirror, S3 bucket, ASF downloads, …). **File paths are
|
||||||
|
appended verbatim to this value, so end it with ``/``; a query string
|
||||||
|
placed here (e.g. ``?raw=true``) would land before the file path.**
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
import os
|
import os
|
||||||
from typing import Any
|
from typing import Any
|
||||||
|
|
||||||
@@ -22,27 +50,41 @@ from superset.connectors.sqla.models import SqlaTable
|
|||||||
from superset.models.slice import Slice
|
from superset.models.slice import Slice
|
||||||
from superset.utils import json
|
from superset.utils import json
|
||||||
|
|
||||||
BASE_URL = "https://github.com/apache-superset/examples-data/blob/master/"
|
# ---------------------------------------------------------------------------
|
||||||
|
# Public sample‑data mirror configuration
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
BASE_COMMIT: str = os.getenv("SUPERSET_EXAMPLES_DATA_REF", "master")
|
||||||
|
BASE_URL: str = os.getenv(
|
||||||
|
"SUPERSET_EXAMPLES_BASE_URL",
|
||||||
|
f"https://cdn.jsdelivr.net/gh/apache-superset/examples-data@{BASE_COMMIT}/",
|
||||||
|
)
|
||||||
|
|
||||||
misc_dash_slices: set[str] = set() # slices assembled in a 'Misc Chart' dashboard
|
# Slices assembled into a 'Misc Chart' dashboard
|
||||||
|
misc_dash_slices: set[str] = set()
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Utility functions
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
def get_table_connector_registry() -> Any:
|
def get_table_connector_registry() -> Any:
|
||||||
|
"""Return the SqlaTable registry so we can mock it in unit tests."""
|
||||||
return SqlaTable
|
return SqlaTable
|
||||||
|
|
||||||
|
|
||||||
def get_examples_folder() -> str:
|
def get_examples_folder() -> str:
|
||||||
|
"""Return local path to the examples folder (when vendored)."""
|
||||||
return os.path.join(app.config["BASE_DIR"], "examples")
|
return os.path.join(app.config["BASE_DIR"], "examples")
|
||||||
|
|
||||||
|
|
||||||
def update_slice_ids(pos: dict[Any, Any]) -> list[Slice]:
|
def update_slice_ids(pos: dict[Any, Any]) -> list[Slice]:
|
||||||
"""Update slice ids in position_json and return the slices found."""
|
"""Update slice ids in ``position_json`` and return the slices found."""
|
||||||
slice_components = [
|
slice_components = [
|
||||||
component
|
component
|
||||||
for component in pos.values()
|
for component in pos.values()
|
||||||
if isinstance(component, dict) and component.get("type") == "CHART"
|
if isinstance(component, dict) and component.get("type") == "CHART"
|
||||||
]
|
]
|
||||||
slices = {}
|
slices: dict[str, Slice] = {}
|
||||||
for name in {component["meta"]["sliceName"] for component in slice_components}:
|
for name in {component["meta"]["sliceName"] for component in slice_components}:
|
||||||
slc = db.session.query(Slice).filter_by(slice_name=name).first()
|
slc = db.session.query(Slice).filter_by(slice_name=name).first()
|
||||||
if slc:
|
if slc:
|
||||||
@@ -56,17 +98,24 @@ def update_slice_ids(pos: dict[Any, Any]) -> list[Slice]:
|
|||||||
|
|
||||||
|
|
||||||
def merge_slice(slc: Slice) -> None:
|
def merge_slice(slc: Slice) -> None:
|
||||||
o = db.session.query(Slice).filter_by(slice_name=slc.slice_name).first()
|
"""Delete any existing Slice with the same name, then add *slc*."""
|
||||||
if o:
|
existing = db.session.query(Slice).filter_by(slice_name=slc.slice_name).first()
|
||||||
db.session.delete(o)
|
if existing:
|
||||||
|
db.session.delete(existing)
|
||||||
db.session.add(slc)
|
db.session.add(slc)
|
||||||
|
|
||||||
|
|
||||||
def get_slice_json(defaults: dict[Any, Any], **kwargs: Any) -> str:
|
def get_slice_json(defaults: dict[Any, Any], **kwargs: Any) -> str:
|
||||||
|
"""Return JSON string for a chart definition, merging extra kwargs."""
|
||||||
defaults_copy = defaults.copy()
|
defaults_copy = defaults.copy()
|
||||||
defaults_copy.update(kwargs)
|
defaults_copy.update(kwargs)
|
||||||
return json.dumps(defaults_copy, indent=4, sort_keys=True)
|
return json.dumps(defaults_copy, indent=4, sort_keys=True)
|
||||||
|
|
||||||
|
|
||||||
def get_example_url(filepath: str) -> str:
|
def get_example_url(filepath: str) -> str:
|
||||||
return f"{BASE_URL}{filepath}?raw=true"
|
"""Return an absolute URL to *filepath* under the examples‑data repo.
|
||||||
|
|
||||||
|
All calls are routed through jsDelivr unless overridden. Supports nested
|
||||||
|
paths like ``datasets/examples/slack/messages.csv``.
|
||||||
|
"""
|
||||||
|
return f"{BASE_URL}{filepath}"
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ from tests.unit_tests.migrations.viz.utils import migrate_and_assert
|
|||||||
|
|
||||||
SOURCE_FORM_DATA: dict[str, Any] = {
|
SOURCE_FORM_DATA: dict[str, Any] = {
|
||||||
"granularity_sqla": "ds",
|
"granularity_sqla": "ds",
|
||||||
"time_range": "100 years ago : today",
|
"time_range": "1925-04-24 : 2025-04-24",
|
||||||
"viz_type": "pivot_table",
|
"viz_type": "pivot_table",
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -29,7 +29,7 @@ TARGET_FORM_DATA: dict[str, Any] = {
|
|||||||
"form_data_bak": SOURCE_FORM_DATA,
|
"form_data_bak": SOURCE_FORM_DATA,
|
||||||
"granularity_sqla": "ds",
|
"granularity_sqla": "ds",
|
||||||
"rowOrder": "value_z_to_a",
|
"rowOrder": "value_z_to_a",
|
||||||
"time_range": "100 years ago : today",
|
"time_range": "1925-04-24 : 2025-04-24",
|
||||||
"viz_type": "pivot_table_v2",
|
"viz_type": "pivot_table_v2",
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -40,7 +40,7 @@ def test_migration() -> None:
|
|||||||
target["adhoc_filters"] = [
|
target["adhoc_filters"] = [
|
||||||
{
|
{
|
||||||
"clause": "WHERE",
|
"clause": "WHERE",
|
||||||
"comparator": "100 years ago : today",
|
"comparator": "1925-04-24 : 2025-04-24",
|
||||||
"expressionType": "SIMPLE",
|
"expressionType": "SIMPLE",
|
||||||
"operator": "TEMPORAL_RANGE",
|
"operator": "TEMPORAL_RANGE",
|
||||||
"subject": "ds",
|
"subject": "ds",
|
||||||
|
|||||||
Reference in New Issue
Block a user