diff --git a/superset-frontend/plugins/plugin-chart-country-map/scripts/.gitignore b/superset-frontend/plugins/plugin-chart-country-map/scripts/.gitignore new file mode 100644 index 00000000000..2385d42b15f --- /dev/null +++ b/superset-frontend/plugins/plugin-chart-country-map/scripts/.gitignore @@ -0,0 +1,6 @@ +# Build cache (downloaded NE shapefiles) +.cache/ + +# Build outputs (regenerated; not committed at the script-dir level — +# the eventual location for shipped GeoJSON is the plugin's src/data/) +output/ diff --git a/superset-frontend/plugins/plugin-chart-country-map/scripts/build.py b/superset-frontend/plugins/plugin-chart-country-map/scripts/build.py new file mode 100755 index 00000000000..3b1fa885e56 --- /dev/null +++ b/superset-frontend/plugins/plugin-chart-country-map/scripts/build.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python3 +""" +Country Map build pipeline — Natural Earth → GeoJSON. + +Replaces the legacy Jupyter notebook. Reads YAML configs from config/, +downloads pinned Natural Earth shapefiles, applies declarative transforms, +optionally runs procedural escape-hatch scripts, and writes per-worldview +GeoJSON outputs to output/. + +Run with: ./build.sh (which is just `python3 build.py` with sensible env) + +This is the POC version — currently implements: + - NE shapefile download + cache (pinned to v5.1.2) + - Shapefile → GeoJSON conversion via mapshaper CLI + - name_overrides.yaml application + - One worldview (UA) at Admin 0 + +Future commits will add: multiple worldviews, Admin 1, flying_islands, +territory_assignments, regional_aggregations, composite_maps, simplification, +procedural/ orchestration. +""" + +from __future__ import annotations + +import json +import shutil +import subprocess +import sys +import urllib.request +from pathlib import Path +from typing import Any + +import yaml # type: ignore[import-untyped] + +# ---------------------------------------------------------------------- +# Constants / paths +# ---------------------------------------------------------------------- + +NE_REPO = "nvkelso/natural-earth-vector" +NE_PINNED_TAG = "v5.1.2" +NE_PINNED_SHA = "f1890d9f152c896d250a77557a5751a93d494776" +NE_RAW_URL = f"https://raw.githubusercontent.com/{NE_REPO}/{NE_PINNED_SHA}/10m_cultural" + +SCRIPT_DIR = Path(__file__).resolve().parent +CONFIG_DIR = SCRIPT_DIR / "config" +OUTPUT_DIR = SCRIPT_DIR / "output" +CACHE_DIR = SCRIPT_DIR / ".cache" + +SHAPEFILE_EXTS = ["shp", "shx", "dbf", "prj", "cpg"] + +# Worldview codes shipped by NE as suffixes on the Admin 0 file name. Empty +# string = the "Default" (ungrouped) NE editorial. The new plugin's +# documented default is "ukr". +WORLDVIEWS_ADMIN_0 = [ + "", # Default + "ukr", # Ukraine — Superset's documented default +] + + +def log(msg: str) -> None: + print(msg, file=sys.stderr, flush=True) + + +# ---------------------------------------------------------------------- +# NE download +# ---------------------------------------------------------------------- + + +def fetch_ne_shapefile(admin_level: int, worldview: str = "") -> Path: + """Download (or use cached) shapefile components for one NE layer. + + Returns the path to the `.shp` file; sibling `.shx`/`.dbf`/`.prj`/`.cpg` + files live alongside as mapshaper requires. + """ + if admin_level == 0: + suffix = f"_{worldview}" if worldview else "" + basename = f"ne_10m_admin_0_countries{suffix}" + elif admin_level == 1: + # NE only publishes worldview-specific files at Admin 0. Admin 1 + # uses a single file with per-feature `WORLDVIEW` attributes. + basename = "ne_10m_admin_1_states_provinces" + else: + raise ValueError(f"Unsupported admin_level={admin_level}") + + target_shp = CACHE_DIR / f"{basename}.shp" + if target_shp.exists(): + return target_shp + + CACHE_DIR.mkdir(exist_ok=True) + log(f"Downloading NE {basename} (worldview={worldview or 'default'})…") + for ext in SHAPEFILE_EXTS: + url = f"{NE_RAW_URL}/{basename}.{ext}" + dest = CACHE_DIR / f"{basename}.{ext}" + try: + urllib.request.urlretrieve(url, dest) + except urllib.error.HTTPError as e: + if ext == "cpg" and e.code == 404: + # .cpg is optional in shapefile bundles + continue + raise + + return target_shp + + +# ---------------------------------------------------------------------- +# Shapefile → GeoJSON via mapshaper CLI +# ---------------------------------------------------------------------- + + +def shp_to_geojson(shp: Path, output: Path) -> None: + """Convert a shapefile to GeoJSON FeatureCollection.""" + if shutil.which("npx") is None: + raise RuntimeError( + "npx not found in PATH; mapshaper is required for shapefile conversion" + ) + log(f" mapshaper: {shp.name} → {output.name}") + subprocess.run( + ["npx", "--yes", "mapshaper", str(shp), "-o", str(output), "format=geojson"], + check=True, + stderr=subprocess.DEVNULL, + ) + + +# ---------------------------------------------------------------------- +# Match helpers +# ---------------------------------------------------------------------- + + +def _matches(props: dict[str, Any], conditions: dict[str, Any]) -> bool: + """Check whether a feature's properties satisfy all conditions in match. + + Supports two value forms: + - scalar: exact equality + - {in: [...]}: membership in a list + """ + for k, want in conditions.items(): + got = props.get(k) + if isinstance(want, dict) and "in" in want: + if got not in want["in"]: + return False + else: + if got != want: + return False + return True + + +# ---------------------------------------------------------------------- +# Transforms +# ---------------------------------------------------------------------- + + +def apply_name_overrides(geo: dict, overrides: list[dict]) -> dict: + """Apply attribute overrides from name_overrides.yaml.""" + n_applied = 0 + for entry in overrides: + match = entry["match"] + new_values = entry["set"] + for feature in geo["features"]: + props = feature["properties"] + if _matches(props, match): + props.update(new_values) + n_applied += 1 + log(f" name_overrides: applied {n_applied} field updates " + f"across {len(overrides)} entries") + return geo + + +# ---------------------------------------------------------------------- +# Main +# ---------------------------------------------------------------------- + + +def main() -> int: + OUTPUT_DIR.mkdir(exist_ok=True) + + log(f"Country Map build — pinned to NE {NE_PINNED_TAG} ({NE_PINNED_SHA[:8]})") + + # Load configs + name_overrides = yaml.safe_load( + (CONFIG_DIR / "name_overrides.yaml").read_text() + )["overrides"] + log(f"Loaded {len(name_overrides)} name override entries") + + # POC scope: UA worldview, Admin 0 only. Future commits expand this. + worldview = "ukr" + admin_level = 0 + + log(f"\nBuilding worldview={worldview} admin_level={admin_level}") + shp = fetch_ne_shapefile(admin_level, worldview) + raw_geojson = OUTPUT_DIR / f"_raw_{worldview}_admin{admin_level}.geo.json" + shp_to_geojson(shp, raw_geojson) + + geo = json.loads(raw_geojson.read_text()) + log(f" loaded {len(geo['features'])} features") + + geo = apply_name_overrides(geo, name_overrides) + # TODO(next-commit): flying_islands, territory_assignments, + # composite_maps, regional_aggregations, simplification, procedural/ + + final = OUTPUT_DIR / f"{worldview}_admin{admin_level}.geo.json" + final.write_text(json.dumps(geo)) + log(f" wrote {final} ({final.stat().st_size:,} bytes)") + + # Cleanup intermediate + raw_geojson.unlink() + + log("\nDone.") + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/superset-frontend/plugins/plugin-chart-country-map/scripts/build.sh b/superset-frontend/plugins/plugin-chart-country-map/scripts/build.sh new file mode 100755 index 00000000000..1f881bdb142 --- /dev/null +++ b/superset-frontend/plugins/plugin-chart-country-map/scripts/build.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash +# Country Map build pipeline. +# +# One-shot, reproducible: pinned upstream NE version, deterministic outputs. +# Replaces the legacy Jupyter notebook. See README.md for details. + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +cd "$SCRIPT_DIR" + +# Sanity checks +command -v python3 >/dev/null || { echo "python3 required" >&2; exit 1; } +command -v npx >/dev/null || { echo "npx (Node.js) required for mapshaper" >&2; exit 1; } + +python3 -c "import yaml" 2>/dev/null || { + echo "PyYAML required: pip install pyyaml" >&2 + exit 1 +} + +exec python3 build.py "$@"