mirror of
https://github.com/apache/superset.git
synced 2026-05-21 15:55:10 +00:00
feat(country-map): minimum-viable build pipeline (NE → GeoJSON)
End-to-end working pipeline replacing the legacy notebook for one
worldview / admin level. Verified locally:
$ ./build.sh
Country Map build — pinned to NE v5.1.2 (f1890d9f)
Loaded 10 name override entries
Building worldview=ukr admin_level=0
Downloading NE ne_10m_admin_0_countries_ukr (worldview=ukr)…
mapshaper: ne_10m_admin_0_countries_ukr.shp → _raw_ukr_admin0.geo.json
loaded 249 features
name_overrides: applied 0 field updates across 10 entries
wrote .../output/ukr_admin0.geo.json (23,639,348 bytes)
Done.
What's wired:
- NE download from pinned tag (v5.1.2 / SHA f1890d9f) with cache
- Shapefile → GeoJSON via mapshaper CLI
- YAML config loading (currently just name_overrides)
- name_overrides transform with {match, set} semantics, including
the {in: [...]} list-membership matcher
- Output writes to scripts/output/ (gitignored)
- build.sh wrapper validates Python + Node + PyYAML are available
What's stubbed for future commits (TODO inline):
- Multiple worldviews (currently UA only)
- Admin 1 build (where name_overrides actually fire — currently no
features in Admin 0 match the FRA/PHL admin1 entries)
- flying_islands, territory_assignments, regional_aggregations,
composite_maps transforms
- Simplification (mapshaper -simplify)
- Procedural escape-hatch orchestration
- Manifest with NE SHA + build metadata
The "applied 0 field updates" count in the log above is expected, not a bug:
all current entries target Admin 1 features, and this build only covers Admin 0.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
6
superset-frontend/plugins/plugin-chart-country-map/scripts/.gitignore
vendored
Normal file
6
superset-frontend/plugins/plugin-chart-country-map/scripts/.gitignore
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
# Build cache (downloaded NE shapefiles)
|
||||
.cache/
|
||||
|
||||
# Build outputs (regenerated; not committed at the script-dir level —
|
||||
# the eventual location for shipped GeoJSON is the plugin's src/data/)
|
||||
output/
|
||||
212
superset-frontend/plugins/plugin-chart-country-map/scripts/build.py
Executable file
212
superset-frontend/plugins/plugin-chart-country-map/scripts/build.py
Executable file
@@ -0,0 +1,212 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Country Map build pipeline — Natural Earth → GeoJSON.
|
||||
|
||||
Replaces the legacy Jupyter notebook. Reads YAML configs from config/,
|
||||
downloads pinned Natural Earth shapefiles, applies declarative transforms,
|
||||
optionally runs procedural escape-hatch scripts, and writes per-worldview
|
||||
GeoJSON outputs to output/.
|
||||
|
||||
Run with: ./build.sh (checks that python3, npx and PyYAML are available, then runs `python3 build.py`)
|
||||
|
||||
This is the POC version — currently implements:
|
||||
- NE shapefile download + cache (pinned to v5.1.2)
|
||||
- Shapefile → GeoJSON conversion via mapshaper CLI
|
||||
- name_overrides.yaml application
|
||||
- One worldview (UA) at Admin 0
|
||||
|
||||
Future commits will add: multiple worldviews, Admin 1, flying_islands,
|
||||
territory_assignments, regional_aggregations, composite_maps, simplification,
|
||||
procedural/ orchestration.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import urllib.request
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
import yaml # type: ignore[import-untyped]
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Constants / paths
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
# Upstream Natural Earth source. Raw-file URLs are built from the commit SHA;
# the tag is only used for the human-readable banner printed by main().
NE_REPO = "nvkelso/natural-earth-vector"
NE_PINNED_TAG = "v5.1.2"
NE_PINNED_SHA = "f1890d9f152c896d250a77557a5751a93d494776"
NE_RAW_URL = "/".join(
    ("https://raw.githubusercontent.com", NE_REPO, NE_PINNED_SHA, "10m_cultural")
)

# All working directories are resolved relative to this script's location.
SCRIPT_DIR = Path(__file__).resolve().parent
CONFIG_DIR = SCRIPT_DIR.joinpath("config")
OUTPUT_DIR = SCRIPT_DIR.joinpath("output")
CACHE_DIR = SCRIPT_DIR.joinpath(".cache")

# Component files fetched for every shapefile bundle, in download order.
SHAPEFILE_EXTS = "shp shx dbf prj cpg".split()

# Worldview codes that NE appends as a suffix to the Admin 0 file name.
# The empty string selects the "Default" (ungrouped) NE editorial; "ukr" is
# the Ukraine worldview, which is the new plugin's documented default.
WORLDVIEWS_ADMIN_0 = ["", "ukr"]
|
||||
|
||||
|
||||
def log(msg: str) -> None:
    """Write one progress line to stderr and flush it immediately."""
    err = sys.stderr
    err.write(f"{msg}\n")
    err.flush()
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# NE download
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
def fetch_ne_shapefile(admin_level: int, worldview: str = "") -> Path:
    """Download (or reuse from cache) the shapefile bundle for one NE layer.

    Args:
        admin_level: 0 for countries or 1 for states/provinces.
        worldview: NE worldview suffix for Admin 0 files (e.g. ``"ukr"``);
            the empty string selects the default editorial. Ignored for
            Admin 1, which uses a single file.

    Returns:
        Path to the cached ``.shp`` file. The sibling ``.shx``/``.dbf``/
        ``.prj`` (and ``.cpg`` when upstream provides it) live next to it,
        as mapshaper requires.

    Raises:
        ValueError: if ``admin_level`` is not 0 or 1.
        urllib.error.HTTPError: if a required component cannot be fetched.
    """
    # Function-scope import: the module only imports ``urllib.request``, and
    # relying on it to expose ``urllib.error`` as a side effect is fragile.
    import urllib.error

    if admin_level == 0:
        suffix = f"_{worldview}" if worldview else ""
        basename = f"ne_10m_admin_0_countries{suffix}"
    elif admin_level == 1:
        # NE only publishes worldview-specific files at Admin 0. Admin 1
        # uses a single file with per-feature `WORLDVIEW` attributes.
        basename = "ne_10m_admin_1_states_provinces"
    else:
        raise ValueError(f"Unsupported admin_level={admin_level}")

    target_shp = CACHE_DIR / f"{basename}.shp"

    # The cache only counts as valid when every required component is there.
    # Checking the .shp alone is not enough: it is downloaded first, so an
    # interrupted run would otherwise leave a permanently broken cache.
    # (.cpg is optional in shapefile bundles, so it is not required here.)
    required = [
        CACHE_DIR / f"{basename}.{ext}" for ext in SHAPEFILE_EXTS if ext != "cpg"
    ]
    if target_shp.exists() and all(path.exists() for path in required):
        return target_shp

    CACHE_DIR.mkdir(parents=True, exist_ok=True)
    log(f"Downloading NE {basename} (worldview={worldview or 'default'})…")
    for ext in SHAPEFILE_EXTS:
        url = f"{NE_RAW_URL}/{basename}.{ext}"
        dest = CACHE_DIR / f"{basename}.{ext}"
        # Download to a temporary name and rename on success, so a partially
        # written file never appears under its final name.
        partial = dest.with_name(f"{dest.name}.part")
        try:
            urllib.request.urlretrieve(url, partial)
            partial.replace(dest)
        except urllib.error.HTTPError as e:
            if ext == "cpg" and e.code == 404:
                # .cpg is optional in shapefile bundles
                continue
            raise
        finally:
            # No-op after a successful rename; removes leftovers on failure.
            partial.unlink(missing_ok=True)

    return target_shp
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Shapefile → GeoJSON via mapshaper CLI
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
def shp_to_geojson(shp: Path, output: Path) -> None:
    """Convert a shapefile to a GeoJSON FeatureCollection via mapshaper.

    Runs ``npx --yes mapshaper <shp> -o <output> format=geojson``.

    Args:
        shp: Path to the input ``.shp`` file.
        output: Path the GeoJSON file is written to.

    Raises:
        RuntimeError: if ``npx`` is not on PATH.
        subprocess.CalledProcessError: if mapshaper exits non-zero. The
            captured stderr is logged and is available on the exception's
            ``stderr`` attribute.
    """
    if shutil.which("npx") is None:
        raise RuntimeError(
            "npx not found in PATH; mapshaper is required for shapefile conversion"
        )
    log(f" mapshaper: {shp.name} → {output.name}")
    # NOTE(review): no mapshaper version is given on the command line, so npx
    # decides which release runs — consider pinning it for reproducibility.
    try:
        subprocess.run(
            ["npx", "--yes", "mapshaper", str(shp), "-o", str(output), "format=geojson"],
            check=True,
            # Capture rather than discard stderr: it stays quiet on success
            # but the diagnostics remain available when the conversion fails.
            stderr=subprocess.PIPE,
            text=True,
            errors="replace",
        )
    except subprocess.CalledProcessError as err:
        if err.stderr:
            log(err.stderr.rstrip())
        raise
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Match helpers
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
def _matches(props: dict[str, Any], conditions: dict[str, Any]) -> bool:
    """Return True when ``props`` satisfies every entry of ``conditions``.

    Each condition value takes one of two forms:
    - a scalar, compared for exact equality with the property value
    - ``{in: [...]}``, satisfied when the property value is in the list

    A property absent from ``props`` is treated as ``None``.
    """

    def _violates(field: str, expected: Any) -> bool:
        actual = props.get(field)
        if isinstance(expected, dict) and "in" in expected:
            return actual not in expected["in"]
        return actual != expected

    return not any(
        _violates(field, expected) for field, expected in conditions.items()
    )
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Transforms
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
def apply_name_overrides(geo: dict, overrides: list[dict]) -> dict:
    """Apply attribute overrides from name_overrides.yaml, in place.

    Each override entry has a ``match`` mapping (evaluated by ``_matches``)
    and a ``set`` mapping whose key/value pairs are written onto the
    properties of every matching feature.

    Args:
        geo: GeoJSON FeatureCollection; its features are mutated.
        overrides: Override entries. ``None`` or an empty list is a no-op.

    Returns:
        The same ``geo`` object, for call chaining.
    """
    entries = overrides or []
    n_applied = 0
    for entry in entries:
        match = entry["match"]
        new_values = entry["set"]
        for feature in geo["features"]:
            # GeoJSON allows "properties": null; treat that as an empty
            # mapping instead of crashing on ``None.get``.
            props = feature.get("properties")
            if props is None:
                props = {}
            if not _matches(props, match):
                continue
            props.update(new_values)
            # Re-attach in case a fresh dict replaced a null/missing value.
            feature["properties"] = props
            # Count fields written, so the log line below reports what it
            # says it reports (field updates, not matched features).
            n_applied += len(new_values)
    log(f" name_overrides: applied {n_applied} field updates "
        f"across {len(entries)} entries")
    return geo
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Main
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
def main() -> int:
    """Run the build for the POC scope and return a process exit code.

    Steps: load ``config/name_overrides.yaml``, fetch the pinned NE
    shapefile, convert it to GeoJSON with mapshaper, apply the name
    overrides, and write ``output/<worldview>_admin<level>.geo.json``.

    Returns:
        0 on success. Failures propagate as exceptions.
    """
    OUTPUT_DIR.mkdir(exist_ok=True)

    log(f"Country Map build — pinned to NE {NE_PINNED_TAG} ({NE_PINNED_SHA[:8]})")

    # Load configs. The encoding is explicit so the build does not depend on
    # the machine's locale; an empty `overrides:` key is treated as no entries.
    config = yaml.safe_load(
        (CONFIG_DIR / "name_overrides.yaml").read_text(encoding="utf-8")
    )
    name_overrides = config["overrides"] or []
    log(f"Loaded {len(name_overrides)} name override entries")

    # POC scope: UA worldview, Admin 0 only. Future commits expand this.
    worldview = "ukr"
    admin_level = 0

    log(f"\nBuilding worldview={worldview} admin_level={admin_level}")
    shp = fetch_ne_shapefile(admin_level, worldview)
    raw_geojson = OUTPUT_DIR / f"_raw_{worldview}_admin{admin_level}.geo.json"
    try:
        shp_to_geojson(shp, raw_geojson)
        # Read explicitly as UTF-8 rather than with the locale default.
        geo = json.loads(raw_geojson.read_text(encoding="utf-8"))
    finally:
        # Clean up the intermediate file even when conversion or parsing
        # fails, so a broken run leaves no stray output behind.
        raw_geojson.unlink(missing_ok=True)
    log(f" loaded {len(geo['features'])} features")

    geo = apply_name_overrides(geo, name_overrides)
    # TODO(next-commit): flying_islands, territory_assignments,
    # composite_maps, regional_aggregations, simplification, procedural/

    final = OUTPUT_DIR / f"{worldview}_admin{admin_level}.geo.json"
    final.write_text(json.dumps(geo), encoding="utf-8")
    log(f" wrote {final} ({final.stat().st_size:,} bytes)")

    log("\nDone.")
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
||||
21
superset-frontend/plugins/plugin-chart-country-map/scripts/build.sh
Executable file
21
superset-frontend/plugins/plugin-chart-country-map/scripts/build.sh
Executable file
@@ -0,0 +1,21 @@
|
||||
#!/usr/bin/env bash
# Entry point for the Country Map build pipeline.
#
# Verifies that the tools build.py depends on are present, then hands over
# to it from the script's own directory. Supersedes the legacy Jupyter
# notebook; see README.md for details.

set -euo pipefail

# Print a message on stderr and abort with status 1.
die() {
  printf '%s\n' "$1" >&2
  exit 1
}

# Run from the directory containing this script so relative paths resolve.
here="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
cd "$here"

# Sanity checks
if ! command -v python3 >/dev/null; then
  die "python3 required"
fi

if ! command -v npx >/dev/null; then
  die "npx (Node.js) required for mapshaper"
fi

if ! python3 -c "import yaml" 2>/dev/null; then
  die "PyYAML required: pip install pyyaml"
fi

# Replace this shell with the Python build, forwarding all arguments.
exec python3 build.py "$@"
|
||||
Reference in New Issue
Block a user