feat(country-map): build script — split Admin 1 per country

The monolithic ukr_admin1.geo.json (15 MB / 4,595 features) was a single file that any chart had to download in full just to render one country's subdivisions. Replace it with per-country files keyed by adm0_a3, each simplified individually. Single-subdivision countries (useless as choropleths) are also dropped at this stage, mirroring the notebook's auto-purge.

Output stats from a full run:

Files: 220 total
  1 × admin0 (world) ............ 2.1 MB
  4 × regional aggregations ..... 23-32 KB each
  1 × composite (france_overseas) 322 KB
  214 × per-country admin1 ........ 17 KB - 662 KB each (GBR largest)

Per-chart payload:
  world choropleth → ukr_admin0.geo.json 2.1 MB
  France departments → ukr_admin1_FRA.geo.json 308 KB
  US states → ukr_admin1_USA.geo.json ~250 KB
  Türkiye NUTS-1 → regional_TUR_nuts_1_ukr.geo.json 23 KB
  France w/ overseas → composite_france_overseas_ukr.geo.json 322 KB

All are well within a usable browser payload range. The plugin will lazy-load only what is needed for the current chart's worldview/admin-level/country.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-22 00:05:15 +00:00 · 2026-05-12 16:39:47 -07:00
parent 989ed61f34
commit d7edbf747a
1 changed files with 53 additions and 11 deletions
--- a/superset-frontend/plugins/plugin-chart-country-map/scripts/build.py
+++ b/superset-frontend/plugins/plugin-chart-country-map/scripts/build.py
@@ -736,27 +736,69 @@ def build_one(

    # TODO(future): procedural/

-    # Write transformed GeoJSON to an intermediate path, then run
-    # mapshaper -simplify into the final output. Two-stage approach so
-    # the Python transforms work on full-resolution geometry.
    wv_label = worldview or "default"
+
+    if admin_level == 1:
+        # Per-country split: each chart loads only its country's data
+        # (~50KB-1MB) instead of the full ~15MB global Admin 1 layer.
+        country_outputs = _write_admin1_per_country(geo, wv_label, simplify_pct=5.0)
+        log(
+            f"  wrote {len(country_outputs)} per-country Admin 1 files "
+            f"(total {sum(p.stat().st_size for p in country_outputs):,} bytes)"
+        )
+        raw.unlink()
+        return country_outputs[0] if country_outputs else raw  # placeholder return
+
+    # Admin 0: single global file (one feature per country = small enough)
    transformed = OUTPUT_DIR / f"_transformed_{wv_label}_admin{admin_level}.geo.json"
    transformed.write_text(json.dumps(geo))
-
    final = OUTPUT_DIR / f"{wv_label}_admin{admin_level}.geo.json"
    simplify_geojson(transformed, final, percentage=5.0)
-
-    final_size = final.stat().st_size
-    pre_size = transformed.stat().st_size
-    reduction = 100 * (1 - final_size / pre_size) if pre_size else 0
-    log(f"  wrote {final.name} ({final_size:,} bytes, "
-        f"{len(geo['features'])} features, simplified -{reduction:.0f}%)")
-
+    log(f"  wrote {final.name} ({final.stat().st_size:,} bytes, "
+        f"{len(geo['features'])} features)")
    raw.unlink()
    transformed.unlink()
    return final


+def _write_admin1_per_country(
+    geo: dict,
+    wv_label: str,
+    simplify_pct: float = 5.0,
+) -> list[Path]:
+    """Split a global Admin 1 layer into one simplified GeoJSON per country.
+
+    Features are grouped by their ``adm0_a3`` property. Every group with at
+    least two features is written to an intermediate file in ``OUTPUT_DIR``,
+    simplified with mapshaper, and saved as
+    ``{wv_label}_admin1_{adm0_a3}.geo.json``.
+
+    Args:
+        geo: GeoJSON FeatureCollection dict; only ``geo["features"]`` is read.
+        wv_label: Worldview label used as the output filename prefix.
+        simplify_pct: Percentage handed to mapshaper's ``-simplify`` command.
+
+    Returns:
+        Paths of the written per-country files, ordered by ``adm0_a3``.
+
+    Raises:
+        subprocess.CalledProcessError: If mapshaper exits non-zero. Its
+            captured stderr is logged before the error propagates, and the
+            intermediate file is removed either way.
+    """
+    by_country: dict[str, list[dict]] = {}
+    skipped = 0
+    for feature in geo["features"]:
+        # GeoJSON permits ``"properties": null``; treat it as "no country code".
+        a3 = (feature.get("properties") or {}).get("adm0_a3")
+        if a3:
+            by_country.setdefault(a3, []).append(feature)
+        else:
+            skipped += 1
+    if skipped:
+        # Make dropped features visible instead of losing them silently.
+        log(f"  skipped {skipped} Admin 1 features without adm0_a3")
+
+    outputs: list[Path] = []
+    for a3, features in sorted(by_country.items()):
+        if len(features) < 2:
+            # Single-subdivision countries are useless as choropleths.
+            continue
+        inter = OUTPUT_DIR / f"_admin1_{a3}_{wv_label}_pre.geo.json"
+        out = OUTPUT_DIR / f"{wv_label}_admin1_{a3}.geo.json"
+        try:
+            inter.write_text(
+                json.dumps({"type": "FeatureCollection", "features": features})
+            )
+            subprocess.run(
+                [
+                    "npx", "--yes", "mapshaper",
+                    str(inter),
+                    # keep-shapes: mapshaper option that stops small polygons
+                    # from disappearing under simplification.
+                    "-simplify", f"{simplify_pct}%", "keep-shapes",
+                    "-o", str(out), "format=geojson",
+                ],
+                check=True,
+                # Capture rather than discard: stays quiet on success, but
+                # the diagnostics are available when mapshaper fails.
+                stderr=subprocess.PIPE,
+                text=True,
+                errors="replace",
+            )
+        except subprocess.CalledProcessError as err:
+            log(f"  mapshaper failed for {a3}: {(err.stderr or '').strip()}")
+            raise
+        finally:
+            # Never leave the intermediate file behind, even on failure.
+            inter.unlink(missing_ok=True)
+        outputs.append(out)
+    return outputs
+
+
 def main() -> int:
    OUTPUT_DIR.mkdir(exist_ok=True)