feat(country-map): build script — split Admin 1 per country

The monolithic ukr_admin1.geo.json (15 MB / 4595 features) was a single
file that any chart would have to download in full just to render one
country's subdivisions. Replace it with per-country files keyed by
adm0_a3, each individually simplified.

Also drops single-subdivision countries (useless as choropleths) at
this stage, mirroring the notebook's auto-purge.

Output stats from full run:
  Files: 220 total
    1 × admin0 (world) ............ 2.1 MB
    4 × regional aggregations ..... 23-32 KB each
    1 × composite (france_overseas) 322 KB
  214 × per-country admin1 ........ 17 KB - 662 KB each (GBR largest)

Per-chart payload:
  world choropleth   → ukr_admin0.geo.json                  2.1 MB
  France departments → ukr_admin1_FRA.geo.json              308 KB
  US states          → ukr_admin1_USA.geo.json              ~250 KB
  Türkiye NUTS-1     → regional_TUR_nuts_1_ukr.geo.json     23 KB
  France w/ overseas → composite_france_overseas_ukr.geo.json 322 KB

All well within usable browser payload range. The plugin will lazy-load
only what's needed for the current chart's worldview/admin-level/country.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Evan Rusackas
2026-05-12 16:39:47 -07:00
parent 989ed61f34
commit d7edbf747a

View File

@@ -736,27 +736,69 @@ def build_one(
# TODO(future): procedural/
# Write transformed GeoJSON to an intermediate path, then run
# mapshaper -simplify into the final output. Two-stage approach so
# the Python transforms work on full-resolution geometry.
wv_label = worldview or "default"
if admin_level == 1:
# Per-country split: each chart loads only its country's data
# (~50KB-1MB) instead of the full ~15MB global Admin 1 layer.
country_outputs = _write_admin1_per_country(geo, wv_label, simplify_pct=5.0)
log(
f" wrote {len(country_outputs)} per-country Admin 1 files "
f"(total {sum(p.stat().st_size for p in country_outputs):,} bytes)"
)
raw.unlink()
return country_outputs[0] if country_outputs else raw # placeholder return
# Admin 0: single global file (one feature per country = small enough)
transformed = OUTPUT_DIR / f"_transformed_{wv_label}_admin{admin_level}.geo.json"
transformed.write_text(json.dumps(geo))
final = OUTPUT_DIR / f"{wv_label}_admin{admin_level}.geo.json"
simplify_geojson(transformed, final, percentage=5.0)
final_size = final.stat().st_size
pre_size = transformed.stat().st_size
reduction = 100 * (1 - final_size / pre_size) if pre_size else 0
log(f" wrote {final.name} ({final_size:,} bytes, "
f"{len(geo['features'])} features, simplified -{reduction:.0f}%)")
log(f" wrote {final.name} ({final.stat().st_size:,} bytes, "
f"{len(geo['features'])} features)")
raw.unlink()
transformed.unlink()
return final
def _write_admin1_per_country(
geo: dict,
wv_label: str,
simplify_pct: float = 5.0,
) -> list[Path]:
"""Split global Admin 1 into one GeoJSON per country, each simplified."""
from collections import defaultdict
by_country: dict[str, list[dict]] = defaultdict(list)
for f in geo["features"]:
a3 = f["properties"].get("adm0_a3")
if a3:
by_country[a3].append(f)
outputs: list[Path] = []
for a3, features in sorted(by_country.items()):
if len(features) < 2:
# Single-subdivision countries are useless as choropleths.
continue
country_geo = {"type": "FeatureCollection", "features": features}
inter = OUTPUT_DIR / f"_admin1_{a3}_{wv_label}_pre.geo.json"
inter.write_text(json.dumps(country_geo))
out = OUTPUT_DIR / f"{wv_label}_admin1_{a3}.geo.json"
subprocess.run(
[
"npx", "--yes", "mapshaper",
str(inter),
"-simplify", f"{simplify_pct}%", "keep-shapes",
"-o", str(out), "format=geojson",
],
check=True,
stderr=subprocess.DEVNULL,
)
inter.unlink()
outputs.append(out)
return outputs
def main() -> int:
OUTPUT_DIR.mkdir(exist_ok=True)