feat(country-map): build script — territory_assignments transform

Implements the third transform: pull features from sibling Admin 0
records into a destination country's Admin 1 view. Used for:
- China + Taiwan/HK/Macau (NE keeps each as separate Admin 0)
- Finland + Åland (missing from FIN admin 1; NE keeps Åland as ALD
  admin 0)

Verified on real data:
  Building worldview=ukr admin_level=1
    territory_assignments: added 4 features from sibling Admin 0 records

(4 = TWN/HKG/MAC + ALD; ARMM-renamed BARMM region picks up correctly
because name_overrides ran first.)

Three bugs fixed along the way:

1. **Property name casing.** NE Admin 0 ships with uppercase property
   names (ADM0_A3, NAME_EN), Admin 1 with lowercase. All transforms
   downstream assume lowercase, so we now normalize to lowercase at
   shapefile-conversion time. Bonus: fixes a silent flying_islands
   bug where `adm0_a3` filters never matched at Admin 0 because the
   props were uppercase.

2. **drop_outside_bbox at Admin 0.** A country's multi-polygon often
   includes overseas territories (Netherlands → Caribbean), so bbox
   filtering at Admin 0 would drop entire countries. Now guarded to
   only run at Admin 1 where each feature is a single subdivision.

3. **Åland's NE code.** NE uses ALD, not the ISO 3166-1 ALA. Updated
   territory_assignments.yaml with comment noting the divergence.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Evan Rusackas
2026-05-12 16:27:04 -07:00
parent ae5e1132ba
commit cb005a2ea5
2 changed files with 106 additions and 12 deletions

View File

@@ -108,7 +108,13 @@ def fetch_ne_shapefile(admin_level: int, worldview: str = "") -> Path:
def shp_to_geojson(shp: Path, output: Path) -> None:
"""Convert a shapefile to GeoJSON FeatureCollection."""
"""Convert a shapefile to GeoJSON FeatureCollection.
Also normalizes property names to lowercase: NE ships Admin 0 with
uppercase field names (ADM0_A3, NAME_EN, ...) and Admin 1 with
lowercase (adm0_a3, name_en, ...). All transforms downstream assume
lowercase, so we normalize at conversion time.
"""
if shutil.which("npx") is None:
raise RuntimeError(
"npx not found in PATH; mapshaper is required for shapefile conversion"
@@ -119,6 +125,16 @@ def shp_to_geojson(shp: Path, output: Path) -> None:
check=True,
stderr=subprocess.DEVNULL,
)
_normalize_property_keys(output)
def _normalize_property_keys(geojson_path: Path) -> None:
    """Lowercase all feature property keys of a GeoJSON file in-place.

    The file at ``geojson_path`` is parsed as JSON, every feature's
    ``properties`` mapping is rebuilt with lowercased keys, and the result
    is written back to the same path.

    Notes:
        * The file is read and written explicitly as UTF-8 rather than with
          the platform's default encoding, so non-ASCII property values
          (e.g. "Åland") are decoded correctly on systems whose locale
          encoding is not UTF-8.
        * A feature whose ``properties`` is missing or null ends up with an
          empty dict.
        * If two keys differ only by case, the one that appears later in
          the original mapping wins.

    Args:
        geojson_path: Path of the GeoJSON file to rewrite.
    """
    geo = json.loads(geojson_path.read_text(encoding="utf-8"))
    for feature in geo.get("features", []):
        # `or {}` covers both a missing key and an explicit JSON null.
        props = feature.get("properties") or {}
        feature["properties"] = {key.lower(): value for key, value in props.items()}
    # json.dumps escapes non-ASCII by default, so the output is plain ASCII;
    # the explicit encoding just keeps the write independent of the locale.
    geojson_path.write_text(json.dumps(geo), encoding="utf-8")
def simplify_geojson(src: Path, dst: Path, percentage: float = 5.0) -> None:
@@ -261,7 +277,58 @@ def _bbox_contains(geom: dict, nw: list[float], se: list[float]) -> bool:
)
def apply_flying_islands(geo: dict, config: dict, country_a3: str | None) -> dict:
def apply_territory_assignments(
    geo: dict,
    config: dict,
    admin0_geo: dict,
) -> dict:
    """Pull features from sibling Admin 0 records into a destination country.

    Operates on Admin 1 outputs only — the use cases (China + SARs,
    Finland + Åland) all pull from Admin 0 records of one country and
    add them as single Admin 1 subdivisions of another.

    For every ``additions`` entry under ``config["countries"][dest_a3]``,
    the first feature of ``admin0_geo`` whose ``adm0_a3`` equals
    ``entry["from"]["adm0_a3"]`` (and which satisfies the optional
    ``entry["from"]["match"]`` filter) is copied, re-tagged with
    ``adm0_a3 = dest_a3``, updated with the optional ``entry["set"]``
    properties, and appended to ``geo["features"]``.

    An addition that matches no Admin 0 feature is reported in the log
    instead of being skipped silently, so a wrong source code in the
    config (e.g. an ISO code where the dataset uses a different one) is
    visible in the build output.

    Args:
        geo: Admin 1 FeatureCollection; mutated in place.
        config: Parsed territory_assignments config.
        admin0_geo: Admin 0 FeatureCollection. Must already be loaded by
            the caller — passed in to avoid re-downloading. Its features
            are expected to carry lowercase property keys.

    Returns:
        The same ``geo`` object, with the added features appended.

    Raises:
        KeyError: If an addition entry lacks ``from`` or ``from.adm0_a3``.
    """
    countries = config.get("countries", {})
    if not countries:
        log(" territory_assignments: nothing to apply (config empty)")
        return geo

    n_added = 0
    for dest_a3, rules in countries.items():
        # A country key with no body parses to None in YAML; treat it (and a
        # null `additions`) as "no additions" rather than crashing.
        additions = (rules or {}).get("additions") or []
        for entry in additions:
            from_spec = entry["from"]
            source_a3 = from_spec["adm0_a3"]
            source_match = from_spec.get("match", {})

            for f in admin0_geo["features"]:
                p = f["properties"]
                if p.get("adm0_a3") != source_a3:
                    continue
                # NOTE(review): _matches is defined elsewhere; presumably it
                # checks that `p` satisfies every key/value in the filter.
                if source_match and not _matches(p, source_match):
                    continue

                # Deep copy via a JSON round-trip so the Admin 0 source
                # feature is left untouched; reattach to destination country.
                new_feature = json.loads(json.dumps(f))
                new_feature["properties"]["adm0_a3"] = dest_a3
                if "set" in entry:
                    new_feature["properties"].update(entry["set"])
                geo["features"].append(new_feature)
                n_added += 1
                break  # take first match per addition entry
            else:
                # Loop finished without `break`: nothing matched this entry.
                log(
                    f" territory_assignments: WARNING no Admin 0 feature matched "
                    f"adm0_a3={source_a3!r} match={source_match!r} "
                    f"for destination {dest_a3}; addition skipped"
                )

    log(f" territory_assignments: added {n_added} features from sibling Admin 0 records")
    return geo
def apply_flying_islands(
geo: dict,
config: dict,
country_a3: str | None,
admin_level: int,
) -> dict:
"""Apply flying_islands.yaml transforms.
For Admin 0 outputs, `country_a3` is None and we apply each country's
@@ -295,8 +362,11 @@ def apply_flying_islands(geo: dict, config: dict, country_a3: str | None) -> dic
)
n_repos += 1
# Drop outside bbox
drop = rules.get("drop_outside_bbox")
# Drop outside bbox — only meaningful at Admin 1 (where each
# feature is a single subdivision). At Admin 0 a country's
# multi-polygon often extends to overseas territories, so the
# bbox check would drop entire countries.
drop = rules.get("drop_outside_bbox") if admin_level == 1 else None
if drop:
nw, se = drop["nw"], drop["se"]
kept: list[dict] = []
@@ -327,6 +397,7 @@ def build_one(
admin_level: int,
name_overrides: list[dict],
flying_islands: dict,
territory_assignments: dict,
) -> Path:
"""Build one (worldview, admin_level) GeoJSON. Returns the output path."""
log(f"\nBuilding worldview={worldview or 'default'} admin_level={admin_level}")
@@ -338,9 +409,21 @@ def build_one(
log(f" loaded {len(geo['features'])} features")
geo = apply_name_overrides(geo, name_overrides)
geo = apply_flying_islands(geo, flying_islands, country_a3=None)
# TODO(future): territory_assignments, composite_maps,
# regional_aggregations, procedural/
geo = apply_flying_islands(geo, flying_islands, country_a3=None, admin_level=admin_level)
# territory_assignments only makes sense at Admin 1 — the additions
# (China+SARs, Finland+Åland) inject Admin-0-sized features as
# single subdivisions of a destination country.
if admin_level == 1 and territory_assignments.get("countries"):
admin0_shp = fetch_ne_shapefile(0, worldview)
admin0_path = OUTPUT_DIR / f"_admin0_for_assignments_{worldview or 'default'}.geo.json"
if not admin0_path.exists():
shp_to_geojson(admin0_shp, admin0_path)
admin0_geo = json.loads(admin0_path.read_text())
geo = apply_territory_assignments(geo, territory_assignments, admin0_geo)
admin0_path.unlink(missing_ok=True)
# TODO(future): composite_maps, regional_aggregations, procedural/
# Write transformed GeoJSON to an intermediate path, then run
# mapshaper -simplify into the final output. Two-stage approach so
@@ -375,8 +458,13 @@ def main() -> int:
flying_islands = yaml.safe_load(
(CONFIG_DIR / "flying_islands.yaml").read_text()
)
territory_assignments = yaml.safe_load(
(CONFIG_DIR / "territory_assignments.yaml").read_text()
)
log(f"Loaded {len(name_overrides)} name override entries")
log(f"Loaded flying_islands rules for {len(flying_islands.get('countries', {}))} countries")
log(f"Loaded territory_assignments rules for "
f"{len(territory_assignments.get('countries', {}))} countries")
# POC scope: UA worldview, both Admin 0 and Admin 1. Future commits
# add more worldviews (Default, and other major NE worldviews).
@@ -386,7 +474,13 @@ def main() -> int:
]
for worldview, admin_level in targets:
build_one(worldview, admin_level, name_overrides, flying_islands)
build_one(
worldview,
admin_level,
name_overrides,
flying_islands,
territory_assignments,
)
log("\nDone.")
return 0

View File

@@ -66,15 +66,15 @@ countries:
# -------------------------------------------------------------------
# Finland — add Åland
# NE has Åland as a separate Admin 0 record (ALA) and it is missing
# from the FIN admin1 dataset. Re-attach it as FI-01 with the Finnish
# name "Ahvenanmaan maakunta".
# NE has Åland as a separate Admin 0 record (note: NE uses ALD, not
# the ISO 3166-1 ALA) and it is missing from the FIN admin1 dataset.
# Re-attach it as FI-01 with the Finnish name "Ahvenanmaan maakunta".
# -------------------------------------------------------------------
FIN:
additions:
- description: Add Åland as Finland subdivision FI-01
from:
adm0_a3: ALA
adm0_a3: ALD # NE-specific code; ISO equivalent is ALA
match: { name_en: Åland }
set:
iso_3166_2: FI-01