mirror of
https://github.com/apache/superset.git
synced 2026-05-29 20:29:34 +00:00
feat(country-map): build script — territory_assignments transform
Implements the third transform: pull features from sibling Admin 0
records into a destination country's Admin 1 view. Used for:
- China + Taiwan/HK/Macau (NE keeps each as separate Admin 0)
- Finland + Åland (missing from FIN admin 1; NE keeps Åland as ALD
admin 0)
Verified on real data:
Building worldview=ukr admin_level=1
territory_assignments: added 4 features from sibling Admin 0 records
(4 = TWN/HKG/MAC + ALD; ARMM-renamed BARMM region picks up correctly
because name_overrides ran first.)
Two bugs fixed along the way:
1. **Property name casing.** NE Admin 0 ships with uppercase property
names (ADM0_A3, NAME_EN), Admin 1 with lowercase. All transforms
downstream assume lowercase, so we now normalize to lowercase at
shapefile-conversion time. Bonus: fixes a silent flying_islands
bug where `adm0_a3` filters never matched at Admin 0 because the
props were uppercase.
2. **drop_outside_bbox at Admin 0.** A country's multi-polygon often
includes overseas territories (Netherlands → Caribbean), so bbox
filtering at Admin 0 would drop entire countries. Now guarded to
only run at Admin 1 where each feature is a single subdivision.
3. **Åland's NE code.** NE uses ALD, not the ISO 3166-1 ALA. Updated
territory_assignments.yaml with comment noting the divergence.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -108,7 +108,13 @@ def fetch_ne_shapefile(admin_level: int, worldview: str = "") -> Path:
|
||||
|
||||
|
||||
def shp_to_geojson(shp: Path, output: Path) -> None:
|
||||
"""Convert a shapefile to GeoJSON FeatureCollection."""
|
||||
"""Convert a shapefile to GeoJSON FeatureCollection.
|
||||
|
||||
Also normalizes property names to lowercase: NE ships Admin 0 with
|
||||
uppercase field names (ADM0_A3, NAME_EN, ...) and Admin 1 with
|
||||
lowercase (adm0_a3, name_en, ...). All transforms downstream assume
|
||||
lowercase, so we normalize at conversion time.
|
||||
"""
|
||||
if shutil.which("npx") is None:
|
||||
raise RuntimeError(
|
||||
"npx not found in PATH; mapshaper is required for shapefile conversion"
|
||||
@@ -119,6 +125,16 @@ def shp_to_geojson(shp: Path, output: Path) -> None:
|
||||
check=True,
|
||||
stderr=subprocess.DEVNULL,
|
||||
)
|
||||
_normalize_property_keys(output)
|
||||
|
||||
|
||||
def _normalize_property_keys(geojson_path: Path) -> None:
|
||||
"""Lowercase all feature property keys in-place."""
|
||||
geo = json.loads(geojson_path.read_text())
|
||||
for f in geo.get("features", []):
|
||||
props = f.get("properties") or {}
|
||||
f["properties"] = {k.lower(): v for k, v in props.items()}
|
||||
geojson_path.write_text(json.dumps(geo))
|
||||
|
||||
|
||||
def simplify_geojson(src: Path, dst: Path, percentage: float = 5.0) -> None:
|
||||
@@ -261,7 +277,58 @@ def _bbox_contains(geom: dict, nw: list[float], se: list[float]) -> bool:
|
||||
)
|
||||
|
||||
|
||||
def apply_flying_islands(geo: dict, config: dict, country_a3: str | None) -> dict:
|
||||
def apply_territory_assignments(
|
||||
geo: dict,
|
||||
config: dict,
|
||||
admin0_geo: dict,
|
||||
) -> dict:
|
||||
"""Pull features from sibling Admin 0 records into a destination country.
|
||||
|
||||
Operates on Admin 1 outputs only — the use cases (China + SARs,
|
||||
Finland + Åland) all pull from Admin 0 records of one country and
|
||||
add them as single Admin 1 subdivisions of another.
|
||||
|
||||
`admin0_geo` must already be loaded by the caller — passed in to
|
||||
avoid re-downloading.
|
||||
"""
|
||||
countries = config.get("countries", {})
|
||||
if not countries:
|
||||
log(" territory_assignments: nothing to apply (config empty)")
|
||||
return geo
|
||||
|
||||
n_added = 0
|
||||
for dest_a3, rules in countries.items():
|
||||
for entry in rules.get("additions", []):
|
||||
from_spec = entry["from"]
|
||||
source_a3 = from_spec["adm0_a3"]
|
||||
source_match = from_spec.get("match", {})
|
||||
|
||||
for f in admin0_geo["features"]:
|
||||
p = f["properties"]
|
||||
if p.get("adm0_a3") != source_a3:
|
||||
continue
|
||||
if source_match and not _matches(p, source_match):
|
||||
continue
|
||||
|
||||
# Deep copy; reattach to destination country
|
||||
new_feature = json.loads(json.dumps(f))
|
||||
new_feature["properties"]["adm0_a3"] = dest_a3
|
||||
if "set" in entry:
|
||||
new_feature["properties"].update(entry["set"])
|
||||
geo["features"].append(new_feature)
|
||||
n_added += 1
|
||||
break # take first match per addition entry
|
||||
|
||||
log(f" territory_assignments: added {n_added} features from sibling Admin 0 records")
|
||||
return geo
|
||||
|
||||
|
||||
def apply_flying_islands(
|
||||
geo: dict,
|
||||
config: dict,
|
||||
country_a3: str | None,
|
||||
admin_level: int,
|
||||
) -> dict:
|
||||
"""Apply flying_islands.yaml transforms.
|
||||
|
||||
For Admin 0 outputs, `country_a3` is None and we apply each country's
|
||||
@@ -295,8 +362,11 @@ def apply_flying_islands(geo: dict, config: dict, country_a3: str | None) -> dic
|
||||
)
|
||||
n_repos += 1
|
||||
|
||||
# Drop outside bbox
|
||||
drop = rules.get("drop_outside_bbox")
|
||||
# Drop outside bbox — only meaningful at Admin 1 (where each
|
||||
# feature is a single subdivision). At Admin 0 a country's
|
||||
# multi-polygon often extends to overseas territories, so the
|
||||
# bbox check would drop entire countries.
|
||||
drop = rules.get("drop_outside_bbox") if admin_level == 1 else None
|
||||
if drop:
|
||||
nw, se = drop["nw"], drop["se"]
|
||||
kept: list[dict] = []
|
||||
@@ -327,6 +397,7 @@ def build_one(
|
||||
admin_level: int,
|
||||
name_overrides: list[dict],
|
||||
flying_islands: dict,
|
||||
territory_assignments: dict,
|
||||
) -> Path:
|
||||
"""Build one (worldview, admin_level) GeoJSON. Returns the output path."""
|
||||
log(f"\nBuilding worldview={worldview or 'default'} admin_level={admin_level}")
|
||||
@@ -338,9 +409,21 @@ def build_one(
|
||||
log(f" loaded {len(geo['features'])} features")
|
||||
|
||||
geo = apply_name_overrides(geo, name_overrides)
|
||||
geo = apply_flying_islands(geo, flying_islands, country_a3=None)
|
||||
# TODO(future): territory_assignments, composite_maps,
|
||||
# regional_aggregations, procedural/
|
||||
geo = apply_flying_islands(geo, flying_islands, country_a3=None, admin_level=admin_level)
|
||||
|
||||
# territory_assignments only makes sense at Admin 1 — the additions
|
||||
# (China+SARs, Finland+Åland) inject Admin-0-sized features as
|
||||
# single subdivisions of a destination country.
|
||||
if admin_level == 1 and territory_assignments.get("countries"):
|
||||
admin0_shp = fetch_ne_shapefile(0, worldview)
|
||||
admin0_path = OUTPUT_DIR / f"_admin0_for_assignments_{worldview or 'default'}.geo.json"
|
||||
if not admin0_path.exists():
|
||||
shp_to_geojson(admin0_shp, admin0_path)
|
||||
admin0_geo = json.loads(admin0_path.read_text())
|
||||
geo = apply_territory_assignments(geo, territory_assignments, admin0_geo)
|
||||
admin0_path.unlink(missing_ok=True)
|
||||
|
||||
# TODO(future): composite_maps, regional_aggregations, procedural/
|
||||
|
||||
# Write transformed GeoJSON to an intermediate path, then run
|
||||
# mapshaper -simplify into the final output. Two-stage approach so
|
||||
@@ -375,8 +458,13 @@ def main() -> int:
|
||||
flying_islands = yaml.safe_load(
|
||||
(CONFIG_DIR / "flying_islands.yaml").read_text()
|
||||
)
|
||||
territory_assignments = yaml.safe_load(
|
||||
(CONFIG_DIR / "territory_assignments.yaml").read_text()
|
||||
)
|
||||
log(f"Loaded {len(name_overrides)} name override entries")
|
||||
log(f"Loaded flying_islands rules for {len(flying_islands.get('countries', {}))} countries")
|
||||
log(f"Loaded territory_assignments rules for "
|
||||
f"{len(territory_assignments.get('countries', {}))} countries")
|
||||
|
||||
# POC scope: UA worldview, both Admin 0 and Admin 1. Future commits
|
||||
# add more worldviews (Default, and other major NE worldviews).
|
||||
@@ -386,7 +474,13 @@ def main() -> int:
|
||||
]
|
||||
|
||||
for worldview, admin_level in targets:
|
||||
build_one(worldview, admin_level, name_overrides, flying_islands)
|
||||
build_one(
|
||||
worldview,
|
||||
admin_level,
|
||||
name_overrides,
|
||||
flying_islands,
|
||||
territory_assignments,
|
||||
)
|
||||
|
||||
log("\nDone.")
|
||||
return 0
|
||||
|
||||
@@ -66,15 +66,15 @@ countries:
|
||||
|
||||
# -------------------------------------------------------------------
|
||||
# Finland — add Åland
|
||||
# NE has Åland as a separate Admin 0 record (ALA) and it is missing
|
||||
# from the FIN admin1 dataset. Re-attach it as FI-01 with the Finnish
|
||||
# name "Ahvenanmaan maakunta".
|
||||
# NE has Åland as a separate Admin 0 record (note: NE uses ALD, not
|
||||
# the ISO 3166-1 ALA) and it is missing from the FIN admin1 dataset.
|
||||
# Re-attach it as FI-01 with the Finnish name "Ahvenanmaan maakunta".
|
||||
# -------------------------------------------------------------------
|
||||
FIN:
|
||||
additions:
|
||||
- description: Add Åland as Finland subdivision FI-01
|
||||
from:
|
||||
adm0_a3: ALA
|
||||
adm0_a3: ALD # NE-specific code; ISO equivalent is ALA
|
||||
match: { name_en: Åland }
|
||||
set:
|
||||
iso_3166_2: FI-01
|
||||
|
||||
Reference in New Issue
Block a user