feat: Add geospatial post processing operations (#9661)

* feat: Add geospatial post processing operations

* Linting

* Refactor

* Add tests

* Improve docs

* Address comments

* fix latitude/longitude mixup

* fix: bad refactor by pycharm
This commit is contained in:
Ville Brofeldt
2020-04-28 20:15:16 +03:00
committed by GitHub
parent c474ea848a
commit a52cfcd234
5 changed files with 322 additions and 21 deletions

View File

@@ -16,7 +16,7 @@
# under the License.
# isort:skip_file
import math
from typing import Any, List
from typing import Any, List, Optional
from pandas import Series
@@ -24,7 +24,7 @@ from superset.exceptions import QueryObjectValidationError
from superset.utils import pandas_postprocessing as proc
from .base_tests import SupersetTestCase
from .fixtures.dataframes import categories_df, timeseries_df
from .fixtures.dataframes import categories_df, lonlat_df, timeseries_df
def series_to_list(series: Series) -> List[Any]:
@@ -43,6 +43,19 @@ def series_to_list(series: Series) -> List[Any]:
]
def round_floats(
floats: List[Optional[float]], precision: int
) -> List[Optional[float]]:
"""
Round list of floats to certain precision
:param floats: floats to round
:param precision: intended decimal precision
:return: rounded floats
"""
return [round(val, precision) if val else None for val in floats]
class PostProcessingTestCase(SupersetTestCase):
def test_pivot(self):
aggregates = {"idx_nulls": {"operator": "sum"}}
@@ -219,25 +232,40 @@ class PostProcessingTestCase(SupersetTestCase):
post_df = proc.select(df=timeseries_df, columns=["label"])
self.assertListEqual(post_df.columns.tolist(), ["label"])
# rename one column
# rename and select one column
post_df = proc.select(df=timeseries_df, columns=["y"], rename={"y": "y1"})
self.assertListEqual(post_df.columns.tolist(), ["y1"])
# rename one and leave one unchanged
post_df = proc.select(
df=timeseries_df, columns=["label", "y"], rename={"y": "y1"}
)
post_df = proc.select(df=timeseries_df, rename={"y": "y1"})
self.assertListEqual(post_df.columns.tolist(), ["label", "y1"])
# drop one column
post_df = proc.select(df=timeseries_df, exclude=["label"])
self.assertListEqual(post_df.columns.tolist(), ["y"])
# rename and drop one column
post_df = proc.select(df=timeseries_df, rename={"y": "y1"}, exclude=["label"])
self.assertListEqual(post_df.columns.tolist(), ["y1"])
# invalid columns
self.assertRaises(
QueryObjectValidationError,
proc.select,
df=timeseries_df,
columns=["qwerty"],
columns=["abc"],
rename={"abc": "qwerty"},
)
# select renamed column by new name
self.assertRaises(
QueryObjectValidationError,
proc.select,
df=timeseries_df,
columns=["label_new"],
rename={"label": "label_new"},
)
def test_diff(self):
# overwrite column
post_df = proc.diff(df=timeseries_df, columns={"y": "y"})
@@ -288,3 +316,83 @@ class PostProcessingTestCase(SupersetTestCase):
columns={"y": "y"},
operator="abc",
)
def test_geohash_decode(self):
# decode lon/lat from geohash
post_df = proc.geohash_decode(
df=lonlat_df[["city", "geohash"]],
geohash="geohash",
latitude="latitude",
longitude="longitude",
)
self.assertListEqual(
sorted(post_df.columns.tolist()),
sorted(["city", "geohash", "latitude", "longitude"]),
)
self.assertListEqual(
round_floats(series_to_list(post_df["longitude"]), 6),
round_floats(series_to_list(lonlat_df["longitude"]), 6),
)
self.assertListEqual(
round_floats(series_to_list(post_df["latitude"]), 6),
round_floats(series_to_list(lonlat_df["latitude"]), 6),
)
def test_geohash_encode(self):
# encode lon/lat into geohash
post_df = proc.geohash_encode(
df=lonlat_df[["city", "latitude", "longitude"]],
latitude="latitude",
longitude="longitude",
geohash="geohash",
)
self.assertListEqual(
sorted(post_df.columns.tolist()),
sorted(["city", "geohash", "latitude", "longitude"]),
)
self.assertListEqual(
series_to_list(post_df["geohash"]), series_to_list(lonlat_df["geohash"]),
)
def test_geodetic_parse(self):
# parse geodetic string with altitude into lon/lat/altitude
post_df = proc.geodetic_parse(
df=lonlat_df[["city", "geodetic"]],
geodetic="geodetic",
latitude="latitude",
longitude="longitude",
altitude="altitude",
)
self.assertListEqual(
sorted(post_df.columns.tolist()),
sorted(["city", "geodetic", "latitude", "longitude", "altitude"]),
)
self.assertListEqual(
series_to_list(post_df["longitude"]),
series_to_list(lonlat_df["longitude"]),
)
self.assertListEqual(
series_to_list(post_df["latitude"]), series_to_list(lonlat_df["latitude"]),
)
self.assertListEqual(
series_to_list(post_df["altitude"]), series_to_list(lonlat_df["altitude"]),
)
# parse geodetic string into lon/lat
post_df = proc.geodetic_parse(
df=lonlat_df[["city", "geodetic"]],
geodetic="geodetic",
latitude="latitude",
longitude="longitude",
)
self.assertListEqual(
sorted(post_df.columns.tolist()),
sorted(["city", "geodetic", "latitude", "longitude"]),
)
self.assertListEqual(
series_to_list(post_df["longitude"]),
series_to_list(lonlat_df["longitude"]),
)
self.assertListEqual(
series_to_list(post_df["latitude"]), series_to_list(lonlat_df["latitude"]),
)