mirror of
https://github.com/apache/superset.git
synced 2026-04-19 08:04:53 +00:00
feat: Add geospatial post processing operations (#9661)
* feat: Add geospatial post processing operations * Linting * Refactor * Add tests * Improve docs * Address comments * fix latitude/longitude mixup * fix: bad refactor by pycharm
This commit is contained in:
14
tests/fixtures/dataframes.py
vendored
14
tests/fixtures/dataframes.py
vendored
@@ -119,3 +119,17 @@ timeseries_df = DataFrame(
|
||||
index=to_datetime(["2019-01-01", "2019-01-02", "2019-01-05", "2019-01-07"]),
|
||||
data={"label": ["x", "y", "z", "q"], "y": [1.0, 2.0, 3.0, 4.0]},
|
||||
)
|
||||
|
||||
lonlat_df = DataFrame(
|
||||
{
|
||||
"city": ["New York City", "Sydney"],
|
||||
"geohash": ["dr5regw3pg6f", "r3gx2u9qdevk"],
|
||||
"latitude": [40.71277496, -33.85598011],
|
||||
"longitude": [-74.00597306, 151.20666526],
|
||||
"altitude": [5.5, 0.012],
|
||||
"geodetic": [
|
||||
"40.71277496, -74.00597306, 5.5km",
|
||||
"-33.85598011, 151.20666526, 12m",
|
||||
],
|
||||
}
|
||||
)
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
# under the License.
|
||||
# isort:skip_file
|
||||
import math
|
||||
from typing import Any, List
|
||||
from typing import Any, List, Optional
|
||||
|
||||
from pandas import Series
|
||||
|
||||
@@ -24,7 +24,7 @@ from superset.exceptions import QueryObjectValidationError
|
||||
from superset.utils import pandas_postprocessing as proc
|
||||
|
||||
from .base_tests import SupersetTestCase
|
||||
from .fixtures.dataframes import categories_df, timeseries_df
|
||||
from .fixtures.dataframes import categories_df, lonlat_df, timeseries_df
|
||||
|
||||
|
||||
def series_to_list(series: Series) -> List[Any]:
|
||||
@@ -43,6 +43,19 @@ def series_to_list(series: Series) -> List[Any]:
|
||||
]
|
||||
|
||||
|
||||
def round_floats(
|
||||
floats: List[Optional[float]], precision: int
|
||||
) -> List[Optional[float]]:
|
||||
"""
|
||||
Round list of floats to certain precision
|
||||
|
||||
:param floats: floats to round
|
||||
:param precision: intended decimal precision
|
||||
:return: rounded floats
|
||||
"""
|
||||
return [round(val, precision) if val else None for val in floats]
|
||||
|
||||
|
||||
class PostProcessingTestCase(SupersetTestCase):
|
||||
def test_pivot(self):
|
||||
aggregates = {"idx_nulls": {"operator": "sum"}}
|
||||
@@ -219,25 +232,40 @@ class PostProcessingTestCase(SupersetTestCase):
|
||||
post_df = proc.select(df=timeseries_df, columns=["label"])
|
||||
self.assertListEqual(post_df.columns.tolist(), ["label"])
|
||||
|
||||
# rename one column
|
||||
# rename and select one column
|
||||
post_df = proc.select(df=timeseries_df, columns=["y"], rename={"y": "y1"})
|
||||
self.assertListEqual(post_df.columns.tolist(), ["y1"])
|
||||
|
||||
# rename one and leave one unchanged
|
||||
post_df = proc.select(
|
||||
df=timeseries_df, columns=["label", "y"], rename={"y": "y1"}
|
||||
)
|
||||
post_df = proc.select(df=timeseries_df, rename={"y": "y1"})
|
||||
self.assertListEqual(post_df.columns.tolist(), ["label", "y1"])
|
||||
|
||||
# drop one column
|
||||
post_df = proc.select(df=timeseries_df, exclude=["label"])
|
||||
self.assertListEqual(post_df.columns.tolist(), ["y"])
|
||||
|
||||
# rename and drop one column
|
||||
post_df = proc.select(df=timeseries_df, rename={"y": "y1"}, exclude=["label"])
|
||||
self.assertListEqual(post_df.columns.tolist(), ["y1"])
|
||||
|
||||
# invalid columns
|
||||
self.assertRaises(
|
||||
QueryObjectValidationError,
|
||||
proc.select,
|
||||
df=timeseries_df,
|
||||
columns=["qwerty"],
|
||||
columns=["abc"],
|
||||
rename={"abc": "qwerty"},
|
||||
)
|
||||
|
||||
# select renamed column by new name
|
||||
self.assertRaises(
|
||||
QueryObjectValidationError,
|
||||
proc.select,
|
||||
df=timeseries_df,
|
||||
columns=["label_new"],
|
||||
rename={"label": "label_new"},
|
||||
)
|
||||
|
||||
def test_diff(self):
|
||||
# overwrite column
|
||||
post_df = proc.diff(df=timeseries_df, columns={"y": "y"})
|
||||
@@ -288,3 +316,83 @@ class PostProcessingTestCase(SupersetTestCase):
|
||||
columns={"y": "y"},
|
||||
operator="abc",
|
||||
)
|
||||
|
||||
def test_geohash_decode(self):
|
||||
# decode lon/lat from geohash
|
||||
post_df = proc.geohash_decode(
|
||||
df=lonlat_df[["city", "geohash"]],
|
||||
geohash="geohash",
|
||||
latitude="latitude",
|
||||
longitude="longitude",
|
||||
)
|
||||
self.assertListEqual(
|
||||
sorted(post_df.columns.tolist()),
|
||||
sorted(["city", "geohash", "latitude", "longitude"]),
|
||||
)
|
||||
self.assertListEqual(
|
||||
round_floats(series_to_list(post_df["longitude"]), 6),
|
||||
round_floats(series_to_list(lonlat_df["longitude"]), 6),
|
||||
)
|
||||
self.assertListEqual(
|
||||
round_floats(series_to_list(post_df["latitude"]), 6),
|
||||
round_floats(series_to_list(lonlat_df["latitude"]), 6),
|
||||
)
|
||||
|
||||
def test_geohash_encode(self):
|
||||
# encode lon/lat into geohash
|
||||
post_df = proc.geohash_encode(
|
||||
df=lonlat_df[["city", "latitude", "longitude"]],
|
||||
latitude="latitude",
|
||||
longitude="longitude",
|
||||
geohash="geohash",
|
||||
)
|
||||
self.assertListEqual(
|
||||
sorted(post_df.columns.tolist()),
|
||||
sorted(["city", "geohash", "latitude", "longitude"]),
|
||||
)
|
||||
self.assertListEqual(
|
||||
series_to_list(post_df["geohash"]), series_to_list(lonlat_df["geohash"]),
|
||||
)
|
||||
|
||||
def test_geodetic_parse(self):
|
||||
# parse geodetic string with altitude into lon/lat/altitude
|
||||
post_df = proc.geodetic_parse(
|
||||
df=lonlat_df[["city", "geodetic"]],
|
||||
geodetic="geodetic",
|
||||
latitude="latitude",
|
||||
longitude="longitude",
|
||||
altitude="altitude",
|
||||
)
|
||||
self.assertListEqual(
|
||||
sorted(post_df.columns.tolist()),
|
||||
sorted(["city", "geodetic", "latitude", "longitude", "altitude"]),
|
||||
)
|
||||
self.assertListEqual(
|
||||
series_to_list(post_df["longitude"]),
|
||||
series_to_list(lonlat_df["longitude"]),
|
||||
)
|
||||
self.assertListEqual(
|
||||
series_to_list(post_df["latitude"]), series_to_list(lonlat_df["latitude"]),
|
||||
)
|
||||
self.assertListEqual(
|
||||
series_to_list(post_df["altitude"]), series_to_list(lonlat_df["altitude"]),
|
||||
)
|
||||
|
||||
# parse geodetic string into lon/lat
|
||||
post_df = proc.geodetic_parse(
|
||||
df=lonlat_df[["city", "geodetic"]],
|
||||
geodetic="geodetic",
|
||||
latitude="latitude",
|
||||
longitude="longitude",
|
||||
)
|
||||
self.assertListEqual(
|
||||
sorted(post_df.columns.tolist()),
|
||||
sorted(["city", "geodetic", "latitude", "longitude"]),
|
||||
)
|
||||
self.assertListEqual(
|
||||
series_to_list(post_df["longitude"]),
|
||||
series_to_list(lonlat_df["longitude"]),
|
||||
)
|
||||
self.assertListEqual(
|
||||
series_to_list(post_df["latitude"]), series_to_list(lonlat_df["latitude"]),
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user