diff --git a/superset/result_set.py b/superset/result_set.py
index bc78fd676cc..538d878fbd9 100644
--- a/superset/result_set.py
+++ b/superset/result_set.py
@@ -18,6 +18,7 @@
 """ Superset wrapper around pyarrow.Table.
 """
 import datetime
+import json
 import logging
 import re
 from typing import Any, Callable, Dict, List, Optional, Tuple, Type
@@ -27,6 +28,7 @@ import pandas as pd
 import pyarrow as pa
 
 from superset import db_engine_specs
+from superset.utils import core as utils
 
 
 def dedup(l: List[str], suffix: str = "__", case_sensitive: bool = True) -> List[str]:
@@ -86,7 +88,19 @@ class SupersetResultSet:
         # related: https://issues.apache.org/jira/browse/ARROW-5248
         if pa_data:
             for i, column in enumerate(column_names):
-                if pa.types.is_temporal(pa_data[i].type):
+                # TODO: revisit nested column serialization once Arrow 1.0 is released with:
+                # https://github.com/apache/arrow/pull/6199
+                # Related issue: #8978
+                if pa.types.is_nested(pa_data[i].type):
+                    # JSON-encode each value; the column becomes a plain string array.
+                    stringify_func = lambda item: json.dumps(
+                        item, default=utils.json_iso_dttm_ser
+                    )
+                    vfunc = np.vectorize(stringify_func)
+                    stringified_arr = vfunc(array[:, i])
+                    pa_data[i] = pa.array(stringified_arr)
+
+                elif pa.types.is_temporal(pa_data[i].type):
                     sample = self.first_nonempty(array[:, i])
                     if sample and isinstance(sample, datetime.datetime):
                         try:
diff --git a/tests/result_set_tests.py b/tests/result_set_tests.py
index 977dbac511d..6e7df13b69c 100644
--- a/tests/result_set_tests.py
+++ b/tests/result_set_tests.py
@@ -124,6 +124,47 @@ class SupersetResultSetTestCase(SupersetTestCase):
             ],
         )
 
+    def test_nested_types(self):
+        """Nested list and dict columns are serialized to JSON strings."""
+        data = [
+            (
+                4,
+                [{"table_name": "unicode_test", "database_id": 1}],
+                [1, 2, 3],
+                {"chart_name": "scatter"},
+            ),
+            (
+                3,
+                [{"table_name": "birth_names", "database_id": 1}],
+                [4, 5, 6],
+                {"chart_name": "plot"},
+            ),
+        ]
+        cursor_descr = [("id",), ("dict_arr",), ("num_arr",), ("map_col",)]
+        results = SupersetResultSet(data, cursor_descr, BaseEngineSpec)
+        self.assertEqual(results.columns[0]["type"], "INT")
+        self.assertEqual(results.columns[1]["type"], "STRING")
+        self.assertEqual(results.columns[2]["type"], "STRING")
+        self.assertEqual(results.columns[3]["type"], "STRING")
+        df = results.to_pandas_df()
+        self.assertEqual(
+            df_to_records(df),
+            [
+                {
+                    "id": 4,
+                    "dict_arr": '[{"table_name": "unicode_test", "database_id": 1}]',
+                    "num_arr": "[1, 2, 3]",
+                    "map_col": '{"chart_name": "scatter"}',
+                },
+                {
+                    "id": 3,
+                    "dict_arr": '[{"table_name": "birth_names", "database_id": 1}]',
+                    "num_arr": "[4, 5, 6]",
+                    "map_col": '{"chart_name": "plot"}',
+                },
+            ],
+        )
+
     def test_empty_datetime(self):
         data = [(None,)]
         cursor_descr = [("ds", "timestamp", None, None, None, None, True)]