[SQL Lab] Async query results serialization with MessagePack and PyArrow (#8069)

* Add support for msgpack results_backend serialization

* Serialize DataFrame with PyArrow rather than JSON

* Adjust dependencies, de-lint

* Add tests for (de)serialization methods

* Add MessagePack config info to Installation docs

* Enable msgpack/arrow serialization by default

* [Fix] Prevent msgpack serialization on synchronous queries

* Add type annotations
This commit is contained in:
Rob DiCiuccio
2019-08-27 14:23:40 -07:00
committed by Maxime Beauchemin
parent 56566c2645
commit 7595d9e5fd
13 changed files with 362 additions and 28 deletions

View File

@@ -100,19 +100,27 @@ class SupersetDataFrame(object):
except Exception as e:
logging.exception(e)
@property
def raw_df(self):
    """Return the object stored in ``self.df`` as-is, with no formatting applied."""
    frame = self.df
    return frame
@property
def size(self):
    """Return the number of entries in the index of ``self.df``."""
    index = self.df.index
    return len(index)
@property
def data(self):
    """Return ``self.df`` converted by ``format_data``."""
    frame = self.df
    formatted = self.format_data(frame)
    return formatted
@classmethod
def format_data(cls, df):
# work around for https://github.com/pandas-dev/pandas/issues/18372
data = [
dict(
(k, maybe_box_datetimelike(v))
for k, v in zip(self.df.columns, np.atleast_1d(row))
for k, v in zip(df.columns, np.atleast_1d(row))
)
for row in self.df.values
for row in df.values
]
for d in data:
for k, v in list(d.items()):