chore(sqllab): remove deprecated PyArrow API (#24135)

This commit is contained in:
Sebastian Liebscher
2023-05-20 20:57:18 +02:00
committed by GitHub
parent 66594ad647
commit 1583090623
5 changed files with 27 additions and 12 deletions

View File

@@ -122,6 +122,8 @@ geographiclib==1.52
# via geopy
geopy==2.2.0
# via apache-superset
greenlet==2.0.2
# via sqlalchemy
gunicorn==20.1.0
# via apache-superset
hashids==1.3.1
@@ -134,6 +136,8 @@ humanize==3.11.0
# via apache-superset
idna==3.2
# via email-validator
importlib-metadata==6.6.0
# via flask
importlib-resources==5.12.0
# via limits
isodate==0.6.0
@@ -209,7 +213,7 @@ prison==0.2.1
# via flask-appbuilder
prompt-toolkit==3.0.38
# via click-repl
pyarrow==10.0.1
pyarrow==12.0.0
# via apache-superset
pycparser==2.20
# via cffi
@@ -322,6 +326,10 @@ wtforms-json==0.3.5
# via apache-superset
xlsxwriter==3.0.7
# via apache-superset
zipp==3.15.0
# via
# importlib-metadata
# importlib-resources
# The following packages are considered to be unsafe in a requirements file:
# setuptools

View File

@@ -111,7 +111,7 @@ setup(
"python-dateutil",
"python-dotenv",
"python-geohash",
"pyarrow>=10.0.1, <11",
"pyarrow>=12.0.0, <13",
"pyyaml>=5.4",
"PyJWT>=2.4.0, <3.0",
"redis>=4.5.4, <5.0",

View File

@@ -24,7 +24,6 @@ from typing import Any, cast, Dict, List, Optional, Tuple, Union
import backoff
import msgpack
import pyarrow as pa
import simplejson as json
from celery import Task
from celery.exceptions import SoftTimeLimitExceeded
@@ -51,6 +50,7 @@ from superset.models.sql_lab import Query
from superset.result_set import SupersetResultSet
from superset.sql_parse import CtasMethod, insert_rls, ParsedQuery
from superset.sqllab.limiting_factor import LimitingFactor
from superset.sqllab.utils import write_ipc_buffer
from superset.utils.celery import session_scope
from superset.utils.core import (
json_iso_dttm_ser,
@@ -355,12 +355,7 @@ def _serialize_and_expand_data(
with stats_timing(
"sqllab.query.results_backend_pa_serialization", stats_logger
):
data = (
pa.default_serialization_context()
.serialize(result_set.pa_table)
.to_buffer()
.to_pybytes()
)
data = write_ipc_buffer(result_set.pa_table).to_pybytes()
# expand when loading data from results backend
all_columns, expanded_columns = (selected_columns, [])
@@ -379,7 +374,8 @@ def _serialize_and_expand_data(
return (data, selected_columns, all_columns, expanded_columns)
def execute_sql_statements( # pylint: disable=too-many-arguments, too-many-locals, too-many-statements, too-many-branches
def execute_sql_statements(
# pylint: disable=too-many-arguments, too-many-locals, too-many-statements, too-many-branches
query_id: int,
rendered_query: str,
return_results: bool,

View File

@@ -16,6 +16,8 @@
# under the License.
from typing import Any, Dict
import pyarrow as pa
from superset.common.db_query_status import QueryStatus
@@ -45,3 +47,12 @@ def apply_display_max_row_configuration_if_require( # pylint: disable=invalid-n
sql_results["data"] = sql_results["data"][:max_rows_in_result]
sql_results["displayLimitReached"] = True
return sql_results
def write_ipc_buffer(table: pa.Table) -> pa.Buffer:
    """Serialize ``table`` using the Arrow IPC stream format.

    The whole table is written into an in-memory output stream and the
    resulting buffer is returned, ready for ``.to_pybytes()`` or for
    round-tripping via ``pa.ipc.open_stream``.  This replaces the removed
    ``pa.default_serialization_context()`` API (deprecated in PyArrow 2.0).

    :param table: the Arrow table to serialize
    :return: a PyArrow buffer containing the IPC stream bytes
    """
    output_stream = pa.BufferOutputStream()
    writer = pa.ipc.new_stream(output_stream, table.schema)
    try:
        writer.write_table(table)
    finally:
        # Close explicitly so the stream's end-of-stream marker is written
        # even if write_table raises.
        writer.close()
    return output_stream.getvalue()

View File

@@ -55,7 +55,6 @@ from superset.viz import BaseViz
logger = logging.getLogger(__name__)
stats_logger = app.config["STATS_LOGGER"]
REJECTED_FORM_DATA_KEYS: List[str] = []
if not feature_flag_manager.is_feature_enabled("ENABLE_JAVASCRIPT_CONTROLS"):
REJECTED_FORM_DATA_KEYS = ["js_tooltip", "js_onclick_href", "js_data_mutator"]
@@ -562,7 +561,8 @@ def _deserialize_results_payload(
with stats_timing("sqllab.query.results_backend_pa_deserialize", stats_logger):
try:
pa_table = pa.deserialize(ds_payload["data"])
reader = pa.BufferReader(ds_payload["data"])
pa_table = pa.ipc.open_stream(reader).read_all()
except pa.ArrowSerializationError as ex:
raise SerializationError("Unable to deserialize table") from ex