fix: ensure column name in description is string (#20340)

* fix: ensure column name in description is string

* Add unit test
This commit is contained in:
Beto Dealmeida
2022-06-21 06:10:46 -07:00
committed by GitHub
parent 5afeba34bd
commit f3b289d3c3
3 changed files with 90 additions and 2 deletions

View File

@@ -71,6 +71,19 @@ def destringify(obj: str) -> Any:
return json.loads(obj)
def convert_to_string(value: Any) -> str:
    """
    Normalise a column name taken from a cursor description to ``str``.

    ``str`` input is returned unchanged, ``bytes`` input is decoded as UTF-8,
    and any other object is passed through ``str()``.
    """
    if isinstance(value, bytes):
        return value.decode("utf-8")
    return value if isinstance(value, str) else str(value)
class SupersetResultSet:
def __init__( # pylint: disable=too-many-locals
self,
@@ -88,7 +101,9 @@ class SupersetResultSet:
if cursor_description:
# get deduped list of column names
column_names = dedup([col[0] for col in cursor_description])
column_names = dedup(
[convert_to_string(col[0]) for col in cursor_description]
)
# fix cursor descriptor with the deduped names
deduped_cursor_desc = [

View File

@@ -69,7 +69,13 @@ class ResultSetColumnType(TypedDict):
CacheConfig = Dict[str, Any]

# One row of a DB-API cursor description.
# NOTE(review): the seven positions presumably follow the PEP 249
# ``Cursor.description`` layout (name, type_code, display_size,
# internal_size, precision, scale, null_ok) -- confirm.
# The first field (the column name) is allowed to be ``bytes`` as well as
# ``str``; consumers should normalise it to ``str`` before using it.
DbapiDescriptionRow = Tuple[
    Union[str, bytes],
    str,
    Optional[str],
    Optional[str],
    Optional[int],
    Optional[int],
    bool,
]
# A full description is either a list or a tuple of such rows.
DbapiDescription = Union[List[DbapiDescriptionRow], Tuple[DbapiDescriptionRow, ...]]
# Result rows: a sequence whose items are lists or tuples of arbitrary values.
DbapiResult = Sequence[Union[List[Any], Tuple[Any, ...]]]

View File

@@ -0,0 +1,67 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# pylint: disable=import-outside-toplevel, unused-argument
def test_column_names_as_bytes(app_context: None) -> None:
    """
    Check that a result set can be built from a cursor description whose
    column names are ``bytes`` and that the resulting dataframe renders
    those names as ordinary text headers.
    """
    from superset.db_engine_specs.redshift import RedshiftEngineSpec
    from superset.result_set import SupersetResultSet

    rows = (
        [
            "2016-01-26", 392.002014, 397.765991, 390.575012,
            392.153015, 392.153015, 58147000,
        ],
        [
            "2016-01-27", 392.444, 396.842987, 391.782013,
            394.971985, 394.971985, 47424400,
        ],
    )

    # (column name as bytes, type code) pairs; the five remaining fields of
    # every description row are left as None.
    named_columns = [
        (b"date", 1043),
        (b"open", 701),
        (b"high", 701),
        (b"low", 701),
        (b"close", 701),
        (b"adj close", 701),
        (b"volume", 20),
    ]
    cursor_description = [
        (column_name, type_code, None, None, None, None, None)
        for column_name, type_code in named_columns
    ]

    # NOTE(review): this literal is compared byte-for-byte with the output of
    # to_markdown(), so its cell padding is significant -- confirm it matches
    # the real rendering.
    expected_markdown = """
| | date | open | high | low | close | adj close | volume |
|---:|:-----------|--------:|--------:|--------:|--------:|------------:|---------:|
| 0 | 2016-01-26 | 392.002 | 397.766 | 390.575 | 392.153 | 392.153 | 58147000 |
| 1 | 2016-01-27 | 392.444 | 396.843 | 391.782 | 394.972 | 394.972 | 47424400 |
""".strip()

    results = SupersetResultSet(rows, cursor_description, RedshiftEngineSpec)  # type: ignore
    rendered_markdown = results.to_pandas_df().to_markdown()
    assert rendered_markdown == expected_markdown