[BUGFIX]: Check datatype of results before converting to DataFrame (#4108)

* Conditional check on datatype of results before converting to df; fix type checking; fix conditional checks; remove trailing whitespace and fix df_data fallback def; actually remove trailing whitespace; generalized type check to check all columns for dict; refactor dict col check. * Move df conversion to helper and add unit test; add missing newlines; another missing newline; fix quotes; more quote fixes.
2026-04-19 08:04:53 +00:00 · 2018-01-23 23:58:06 -05:00
parent 2c72a7ae4f
commit 4bc5fe5495
2 changed files with 39 additions and 5 deletions
--- a/superset/sql_lab.py
+++ b/superset/sql_lab.py
@@ -10,6 +10,7 @@ from time import sleep
 import uuid

 from celery.exceptions import SoftTimeLimitExceeded
+import numpy as np
 import pandas as pd
 import sqlalchemy
 from sqlalchemy.orm import sessionmaker
@@ -85,6 +86,26 @@ def get_session(nullpool):
    return session


+def convert_results_to_df(cursor_description, data):
+    """Convert raw query rows and cursor metadata to a SupersetDataFrame."""
+    column_names = (
+        [col[0] for col in cursor_description] if cursor_description else [])
+    column_names = dedup(column_names)
+    # Only the first row is inspected for dict values: if it holds one, the
+    # rows go to pandas as a plain list; otherwise they go through np.array.
+    if data:
+        first_row = data[0]
+        has_dict_col = any([isinstance(c, dict) for c in first_row])
+        df_data = list(data) if has_dict_col else np.array(data)
+    else:
+        df_data = []
+    # NOTE(review): np.array may coerce mixed-type rows to one dtype; confirm
+    cdf = dataframe.SupersetDataFrame(
+        pd.DataFrame(df_data, columns=column_names))
+
+    return cdf
+
+
@celery_app.task(bind=True, soft_time_limit=SQLLAB_TIMEOUT)
 def get_sql_results(
        ctask, query_id, return_results=True, store_results=False,
@@ -224,11 +245,7 @@ def execute_sql(
            },
            default=utils.json_iso_dttm_ser)

-    column_names = (
-        [col[0] for col in cursor_description] if cursor_description else [])
-    column_names = dedup(column_names)
-    cdf = dataframe.SupersetDataFrame(
-        pd.DataFrame(list(data), columns=column_names))
+    cdf = convert_results_to_df(cursor_description, data)

    query.rows = cdf.size
    query.progress = 100