Match viz dataframe column case to form_data fields for Snowflake, Oracle and Redshift (#5487)

* Add function to fix dataframe column case * Fix broken handle_nulls method * Add case sensitivity option to dedup * Refactor function definition and call location * Remove added blank line * Move df column rename logic to db_engine_spec * Remove redundant variable * Update comments in db_engine_specs * Tie df adjustment to db_engine_spec class attribute * Fix dedup error * Linting * Check for db_engine_spec attribute prior to adjustment * Rename case sensitivity flag * Linting * Remove function that was moved to db_engine_specs * Get metrics names from utils * Remove double import and rename dedup variable
2026-04-20 00:24:38 +00:00 · 2018-08-03 19:53:56 +03:00
parent aa9b30cf55
commit e1f4db8e24
4 changed files with 91 additions and 16 deletions
--- a/superset/dataframe.py
+++ b/superset/dataframe.py
@@ -27,23 +27,26 @@ INFER_COL_TYPES_THRESHOLD = 95
 INFER_COL_TYPES_SAMPLE_SIZE = 100


-def dedup(l, suffix='__'):
+def dedup(l, suffix='__', case_sensitive=True):
    """De-duplicates a list of string by suffixing a counter

    Always returns the same number of entries as provided, and always returns
-    unique values.
+    unique values. Case sensitive comparison by default.

-    >>> print(','.join(dedup(['foo', 'bar', 'bar', 'bar'])))
-    foo,bar,bar__1,bar__2
+    >>> print(','.join(dedup(['foo', 'bar', 'bar', 'bar', 'Bar'])))
+    foo,bar,bar__1,bar__2,Bar
+    >>> print(','.join(dedup(['foo', 'bar', 'bar', 'bar', 'Bar'], case_sensitive=False)))
+    foo,bar,bar__1,bar__2,Bar__3
    """
    new_l = []
    seen = {}
    for s in l:
-        if s in seen:
-            seen[s] += 1
-            s += suffix + str(seen[s])
+        s_fixed_case = s if case_sensitive else s.lower()
+        if s_fixed_case in seen:
+            seen[s_fixed_case] += 1
+            s += suffix + str(seen[s_fixed_case])
        else:
-            seen[s] = 0
+            seen[s_fixed_case] = 0
        new_l.append(s)
    return new_l

@@ -70,7 +73,9 @@ class SupersetDataFrame(object):
        if cursor_description:
            column_names = [col[0] for col in cursor_description]

-        self.column_names = dedup(column_names)
+        case_sensitive = db_engine_spec.consistent_case_sensitivity
+        self.column_names = dedup(column_names,
+                                  case_sensitive=case_sensitive)

        data = data or []
        self.df = (