Compare commits

...

12 Commits

Author SHA1 Message Date
Maxime Beauchemin
b25c14da09 0.26.3 2018-07-05 09:47:37 -04:00
Maxime Beauchemin
721230098d 0.26.2 2018-07-05 09:37:44 -04:00
Maxime Beauchemin
988465c379 0.26.1 2018-07-04 17:16:24 -04:00
Riccardo Magliocchetti
dddcb141db A couple of setup.py fixes (#5338)
* setup: fix long description read in python2

* setup: fix git_get_sha in python3

Fix #5317

(cherry picked from commit 81bd5cc4c3)
2018-07-04 17:15:38 -04:00
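
For context, a minimal self-contained sketch of the two patterns this fix adopts (the full change appears in the setup.py diff below): io.open with an explicit encoding yields text on both Python 2 and 3, and subprocess.check_output returns bytes on Python 3, so the SHA has to be decoded before string methods are applied.

    import io
    import subprocess

    # Explicit encoding: avoids py2 handing back undecoded bytes and py3
    # guessing the locale encoding.
    with io.open('README.md', encoding='utf-8') as f:
        long_description = f.read()

    def get_git_sha():
        try:
            # bytes on py3; decode before strip() so a str is returned
            s = subprocess.check_output(['git', 'rev-parse', 'HEAD'])
            return s.decode().strip()
        except Exception:
            return ''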
Maxime Beauchemin
112b67a01b 0.26.0 2018-07-03 10:13:49 -04:00
Hugh A. Miles II
2a19b4baaa [DeckGL] Raise error with null values (#5302)
* raise errors with null values

* linting

* linting some more

* use get

* change ordering

* linting

(cherry picked from commit 089037f1aa)
2018-07-03 10:13:22 -04:00
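
A condensed, self-contained sketch of the guard this commit introduces (the actual change is the superset/viz.py hunk near the end of the diff; the helper name here is simplified for illustration):

    import pandas as pd

    class NullValueException(Exception):  # mirrors superset.exceptions
        status = 400

    def assert_spatial_present(df, key):
        # DataFrame.get() returns None when the spatial column was never
        # built (e.g. NULL lat/lon or geohash inputs), so failing here gives
        # a clear error instead of a crash deeper in the deck.gl layer.
        if df.get(key) is None:
            raise NullValueException(
                'Encountered invalid NULL spatial entry, '
                'please consider filtering those out')
        return df

    df = pd.DataFrame({'spatial': [(0.0, 0.0)]})
    assert_spatial_present(df, 'spatial')   # passes
    # assert_spatial_present(df, 'geo')     # would raise NullValueException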
Maxime Beauchemin
226b4fb5f4 [bugfix] README encoding-related UnicodeDecodeError on setup.py (#5309)
We were seeing a UnicodeDecodeError on our build system running py3.6, though I
couldn't reproduce it on my local 3.6. This fix addresses the issue.

(cherry picked from commit 885d7791a0)
2018-07-03 10:13:13 -04:00
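
The likely mechanism behind the "works locally, fails on CI" symptom: on Python 3, open() without an encoding argument falls back to the locale's preferred encoding, so a C-locale build box chokes on non-ASCII bytes in README.md while a UTF-8 laptop does not. A quick probe:

    import locale

    # 'UTF-8' on a typical dev laptop; 'ANSI_X3.4-1968' (ASCII) on a
    # C-locale build box, where implicit decoding of README.md fails.
    print(locale.getpreferredencoding(False))

    # open('README.md').read()                    # may raise UnicodeDecodeError
    # open('README.md', encoding='utf-8').read()  # explicit encoding is safe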
Maxime Beauchemin
fae7a146f2 0.26.0rc2 2018-06-27 21:40:43 -07:00
Maxime Beauchemin
9119091af7 Improve database type inference (#4724)
* Improve database type inference

Python's DBAPI isn't very clear or homogeneous about the
cursor.description specification, and this PR attempts to improve
how the datatypes returned in the cursor are inferred.

This work started because Presto's TIMESTAMP type was mishandled as a
string, since the database driver (pyhive) returns it as one. The work
here fixes that bug and does a better job of inferring MySQL and Presto
types. It also creates a new method in db_engine_specs that other
database engines can implement to become more precise in their
type inference as needed.

* Fixing tests

* Addressing comments

* Using infer_objects

* Removing faulty line

* Addressing PrestoSpec redundant method comment

* Fix rebase issue

* Fix tests

(cherry picked from commit 777d876a52)
2018-06-27 21:40:14 -07:00
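
To illustrate the extension point described above: BaseEngineSpec.get_datatype (added in the superset/db_engine_specs.py diff below) upper-cases string type codes, and engines whose drivers report opaque codes can override it. A hypothetical spec, not part of the actual change:

    from superset.db_engine_specs import BaseEngineSpec

    class MyEngineSpec(BaseEngineSpec):  # hypothetical, for illustration only
        # Map the driver's integer codes from cursor.description[i][1] to
        # readable type names (MySQLEngineSpec does this via
        # MySQLdb.constants.FIELD_TYPE in the diff below).
        type_code_map = {1: 'TINYINT', 15: 'VARCHAR'}

        @classmethod
        def get_datatype(cls, type_code):
            if isinstance(type_code, int):
                return cls.type_code_map.get(type_code)
            return super(MyEngineSpec, cls).get_datatype(type_code)

SupersetDataFrame then consumes this per-engine hook: it is constructed as SupersetDataFrame(data, cursor.description, db_engine_spec) and prefers the engine-reported type over pandas dtype inference.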
Jeffrey Wang
af74c1b8bb Pin boto3 to 1.4.7 (#5290)
(cherry picked from commit fb988fee2e)
2018-06-27 21:40:01 -07:00
timifasubaa
7bf8920b64 add more precise types to hive table from csv (#5267)
(cherry picked from commit b0eee129e9)
2018-06-27 21:39:48 -07:00
timifasubaa
bc6819a2ee specify hive namespace for tables (#5268)
(cherry picked from commit bd24f854c9)
2018-06-27 21:39:41 -07:00
14 changed files with 281 additions and 136 deletions

requirements.txt

@@ -34,6 +34,7 @@ six==1.11.0
 sqlalchemy==1.2.2
 sqlalchemy-utils==0.32.21
 sqlparse==0.2.4
+tableschema==1.1.0
 thrift==0.11.0
 thrift-sasl==0.3.0
 unicodecsv==0.14.1

setup.py

@@ -4,6 +4,7 @@ from __future__ import division
 from __future__ import print_function
 from __future__ import unicode_literals

+import io
 import json
 import os
 import subprocess
@@ -16,14 +17,14 @@ PACKAGE_FILE = os.path.join(PACKAGE_DIR, 'package.json')
 with open(PACKAGE_FILE) as package_file:
     version_string = json.load(package_file)['version']

-with open('README.md') as readme:
-    long_description = readme.read()
+with io.open('README.md', encoding='utf-8') as f:
+    long_description = f.read()


 def get_git_sha():
     try:
-        s = str(subprocess.check_output(['git', 'rev-parse', 'HEAD']))
-        return s.strip()
+        s = subprocess.check_output(['git', 'rev-parse', 'HEAD'])
+        return s.decode().strip()
     except Exception:
         return ''
@@ -47,6 +48,7 @@ setup(
     description=(
         'A modern, enterprise-ready business intelligence web application'),
     long_description=long_description,
+    long_description_content_type='text/markdown',
     version=version_string,
     packages=find_packages(),
     include_package_data=True,
@@ -54,7 +56,7 @@ setup(
     scripts=['superset/bin/superset'],
     install_requires=[
         'bleach',
-        'boto3>=1.4.6',
+        'boto3==1.4.7',
         'botocore>=1.7.0, <1.8.0',
         'celery>=4.2.0',
         'colorama',
@@ -90,6 +92,7 @@ setup(
         'sqlalchemy',
         'sqlalchemy-utils',
         'sqlparse',
+        'tableschema',
         'thrift>=0.9.3',
         'thrift-sasl>=0.2.1',
         'unicodecsv',

superset/assets/package.json

@@ -1,6 +1,6 @@
 {
   "name": "superset",
-  "version": "0.999.0dev",
+  "version": "0.26.3",
   "description": "Superset is a data exploration platform designed to be visual, intuitive, and interactive.",
   "license": "Apache-2.0",
   "directories": {

superset/config.py

@@ -314,6 +314,10 @@ CSV_TO_HIVE_UPLOAD_S3_BUCKET = None
 # contain all the external tables
 CSV_TO_HIVE_UPLOAD_DIRECTORY = 'EXTERNAL_HIVE_TABLES/'

+# The namespace within hive where the tables created from
+# uploading CSVs will be stored.
+UPLOADED_CSV_HIVE_NAMESPACE = None
+
 # A dictionary of items that gets merged into the Jinja context for
 # SQL Lab. The existing context gets updated with this dictionary,
 # meaning values for existing keys get overwritten by the content of this
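
A condensed illustration of how the Hive CSV-upload path applies this setting (the real logic is in the HiveEngineSpec hunk of superset/db_engine_specs.py below; the helper name here is invented for the example):

    def qualify_table_name(table_name, namespace):
        # Mirrors create_table_from_csv: dotted names are rejected when a
        # namespace is configured, otherwise the namespace is prefixed.
        if namespace:
            if '.' in table_name:
                raise Exception(
                    "You can't specify a namespace. "
                    'All tables will be uploaded to the '
                    '`{}` namespace'.format(namespace))
            return '{}.{}'.format(namespace, table_name)
        return table_name

    assert qualify_table_name('sales', 'staging') == 'staging.sales'
    assert qualify_table_name('sales', None) == 'sales'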

superset/dataframe.py

@@ -13,6 +13,7 @@ from __future__ import print_function
from __future__ import unicode_literals
from datetime import date, datetime
import logging
import numpy as np
import pandas as pd
@@ -26,6 +27,27 @@ INFER_COL_TYPES_THRESHOLD = 95
INFER_COL_TYPES_SAMPLE_SIZE = 100
def dedup(l, suffix='__'):
"""De-duplicates a list of string by suffixing a counter
Always returns the same number of entries as provided, and always returns
unique values.
>>> print(','.join(dedup(['foo', 'bar', 'bar', 'bar'])))
foo,bar,bar__1,bar__2
"""
new_l = []
seen = {}
for s in l:
if s in seen:
seen[s] += 1
s += suffix + str(seen[s])
else:
seen[s] = 0
new_l.append(s)
return new_l
class SupersetDataFrame(object):
# Mapping numpy dtype.char to generic database types
type_map = {
@@ -43,19 +65,39 @@ class SupersetDataFrame(object):
'V': None, # raw data (void)
}
def __init__(self, df):
self.__df = df.where((pd.notnull(df)), None)
def __init__(self, data, cursor_description, db_engine_spec):
column_names = []
if cursor_description:
column_names = [col[0] for col in cursor_description]
self.column_names = dedup(
db_engine_spec.get_normalized_column_names(cursor_description))
data = data or []
self.df = (
pd.DataFrame(list(data), columns=column_names).infer_objects())
self._type_dict = {}
try:
# The driver may not be passing a cursor.description
self._type_dict = {
col: db_engine_spec.get_datatype(cursor_description[i][1])
for i, col in enumerate(self.column_names)
if cursor_description
}
except Exception as e:
logging.exception(e)
@property
def size(self):
return len(self.__df.index)
return len(self.df.index)
@property
def data(self):
# work around for https://github.com/pandas-dev/pandas/issues/18372
data = [dict((k, _maybe_box_datetimelike(v))
for k, v in zip(self.__df.columns, np.atleast_1d(row)))
for row in self.__df.values]
for k, v in zip(self.df.columns, np.atleast_1d(row)))
for row in self.df.values]
for d in data:
for k, v in list(d.items()):
# if an int is too big for Java Script to handle
@@ -70,7 +112,8 @@ class SupersetDataFrame(object):
"""Given a numpy dtype, Returns a generic database type"""
if isinstance(dtype, ExtensionDtype):
return cls.type_map.get(dtype.kind)
return cls.type_map.get(dtype.char)
elif hasattr(dtype, 'char'):
return cls.type_map.get(dtype.char)
@classmethod
def datetime_conversion_rate(cls, data_series):
@@ -105,7 +148,7 @@ class SupersetDataFrame(object):
# consider checking for key substring too.
if cls.is_id(column_name):
return 'count_distinct'
if (issubclass(dtype.type, np.generic) and
if (hasattr(dtype, 'type') and issubclass(dtype.type, np.generic) and
np.issubdtype(dtype, np.number)):
return 'sum'
return None
@@ -116,22 +159,25 @@ class SupersetDataFrame(object):
:return: dict, with the fields name, type, is_date, is_dim and agg.
"""
if self.__df.empty:
if self.df.empty:
return None
columns = []
sample_size = min(INFER_COL_TYPES_SAMPLE_SIZE, len(self.__df.index))
sample = self.__df
sample_size = min(INFER_COL_TYPES_SAMPLE_SIZE, len(self.df.index))
sample = self.df
if sample_size:
sample = self.__df.sample(sample_size)
for col in self.__df.dtypes.keys():
col_db_type = self.db_type(self.__df.dtypes[col])
sample = self.df.sample(sample_size)
for col in self.df.dtypes.keys():
col_db_type = (
self._type_dict.get(col) or
self.db_type(self.df.dtypes[col])
)
column = {
'name': col,
'agg': self.agg_func(self.__df.dtypes[col], col),
'agg': self.agg_func(self.df.dtypes[col], col),
'type': col_db_type,
'is_date': self.is_date(self.__df.dtypes[col]),
'is_dim': self.is_dimension(self.__df.dtypes[col], col),
'is_date': self.is_date(self.df.dtypes[col]),
'is_dim': self.is_dimension(self.df.dtypes[col], col),
}
if column['type'] in ('OBJECT', None):

superset/db_engine_specs.py

@@ -30,6 +30,7 @@ import boto3
 from flask import g
 from flask_babel import lazy_gettext as _
 import pandas
+from past.builtins import basestring
 import sqlalchemy as sqla
 from sqlalchemy import select
 from sqlalchemy.engine import create_engine
@@ -37,7 +38,7 @@ from sqlalchemy.engine.url import make_url
 from sqlalchemy.sql import text
 from sqlalchemy.sql.expression import TextAsFrom
 import sqlparse
-import unicodecsv
+from tableschema import Table
 from werkzeug.utils import secure_filename

 from superset import app, cache_util, conf, db, utils
@@ -85,6 +86,11 @@ class BaseEngineSpec(object):
     def epoch_ms_to_dttm(cls):
         return cls.epoch_to_dttm().replace('{col}', '({col}/1000.0)')

+    @classmethod
+    def get_datatype(cls, type_code):
+        if isinstance(type_code, basestring) and len(type_code):
+            return type_code.upper()
+
     @classmethod
     def extra_table_metadata(cls, database, table_name, schema_name):
         """Returns engine-specific table metadata"""
@@ -134,7 +140,7 @@ class BaseEngineSpec(object):
     @staticmethod
     def csv_to_df(**kwargs):
         kwargs['filepath_or_buffer'] = \
-            app.config['UPLOAD_FOLDER'] + kwargs['filepath_or_buffer']
+            config['UPLOAD_FOLDER'] + kwargs['filepath_or_buffer']
         kwargs['encoding'] = 'utf-8'
         kwargs['iterator'] = True
         chunks = pandas.read_csv(**kwargs)
@@ -156,7 +162,7 @@ class BaseEngineSpec(object):
         def _allowed_file(filename):
             # Only allow specific file extensions as specified in the config
             extension = os.path.splitext(filename)[1]
-            return extension and extension[1:] in app.config['ALLOWED_EXTENSIONS']
+            return extension and extension[1:] in config['ALLOWED_EXTENSIONS']

         filename = secure_filename(form.csv_file.data.filename)
         if not _allowed_file(filename):
@@ -592,6 +598,7 @@ class MySQLEngineSpec(BaseEngineSpec):
             'INTERVAL DAYOFWEEK(DATE_SUB({col}, INTERVAL 1 DAY)) - 1 DAY))',
             'P1W'),
     )
+    type_code_map = {}  # loaded from get_datatype only if needed

     @classmethod
     def convert_dttm(cls, target_type, dttm):
@@ -606,6 +613,23 @@ class MySQLEngineSpec(BaseEngineSpec):
         uri.database = selected_schema
         return uri

+    @classmethod
+    def get_datatype(cls, type_code):
+        if not cls.type_code_map:
+            # only import and store if needed at least once
+            import MySQLdb
+            ft = MySQLdb.constants.FIELD_TYPE
+            cls.type_code_map = {
+                getattr(ft, k): k
+                for k in dir(ft)
+                if not k.startswith('_')
+            }
+        datatype = type_code
+        if isinstance(type_code, int):
+            datatype = cls.type_code_map.get(type_code)
+        if datatype and isinstance(datatype, basestring) and len(datatype):
+            return datatype
+
     @classmethod
     def epoch_to_dttm(cls):
         return 'from_unixtime({col})'
@@ -973,28 +997,48 @@ class HiveEngineSpec(PrestoEngineSpec):
     @staticmethod
     def create_table_from_csv(form, table):
         """Uploads a csv file and creates a superset datasource in Hive."""
-        def get_column_names(filepath):
-            with open(filepath, 'rb') as f:
-                return next(unicodecsv.reader(f, encoding='utf-8-sig'))
+        def convert_to_hive_type(col_type):
+            """maps tableschema's types to hive types"""
+            tableschema_to_hive_types = {
+                'boolean': 'BOOLEAN',
+                'integer': 'INT',
+                'number': 'DOUBLE',
+                'string': 'STRING',
+            }
+            return tableschema_to_hive_types.get(col_type, 'STRING')

+        table_name = form.name.data
+        if config.get('UPLOADED_CSV_HIVE_NAMESPACE'):
+            if '.' in table_name:
+                raise Exception(
+                    "You can't specify a namespace. "
+                    'All tables will be uploaded to the `{}` namespace'.format(
+                        config.get('HIVE_NAMESPACE')))
+            table_name = '{}.{}'.format(
+                config.get('UPLOADED_CSV_HIVE_NAMESPACE'), table_name)
+
+        filename = form.csv_file.data.filename
-        bucket_path = app.config['CSV_TO_HIVE_UPLOAD_S3_BUCKET']
+        bucket_path = config['CSV_TO_HIVE_UPLOAD_S3_BUCKET']

         if not bucket_path:
             logging.info('No upload bucket specified')
             raise Exception(
                 'No upload bucket specified. You can specify one in the config file.')

-        upload_prefix = app.config['CSV_TO_HIVE_UPLOAD_DIRECTORY']
-        dest_path = os.path.join(table_name, filename)
-        table_name = form.name.data
-        filename = form.csv_file.data.filename
+        upload_prefix = config['CSV_TO_HIVE_UPLOAD_DIRECTORY']

-        upload_path = app.config['UPLOAD_FOLDER'] + \
+        upload_path = config['UPLOAD_FOLDER'] + \
             secure_filename(form.csv_file.data.filename)
-        column_names = get_column_names(upload_path)
-        schema_definition = ', '.join(
-            [s + ' STRING ' for s in column_names])
+
+        hive_table_schema = Table(upload_path).infer()
+        column_name_and_type = []
+        for column_info in hive_table_schema['fields']:
+            column_name_and_type.append(
+                '{} {}'.format(
+                    column_info['name'],
+                    convert_to_hive_type(column_info['type'])))
+        schema_definition = ', '.join(column_name_and_type)

         s3 = boto3.client('s3')
         location = os.path.join('s3a://', bucket_path, upload_prefix, table_name)

superset/exceptions.py

@@ -26,5 +26,9 @@ class NoDataException(SupersetException):
     status = 400


+class NullValueException(SupersetException):
+    status = 400
+
+
 class SupersetTemplateException(SupersetException):
     pass

superset/sql_lab.py

@@ -10,8 +10,6 @@ import uuid
 from celery.exceptions import SoftTimeLimitExceeded
 from contextlib2 import contextmanager
-import numpy as np
-import pandas as pd
 import sqlalchemy
 from sqlalchemy.orm import sessionmaker
 from sqlalchemy.pool import NullPool
@@ -31,27 +29,6 @@ class SqlLabException(Exception):
     pass


-def dedup(l, suffix='__'):
-    """De-duplicates a list of string by suffixing a counter
-
-    Always returns the same number of entries as provided, and always returns
-    unique values.
-
-    >>> print(','.join(dedup(['foo', 'bar', 'bar', 'bar'])))
-    foo,bar,bar__1,bar__2
-    """
-    new_l = []
-    seen = {}
-    for s in l:
-        if s in seen:
-            seen[s] += 1
-            s += suffix + str(seen[s])
-        else:
-            seen[s] = 0
-        new_l.append(s)
-    return new_l
-
-
 def get_query(query_id, session, retry_count=5):
     """attempts to get the query and retry if it cannot"""
     query = None
@@ -96,24 +73,6 @@ def session_scope(nullpool):
     session.close()


-def convert_results_to_df(column_names, data):
-    """Convert raw query results to a DataFrame."""
-    column_names = dedup(column_names)
-
-    # check whether the result set has any nested dict columns
-    if data:
-        first_row = data[0]
-        has_dict_col = any([isinstance(c, dict) for c in first_row])
-        df_data = list(data) if has_dict_col else np.array(data, dtype=object)
-    else:
-        df_data = []
-
-    cdf = dataframe.SupersetDataFrame(
-        pd.DataFrame(df_data, columns=column_names))
-
-    return cdf
-
-
 @celery_app.task(bind=True, soft_time_limit=SQLLAB_TIMEOUT)
 def get_sql_results(
     ctask, query_id, rendered_query, return_results=True, store_results=False,
@@ -233,7 +192,6 @@ def execute_sql(
         return handle_error(db_engine_spec.extract_error_message(e))

     logging.info('Fetching cursor description')
-    column_names = db_engine_spec.get_normalized_column_names(cursor.description)

     if conn is not None:
         conn.commit()
@@ -242,7 +200,7 @@ def execute_sql(
     if query.status == utils.QueryStatus.STOPPED:
         return handle_error('The query has been stopped')

-    cdf = convert_results_to_df(column_names, data)
+    cdf = dataframe.SupersetDataFrame(data, cursor.description, db_engine_spec)

     query.rows = cdf.size
     query.progress = 100

superset/viz.py

@@ -38,6 +38,7 @@ from six import string_types, text_type
 from six.moves import cPickle as pkl, reduce

 from superset import app, cache, get_manifest_file, utils
+from superset.exceptions import NullValueException
 from superset.utils import DTTM_ALIAS, JS_MAX_INTEGER, merge_extra_filters
@@ -2109,6 +2110,11 @@ class BaseDeckGLViz(BaseViz):
             df[key] = list(zip(latlong.apply(lambda x: x[0]),
                                latlong.apply(lambda x: x[1])))
             del df[spatial.get('geohashCol')]
+
+        if df.get(key) is None:
+            raise NullValueException(_('Encountered invalid NULL spatial entry, \
+                please consider filtering those out'))
+
         return df

     def query_obj(self):

tests/celery_tests.py

@@ -14,7 +14,7 @@ import unittest
 import pandas as pd
 from past.builtins import basestring

-from superset import app, cli, dataframe, db, security_manager
+from superset import app, cli, db, security_manager
 from superset.models.helpers import QueryStatus
 from superset.models.sql_lab import Query
 from superset.sql_parse import SupersetQuery
@@ -245,55 +245,6 @@ class CeleryTestCase(SupersetTestCase):
     def dictify_list_of_dicts(cls, l, k):
         return {str(o[k]): cls.de_unicode_dict(o) for o in l}

-    def test_get_columns(self):
-        main_db = self.get_main_database(db.session)
-        df = main_db.get_df('SELECT * FROM multiformat_time_series', None)
-        cdf = dataframe.SupersetDataFrame(df)
-
-        # Making ordering non-deterministic
-        cols = self.dictify_list_of_dicts(cdf.columns, 'name')
-
-        if main_db.sqlalchemy_uri.startswith('sqlite'):
-            self.assertEqual(self.dictify_list_of_dicts([
-                {'is_date': True, 'type': 'STRING', 'name': 'ds',
-                 'is_dim': False},
-                {'is_date': True, 'type': 'STRING', 'name': 'ds2',
-                 'is_dim': False},
-                {'agg': 'sum', 'is_date': False, 'type': 'INT',
-                 'name': 'epoch_ms', 'is_dim': False},
-                {'agg': 'sum', 'is_date': False, 'type': 'INT',
-                 'name': 'epoch_s', 'is_dim': False},
-                {'is_date': True, 'type': 'STRING', 'name': 'string0',
-                 'is_dim': False},
-                {'is_date': False, 'type': 'STRING',
-                 'name': 'string1', 'is_dim': True},
-                {'is_date': True, 'type': 'STRING', 'name': 'string2',
-                 'is_dim': False},
-                {'is_date': False, 'type': 'STRING',
-                 'name': 'string3', 'is_dim': True}], 'name'),
-                cols,
-            )
-        else:
-            self.assertEqual(self.dictify_list_of_dicts([
-                {'is_date': True, 'type': 'DATETIME', 'name': 'ds',
-                 'is_dim': False},
-                {'is_date': True, 'type': 'DATETIME',
-                 'name': 'ds2', 'is_dim': False},
-                {'agg': 'sum', 'is_date': False, 'type': 'INT',
-                 'name': 'epoch_ms', 'is_dim': False},
-                {'agg': 'sum', 'is_date': False, 'type': 'INT',
-                 'name': 'epoch_s', 'is_dim': False},
-                {'is_date': True, 'type': 'STRING', 'name': 'string0',
-                 'is_dim': False},
-                {'is_date': False, 'type': 'STRING',
-                 'name': 'string1', 'is_dim': True},
-                {'is_date': True, 'type': 'STRING', 'name': 'string2',
-                 'is_dim': False},
-                {'is_date': False, 'type': 'STRING',
-                 'name': 'string3', 'is_dim': True}], 'name'),
-                cols,
-            )
-
-
 if __name__ == '__main__':
     unittest.main()

tests/core_tests.py

@@ -24,6 +24,7 @@ import sqlalchemy as sqla
 from superset import dataframe, db, jinja_context, security_manager, sql_lab, utils
 from superset.connectors.sqla.models import SqlaTable
+from superset.db_engine_specs import BaseEngineSpec
 from superset.models import core as models
 from superset.models.sql_lab import Query
 from superset.views.core import DatabaseView
@@ -618,8 +619,7 @@ class CoreTests(SupersetTestCase):
             (datetime.datetime(2017, 11, 18, 21, 53, 0, 219225, tzinfo=tz),),
             (datetime.datetime(2017, 11, 18, 22, 6, 30, 61810, tzinfo=tz),),
         ]
-        df = dataframe.SupersetDataFrame(pd.DataFrame(data=list(data),
-                                                      columns=['data']))
+        df = dataframe.SupersetDataFrame(list(data), [['data']], BaseEngineSpec)
         data = df.data
         self.assertDictEqual(
             data[0],

tests/dataframe_test.py (new file, +115 lines)

@@ -0,0 +1,115 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from __future__ import unicode_literals
+
+from superset.dataframe import dedup, SupersetDataFrame
+from superset.db_engine_specs import BaseEngineSpec
+
+from .base_tests import SupersetTestCase
+
+
+class SupersetDataFrameTestCase(SupersetTestCase):
+    def test_dedup(self):
+        self.assertEquals(
+            dedup(['foo', 'bar']),
+            ['foo', 'bar'],
+        )
+        self.assertEquals(
+            dedup(['foo', 'bar', 'foo', 'bar']),
+            ['foo', 'bar', 'foo__1', 'bar__1'],
+        )
+        self.assertEquals(
+            dedup(['foo', 'bar', 'bar', 'bar']),
+            ['foo', 'bar', 'bar__1', 'bar__2'],
+        )
+
+    def test_get_columns_basic(self):
+        data = [
+            ('a1', 'b1', 'c1'),
+            ('a2', 'b2', 'c2'),
+        ]
+        cursor_descr = (
+            ('a', 'string'),
+            ('b', 'string'),
+            ('c', 'string'),
+        )
+        cdf = SupersetDataFrame(data, cursor_descr, BaseEngineSpec)
+        self.assertEqual(
+            cdf.columns,
+            [
+                {
+                    'is_date': False,
+                    'type': 'STRING',
+                    'name': 'a',
+                    'is_dim': True,
+                }, {
+                    'is_date': False,
+                    'type': 'STRING',
+                    'name': 'b',
+                    'is_dim': True,
+                }, {
+                    'is_date': False,
+                    'type': 'STRING',
+                    'name': 'c',
+                    'is_dim': True,
+                },
+            ],
+        )
+
+    def test_get_columns_with_int(self):
+        data = [
+            ('a1', 1),
+            ('a2', 2),
+        ]
+        cursor_descr = (
+            ('a', 'string'),
+            ('b', 'int'),
+        )
+        cdf = SupersetDataFrame(data, cursor_descr, BaseEngineSpec)
+        self.assertEqual(
+            cdf.columns,
+            [
+                {
+                    'is_date': False,
+                    'type': 'STRING',
+                    'name': 'a',
+                    'is_dim': True,
+                }, {
+                    'is_date': False,
+                    'type': 'INT',
+                    'name': 'b',
+                    'is_dim': False,
+                    'agg': 'sum',
+                },
+            ],
+        )
+
+    def test_get_columns_type_inference(self):
+        data = [
+            (1.2, 1),
+            (3.14, 2),
+        ]
+        cursor_descr = (
+            ('a', None),
+            ('b', None),
+        )
+        cdf = SupersetDataFrame(data, cursor_descr, BaseEngineSpec)
+        self.assertEqual(
+            cdf.columns,
+            [
+                {
+                    'is_date': False,
+                    'type': 'FLOAT',
+                    'name': 'a',
+                    'is_dim': False,
+                    'agg': 'sum',
+                }, {
+                    'is_date': False,
+                    'type': 'INT',
+                    'name': 'b',
+                    'is_dim': False,
+                    'agg': 'sum',
+                },
+            ],
+        )

tests/db_engine_specs_test.py

@@ -7,7 +7,9 @@ from __future__ import unicode_literals
 import textwrap

 from superset.db_engine_specs import (
-    HiveEngineSpec, MssqlEngineSpec, MySQLEngineSpec)
+    BaseEngineSpec, HiveEngineSpec, MssqlEngineSpec,
+    MySQLEngineSpec, PrestoEngineSpec,
+)
 from superset.models.core import Database
 from .base_tests import SupersetTestCase
@@ -193,3 +195,9 @@ class DbEngineSpecsTestCase(SupersetTestCase):
             FROM
             table LIMIT 1000"""),
         )
+
+    def test_get_datatype(self):
+        self.assertEquals('STRING', PrestoEngineSpec.get_datatype('string'))
+        self.assertEquals('TINY', MySQLEngineSpec.get_datatype(1))
+        self.assertEquals('VARCHAR', MySQLEngineSpec.get_datatype(15))
+        self.assertEquals('VARCHAR', BaseEngineSpec.get_datatype('VARCHAR'))

tests/sqllab_tests.py

@@ -12,8 +12,9 @@ import unittest
 from flask_appbuilder.security.sqla import models as ab_models

 from superset import db, security_manager, utils
+from superset.dataframe import SupersetDataFrame
+from superset.db_engine_specs import BaseEngineSpec
 from superset.models.sql_lab import Query
-from superset.sql_lab import convert_results_to_df
 from .base_tests import SupersetTestCase
@@ -203,9 +204,13 @@ class SqlLabTests(SupersetTestCase):
             raise_on_error=True)

     def test_df_conversion_no_dict(self):
-        cols = ['string_col', 'int_col', 'float_col']
+        cols = [
+            ['string_col', 'string'],
+            ['int_col', 'int'],
+            ['float_col', 'float'],
+        ]
         data = [['a', 4, 4.0]]
-        cdf = convert_results_to_df(cols, data)
+        cdf = SupersetDataFrame(data, cols, BaseEngineSpec)

         self.assertEquals(len(data), cdf.size)
         self.assertEquals(len(cols), len(cdf.columns))
@@ -213,7 +218,7 @@ class SqlLabTests(SupersetTestCase):
     def test_df_conversion_tuple(self):
         cols = ['string_col', 'int_col', 'list_col', 'float_col']
         data = [(u'Text', 111, [123], 1.0)]
-        cdf = convert_results_to_df(cols, data)
+        cdf = SupersetDataFrame(data, cols, BaseEngineSpec)

         self.assertEquals(len(data), cdf.size)
         self.assertEquals(len(cols), len(cdf.columns))
@@ -221,7 +226,7 @@ class SqlLabTests(SupersetTestCase):
     def test_df_conversion_dict(self):
         cols = ['string_col', 'dict_col', 'int_col']
         data = [['a', {'c1': 1, 'c2': 2, 'c3': 3}, 4]]
-        cdf = convert_results_to_df(cols, data)
+        cdf = SupersetDataFrame(data, cols, BaseEngineSpec)

         self.assertEquals(len(data), cdf.size)
         self.assertEquals(len(cols), len(cdf.columns))