Infer types. Smart defaults for the visualize window. Basic implementation. (#1134)

* Implement smart suggestions for the visualize flow.

* Address JS comments.

* Implement caravel dataframe wrapper.
This commit is contained in:
Bogdan
2016-09-23 11:14:38 -07:00
committed by GitHub
parent fc921d63a1
commit df89bec712
5 changed files with 218 additions and 48 deletions

View File

@@ -14,7 +14,7 @@ import unittest
import pandas as pd
import caravel
from caravel import app, appbuilder, db, models, sql_lab, utils
from caravel import app, appbuilder, db, models, sql_lab, utils, dataframe
from .base_tests import CaravelTestCase
@@ -34,6 +34,7 @@ app.config['CELERY_CONFIG'] = CeleryConfig
class UtilityFunctionTests(CaravelTestCase):
# TODO(bkyryliuk): support more cases in CTA function.
def test_create_table_as(self):
select_query = "SELECT * FROM outer_space;"
@@ -193,8 +194,8 @@ class CeleryTestCase(CaravelTestCase):
result2 = self.run_sql(
1, sql_where, tmp_table='tmp_table_2', cta='true')
self.assertEqual(QueryStatus.SUCCESS, result2['query']['state'])
self.assertIsNone(result2['data'])
self.assertIsNone(result2['columns'])
self.assertEqual([], result2['data'])
self.assertEqual([], result2['columns'])
query2 = self.get_query_by_id(result2['query']['serverId'])
# Check the data in the tmp table.
@@ -208,8 +209,8 @@ class CeleryTestCase(CaravelTestCase):
result3 = self.run_sql(
1, sql_empty_result, tmp_table='tmp_table_3', cta='true',)
self.assertEqual(QueryStatus.SUCCESS, result3['query']['state'])
self.assertIsNone(result3['data'])
self.assertIsNone(result3['columns'])
self.assertEqual([], result3['data'])
self.assertEqual([], result3['columns'])
query3 = self.get_query_by_id(result3['query']['serverId'])
self.assertEqual(QueryStatus.SUCCESS, query3.status)
@@ -250,6 +251,52 @@ class CeleryTestCase(CaravelTestCase):
self.assertEqual(True, query1.select_as_cta)
self.assertEqual(True, query1.select_as_cta_used)
def test_get_columns_dict(self):
    """Check the column metadata CaravelDataFrame reports for a known table.

    Loads ``multiformat_time_series`` from the ``main`` database, wraps the
    resulting frame in ``dataframe.CaravelDataFrame`` and compares its
    ``columns_dict`` with the expected per-column descriptions.
    """
    main_db = (
        db.session.query(models.Database)
        .filter_by(database_name='main')
        .first()
    )
    frame = main_db.get_df("SELECT * FROM multiformat_time_series", None)
    wrapped = dataframe.CaravelDataFrame(frame)

    # The expectations are the same for every backend except for the type
    # reported for ``ds2``, which depends on whether the URI is sqlite.
    if main_db.sqlalchemy_uri.startswith('sqlite'):
        ds2_type = 'datetime_string'
    else:
        ds2_type = 'datetime64[ns]'

    def date_column(name, type_name='datetime_string'):
        # Description expected for a column recognised as a date.
        return {
            'name': name,
            'type': type_name,
            'is_date': True,
            'is_dim': False,
        }

    def summed_int_column(name):
        # Description expected for an int64 column aggregated with sum.
        return {
            'name': name,
            'type': 'int64',
            'agg': 'sum',
            'is_date': False,
            'is_dim': False,
        }

    def dimension_column(name):
        # Description expected for a plain object column used as a dimension.
        return {
            'name': name,
            'type': 'object',
            'is_date': False,
            'is_dim': True,
        }

    expected_columns = [
        date_column('ds'),
        date_column('ds2', ds2_type),
        summed_int_column('epoch_ms'),
        summed_int_column('epoch_s'),
        date_column('string0'),
        dimension_column('string1'),
        date_column('string2'),
        dimension_column('string3'),
    ]
    self.assertEqual(expected_columns, wrapped.columns_dict)
# Run the tests through unittest when this module is executed as a script.
if __name__ == '__main__':
unittest.main()