Files
superset2/tests/model_tests.py
agrawaldevesh fc4042a28b SelectControl hackery: Fixing freeform select and allowing group by and non group-by column selectors to take in arbitrary expressions including select-all semantics. (#6722)
Summary: We want to allow grouping by expressions, including the
ability to select expressions (without group-by). The UI isn't polished yet:
it is not at feature parity with, say, the adhoc filters/metrics, which
come with a nice text box for editing the SQL. But
this suffices for my use case for now.

(The UI would have to be redone ... but that would require some refactoring
to merge the AdhocFilter/Metric/Popover stuff such that it can be used
for this use case too.)

Also fixed a bug in the SelectControl freeForm selection.

Allow selecting all columns easily: Added a new Select-All special item
that users can select to make all the options in the selectable show
up.

The ability to group by arbitrary expressions is useful because now two
users don't need to create custom computed-fields for this.
2019-02-04 12:34:24 -08:00

285 lines
11 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import textwrap
import pandas
from sqlalchemy.engine.url import make_url
from superset import app, db
from superset.models.core import Database
from superset.utils.core import get_main_database, QueryStatus
from .base_tests import SupersetTestCase
class DatabaseModelTestCase(SupersetTestCase):
    """Tests for the ``Database`` model.

    Covers the engine URL built by ``get_sqla_engine`` for several backends
    (with and without a schema override), user impersonation, ``select_star``
    SQL generation, ``grains_dict`` lookups and single/multi statement
    execution through ``get_df``.
    """

    def test_database_schema_presto(self):
        """A schema override replaces the schema part of a Presto database path."""
        sqlalchemy_uri = 'presto://presto.airbnb.io:8080/hive/default'
        model = Database(sqlalchemy_uri=sqlalchemy_uri)
        # Named ``db_name`` so the module-level ``db`` import is not shadowed.
        db_name = make_url(model.get_sqla_engine().url).database
        self.assertEqual('hive/default', db_name)

        db_name = make_url(model.get_sqla_engine(schema='core_db').url).database
        self.assertEqual('hive/core_db', db_name)

        # Same checks when the URI names only the catalog.
        sqlalchemy_uri = 'presto://presto.airbnb.io:8080/hive'
        model = Database(sqlalchemy_uri=sqlalchemy_uri)
        db_name = make_url(model.get_sqla_engine().url).database
        self.assertEqual('hive', db_name)

        db_name = make_url(model.get_sqla_engine(schema='core_db').url).database
        self.assertEqual('hive/core_db', db_name)

    def test_database_schema_postgres(self):
        """A schema override leaves the Postgres database name unchanged."""
        sqlalchemy_uri = 'postgresql+psycopg2://postgres.airbnb.io:5439/prod'
        model = Database(sqlalchemy_uri=sqlalchemy_uri)

        db_name = make_url(model.get_sqla_engine().url).database
        self.assertEqual('prod', db_name)

        db_name = make_url(model.get_sqla_engine(schema='foo').url).database
        self.assertEqual('prod', db_name)

    def test_database_schema_hive(self):
        """A schema override replaces the Hive database name."""
        sqlalchemy_uri = 'hive://hive@hive.airbnb.io:10000/default?auth=NOSASL'
        model = Database(sqlalchemy_uri=sqlalchemy_uri)

        db_name = make_url(model.get_sqla_engine().url).database
        self.assertEqual('default', db_name)

        db_name = make_url(model.get_sqla_engine(schema='core_db').url).database
        self.assertEqual('core_db', db_name)

    def test_database_schema_mysql(self):
        """A schema override replaces the MySQL database name."""
        sqlalchemy_uri = 'mysql://root@localhost/superset'
        model = Database(sqlalchemy_uri=sqlalchemy_uri)

        db_name = make_url(model.get_sqla_engine().url).database
        self.assertEqual('superset', db_name)

        db_name = make_url(model.get_sqla_engine(schema='staging').url).database
        self.assertEqual('staging', db_name)

    def test_database_impersonate_user(self):
        """The engine URL carries the given user only when impersonation is on."""
        uri = 'mysql://root@localhost'
        example_user = 'giuseppe'
        model = Database(sqlalchemy_uri=uri)

        model.impersonate_user = True
        user_name = make_url(
            model.get_sqla_engine(user_name=example_user).url).username
        self.assertEqual(example_user, user_name)

        model.impersonate_user = False
        user_name = make_url(
            model.get_sqla_engine(user_name=example_user).url).username
        self.assertNotEqual(example_user, user_name)

    def test_select_star(self):
        """``select_star`` output starts with the expected SELECT statement."""
        main_db = get_main_database(db.session)
        table_name = 'energy_usage'

        sql = main_db.select_star(
            table_name, show_cols=False, latest_partition=False)
        expected = textwrap.dedent(f"""\
            SELECT *
            FROM {table_name}
            LIMIT 100""")
        # Pass the generated SQL as the message so a failure shows what was built.
        self.assertTrue(sql.startswith(expected), msg=sql)

        sql = main_db.select_star(
            table_name, show_cols=True, latest_partition=False)
        # NOTE(review): leading whitespace was not recoverable from the view of
        # the file this was reviewed in, so the column continuation lines are
        # flush with ``SELECT``; if the SQL formatter aligns them under the
        # first column name, restore that alignment here.
        expected = textwrap.dedent(f"""\
            SELECT source,
            target,
            value
            FROM {table_name}
            LIMIT 100""")
        self.assertTrue(sql.startswith(expected), msg=sql)

    def test_grains_dict(self):
        """``grains_dict`` is keyed by both grain names and ISO durations."""
        uri = 'mysql://root@localhost'
        database = Database(sqlalchemy_uri=uri)
        grains = database.grains_dict()
        self.assertEqual(grains.get('day').function, 'DATE({col})')
        self.assertEqual(grains.get('P1D').function, 'DATE({col})')
        self.assertEqual(grains.get('Time Column').function, '{col}')

    def test_single_statement(self):
        """A single statement runs with or without a trailing semicolon."""
        main_db = get_main_database(db.session)

        # Only exercised when the main database is MySQL.
        if main_db.backend == 'mysql':
            df = main_db.get_df('SELECT 1', None)
            self.assertEqual(df.iat[0, 0], 1)

            df = main_db.get_df('SELECT 1;', None)
            self.assertEqual(df.iat[0, 0], 1)

    def test_multi_statement(self):
        """Several ``;``-separated statements yield the last statement's result."""
        main_db = get_main_database(db.session)

        # Only exercised when the main database is MySQL.
        if main_db.backend == 'mysql':
            df = main_db.get_df('USE superset; SELECT 1', None)
            self.assertEqual(df.iat[0, 0], 1)

            # A semicolon inside a string literal must not split the statement.
            df = main_db.get_df("USE superset; SELECT ';';", None)
            self.assertEqual(df.iat[0, 0], ';')
class SqlaTableModelTestCase(SupersetTestCase):
    """Tests for the SQLAlchemy table model using the ``birth_names`` table.

    Covers timestamp expression generation for the ``ds`` column, queries
    that group by an arbitrary SQL expression, and the ``SQL_QUERY_MUTATOR``
    configuration hook.
    """

    def test_get_timestamp_expression(self):
        """Time grains wrap the raw column or its custom expression."""
        tbl = self.get_table_by_name('birth_names')
        ds_col = tbl.get_column('ds')
        sqla_literal = ds_col.get_timestamp_expression(None)
        self.assertEqual(str(sqla_literal.compile()), 'ds')

        sqla_literal = ds_col.get_timestamp_expression('P1D')
        compiled = str(sqla_literal.compile())
        if tbl.database.backend == 'mysql':
            self.assertEqual(compiled, 'DATE(ds)')

        prev_ds_expr = ds_col.expression
        ds_col.expression = 'DATE_ADD(ds, 1)'
        try:
            sqla_literal = ds_col.get_timestamp_expression('P1D')
            compiled = str(sqla_literal.compile())
            if tbl.database.backend == 'mysql':
                self.assertEqual(compiled, 'DATE(DATE_ADD(ds, 1))')
        finally:
            # Restore even if the assertion fails so later tests see the
            # column's original expression.
            ds_col.expression = prev_ds_expr

    def test_get_timestamp_expression_epoch(self):
        """With ``epoch_s`` as the date format the column is converted first."""
        tbl = self.get_table_by_name('birth_names')
        ds_col = tbl.get_column('ds')

        ds_col.expression = None
        ds_col.python_date_format = 'epoch_s'
        sqla_literal = ds_col.get_timestamp_expression(None)
        compiled = str(sqla_literal.compile())
        if tbl.database.backend == 'mysql':
            self.assertEqual(compiled, 'from_unixtime(ds)')

        ds_col.python_date_format = 'epoch_s'
        sqla_literal = ds_col.get_timestamp_expression('P1D')
        compiled = str(sqla_literal.compile())
        if tbl.database.backend == 'mysql':
            self.assertEqual(compiled, 'DATE(from_unixtime(ds))')

        prev_ds_expr = ds_col.expression
        ds_col.expression = 'DATE_ADD(ds, 1)'
        try:
            sqla_literal = ds_col.get_timestamp_expression('P1D')
            compiled = str(sqla_literal.compile())
            if tbl.database.backend == 'mysql':
                self.assertEqual(
                    compiled, 'DATE(from_unixtime(DATE_ADD(ds, 1)))')
        finally:
            # Restore even if the assertion fails.
            ds_col.expression = prev_ds_expr

    def test_get_timestamp_expression_backward(self):
        """Grain names ('day', 'Time Column') are accepted as well as durations."""
        tbl = self.get_table_by_name('birth_names')
        ds_col = tbl.get_column('ds')

        ds_col.expression = None
        ds_col.python_date_format = None
        sqla_literal = ds_col.get_timestamp_expression('day')
        compiled = str(sqla_literal.compile())
        if tbl.database.backend == 'mysql':
            self.assertEqual(compiled, 'DATE(ds)')

        ds_col.expression = None
        ds_col.python_date_format = None
        sqla_literal = ds_col.get_timestamp_expression('Time Column')
        compiled = str(sqla_literal.compile())
        if tbl.database.backend == 'mysql':
            self.assertEqual(compiled, 'ds')

    def query_with_expr_helper(self, is_timeseries, inner_join=True):
        """Run a ``birth_names`` query grouped by an arbitrary SQL expression.

        The engine spec's ``inner_joins`` flag is set to ``inner_join`` for
        the duration of the query and restored afterwards.

        :param is_timeseries: passed through as the query's ``is_timeseries``.
        :param inner_join: value to force on the engine spec's ``inner_joins``.
        :returns: the result dataframe, or ``None`` when ``inner_join`` is
            requested but the engine spec does not support inner joins.
        """
        tbl = self.get_table_by_name('birth_names')
        ds_col = tbl.get_column('ds')
        ds_col.expression = None
        ds_col.python_date_format = None
        spec = self.get_database_by_id(tbl.database_id).db_engine_spec
        if not spec.inner_joins and inner_join:
            # if the db does not support inner joins, we cannot force it so
            return None

        arbitrary_gby = "state || gender || '_test'"
        arbitrary_metric = dict(
            label='arbitrary',
            expressionType='SQL',
            sqlExpression='COUNT(1)',
        )
        query_obj = dict(
            groupby=[arbitrary_gby, 'name'],
            metrics=[arbitrary_metric],
            filter=[],
            is_timeseries=is_timeseries,
            prequeries=[],
            columns=[],
            granularity='ds',
            from_dttm=None,
            to_dttm=None,
            is_prequery=False,
            extras=dict(time_grain_sqla='P1Y'),
        )

        old_inner_join = spec.inner_joins
        spec.inner_joins = inner_join
        try:
            qr = tbl.query(query_obj)
            self.assertEqual(qr.status, QueryStatus.SUCCESS)
            sql = qr.query
            self.assertIn(arbitrary_gby, sql)
            self.assertIn('name', sql)
            if inner_join and is_timeseries:
                self.assertIn('JOIN', sql.upper())
            else:
                self.assertNotIn('JOIN', sql.upper())
        finally:
            # The engine spec outlives this test; always put the flag back so
            # a failure here cannot change how later tests build queries.
            spec.inner_joins = old_inner_join

        self.assertIsNotNone(qr.df)
        return qr.df

    def test_query_with_expr_groupby_timeseries(self):
        """Timeseries results match with and without the inner join."""

        def canonicalize_df(df):
            # Sort by every column and drop the index so that row order does
            # not affect the comparison.
            ordered = df.sort_values(by=list(df.columns.values), inplace=False)
            ordered.reset_index(inplace=True, drop=True)
            return ordered

        df1 = self.query_with_expr_helper(is_timeseries=True, inner_join=True)
        df2 = self.query_with_expr_helper(is_timeseries=True, inner_join=False)
        # df1 can be None if the db does not support join
        self.assertIsNotNone(df2)
        if df1 is not None:
            pandas.testing.assert_frame_equal(
                canonicalize_df(df1),
                canonicalize_df(df2))

    def test_query_with_expr_groupby(self):
        """A non-timeseries query grouped by an expression succeeds."""
        self.query_with_expr_helper(is_timeseries=False)

    def test_sql_mutator(self):
        """``SQL_QUERY_MUTATOR`` is applied to the generated query string."""
        tbl = self.get_table_by_name('birth_names')
        query_obj = dict(
            groupby=[],
            metrics=[],
            filter=[],
            is_timeseries=False,
            columns=['name'],
            granularity=None,
            from_dttm=None,
            to_dttm=None,
            is_prequery=False,
            extras={},
        )
        sql = tbl.get_query_str(query_obj)
        self.assertNotIn('--COMMENT', sql)

        def mutator(*args):
            return '--COMMENT\n' + args[0]

        previous_mutator = app.config.get('SQL_QUERY_MUTATOR')
        app.config['SQL_QUERY_MUTATOR'] = mutator
        try:
            sql = tbl.get_query_str(query_obj)
            self.assertIn('--COMMENT', sql)
        finally:
            # app.config is shared by the whole test run; restore the prior
            # value even when the assertion fails.
            app.config['SQL_QUERY_MUTATOR'] = previous_mutator