feat: apply Time Grain to X-Axis column (#21163)

This commit is contained in:
Yongjie Zhao
2022-09-07 16:24:15 +08:00
committed by GitHub
parent 875e9f8a04
commit ce3d38d2e7
24 changed files with 705 additions and 29 deletions

View File

@@ -18,7 +18,7 @@ from __future__ import annotations
import contextlib
import functools
from operator import ge
import os
from typing import Any, Callable, Optional, TYPE_CHECKING
from unittest.mock import patch
@@ -303,34 +303,38 @@ def virtual_dataset():
@pytest.fixture
def physical_dataset():
from superset.connectors.sqla.models import SqlaTable, SqlMetric, TableColumn
from superset.connectors.sqla.utils import get_identifier_quoter
example_database = get_example_database()
engine = example_database.get_sqla_engine()
quoter = get_identifier_quoter(engine.name)
# sqlite can only execute one statement at a time
engine.execute(
"""
f"""
CREATE TABLE IF NOT EXISTS physical_dataset(
col1 INTEGER,
col2 VARCHAR(255),
col3 DECIMAL(4,2),
col4 VARCHAR(255),
col5 TIMESTAMP
col5 TIMESTAMP DEFAULT '1970-01-01 00:00:01',
col6 TIMESTAMP DEFAULT '1970-01-01 00:00:01',
{quoter('time column with spaces')} TIMESTAMP DEFAULT '1970-01-01 00:00:01'
);
"""
)
engine.execute(
"""
INSERT INTO physical_dataset values
(0, 'a', 1.0, NULL, '2000-01-01 00:00:00'),
(1, 'b', 1.1, NULL, '2000-01-02 00:00:00'),
(2, 'c', 1.2, NULL, '2000-01-03 00:00:00'),
(3, 'd', 1.3, NULL, '2000-01-04 00:00:00'),
(4, 'e', 1.4, NULL, '2000-01-05 00:00:00'),
(5, 'f', 1.5, NULL, '2000-01-06 00:00:00'),
(6, 'g', 1.6, NULL, '2000-01-07 00:00:00'),
(7, 'h', 1.7, NULL, '2000-01-08 00:00:00'),
(8, 'i', 1.8, NULL, '2000-01-09 00:00:00'),
(9, 'j', 1.9, NULL, '2000-01-10 00:00:00');
(0, 'a', 1.0, NULL, '2000-01-01 00:00:00', '2002-01-03 00:00:00', '2002-01-03 00:00:00'),
(1, 'b', 1.1, NULL, '2000-01-02 00:00:00', '2002-02-04 00:00:00', '2002-02-04 00:00:00'),
(2, 'c', 1.2, NULL, '2000-01-03 00:00:00', '2002-03-07 00:00:00', '2002-03-07 00:00:00'),
(3, 'd', 1.3, NULL, '2000-01-04 00:00:00', '2002-04-12 00:00:00', '2002-04-12 00:00:00'),
(4, 'e', 1.4, NULL, '2000-01-05 00:00:00', '2002-05-11 00:00:00', '2002-05-11 00:00:00'),
(5, 'f', 1.5, NULL, '2000-01-06 00:00:00', '2002-06-13 00:00:00', '2002-06-13 00:00:00'),
(6, 'g', 1.6, NULL, '2000-01-07 00:00:00', '2002-07-15 00:00:00', '2002-07-15 00:00:00'),
(7, 'h', 1.7, NULL, '2000-01-08 00:00:00', '2002-08-18 00:00:00', '2002-08-18 00:00:00'),
(8, 'i', 1.8, NULL, '2000-01-09 00:00:00', '2002-09-20 00:00:00', '2002-09-20 00:00:00'),
(9, 'j', 1.9, NULL, '2000-01-10 00:00:00', '2002-10-22 00:00:00', '2002-10-22 00:00:00');
"""
)
@@ -343,6 +347,13 @@ def physical_dataset():
TableColumn(column_name="col3", type="DECIMAL(4,2)", table=dataset)
TableColumn(column_name="col4", type="VARCHAR(255)", table=dataset)
TableColumn(column_name="col5", type="TIMESTAMP", is_dttm=True, table=dataset)
TableColumn(column_name="col6", type="TIMESTAMP", is_dttm=True, table=dataset)
TableColumn(
column_name="time column with spaces",
type="TIMESTAMP",
is_dttm=True,
table=dataset,
)
SqlMetric(metric_name="count", expression="count(*)", table=dataset)
db.session.merge(dataset)
db.session.commit()
@@ -385,3 +396,9 @@ def virtual_dataset_comma_in_column_value():
db.session.delete(dataset)
db.session.commit()
# Skip marker: runs the decorated test only when the example database URI
# (SUPERSET__SQLALCHEMY_DATABASE_URI env var) points at PostgreSQL.
only_postgresql = pytest.mark.skipif(
    "postgresql" not in os.environ.get("SUPERSET__SQLALCHEMY_DATABASE_URI", ""),
    reason="Only run test case in Postgresql",
)

View File

@@ -30,6 +30,7 @@ from superset.common.query_object import QueryObject
from superset.connectors.sqla.models import SqlMetric
from superset.datasource.dao import DatasourceDAO
from superset.extensions import cache_manager
from superset.superset_typing import AdhocColumn
from superset.utils.core import (
AdhocMetricExpressionType,
backend,
@@ -38,6 +39,7 @@ from superset.utils.core import (
)
from superset.utils.pandas_postprocessing.utils import FLAT_COLUMN_SEPARATOR
from tests.integration_tests.base_tests import SupersetTestCase
from tests.integration_tests.conftest import only_postgresql
from tests.integration_tests.fixtures.birth_names_dashboard import (
load_birth_names_dashboard_with_slices,
load_birth_names_data,
@@ -728,3 +730,183 @@ def test_get_label_map(app_context, virtual_dataset_comma_in_column_value):
"count, col2, row2": ["count", "col2, row2"],
"count, col2, row3": ["count", "col2, row3"],
}
def test_time_column_with_time_grain(app_context, physical_dataset):
    """A time grain truncates a BASE_AXIS column, but a plain adhoc column
    carrying a timeGrain (without columnType BASE_AXIS) keeps its raw values."""
    raw_column: AdhocColumn = {
        "label": "I_AM_AN_ORIGINAL_COLUMN",
        "sqlExpression": "col5",
        "timeGrain": "P1Y",
    }
    axis_column: AdhocColumn = {
        "label": "I_AM_A_TRUNC_COLUMN",
        "sqlExpression": "col6",
        "columnType": "BASE_AXIS",
        "timeGrain": "P1Y",
    }
    query_context = QueryContextFactory().create(
        datasource={
            "type": physical_dataset.type,
            "id": physical_dataset.id,
        },
        queries=[
            {
                "columns": ["col1", raw_column, axis_column],
                "metrics": ["count"],
                "orderby": [["col1", True]],
            }
        ],
        result_type=ChartDataResultType.FULL,
        force=True,
    )
    query_object = query_context.queries[0]
    df = query_context.get_df_payload(query_object)["df"]
    if query_object.datasource.database.backend == "sqlite":
        # sqlite returns string as timestamp column
        for row, (raw, truncated) in enumerate(
            [
                ("2000-01-01 00:00:00", "2002-01-01 00:00:00"),
                ("2000-01-02 00:00:00", "2002-01-01 00:00:00"),
            ]
        ):
            assert df["I_AM_AN_ORIGINAL_COLUMN"][row] == raw
            assert df["I_AM_A_TRUNC_COLUMN"][row] == truncated
    else:
        # other backends yield real datetimes; compare the date part only
        for row, (raw, truncated) in enumerate(
            [("2000-01-01", "2002-01-01"), ("2000-01-02", "2002-01-01")]
        ):
            assert df["I_AM_AN_ORIGINAL_COLUMN"][row].strftime("%Y-%m-%d") == raw
            assert df["I_AM_A_TRUNC_COLUMN"][row].strftime("%Y-%m-%d") == truncated
def test_non_time_column_with_time_grain(app_context, physical_dataset):
    """A time grain applied to a non-temporal BASE_AXIS column must leave the
    column's values untouched."""
    axis_column = {
        "label": "COL2 ALIAS",
        "sqlExpression": "col2",
        "columnType": "BASE_AXIS",
        "timeGrain": "P1Y",
    }
    query_context = QueryContextFactory().create(
        datasource={
            "type": physical_dataset.type,
            "id": physical_dataset.id,
        },
        queries=[
            {
                "columns": ["col1", axis_column],
                "metrics": ["count"],
                "orderby": [["col1", True]],
                "row_limit": 1,
            }
        ],
        result_type=ChartDataResultType.FULL,
        force=True,
    )
    payload = query_context.get_df_payload(query_context.queries[0])
    # col2 of the first row (ordered by col1) is the literal string 'a'
    assert payload["df"]["COL2 ALIAS"][0] == "a"
def test_special_chars_in_column_name(app_context, physical_dataset):
    """A physical column whose name contains spaces works both as a plain
    selected column and as a time-grained BASE_AXIS expression."""
    truncated_column = {
        "label": "I_AM_A_TRUNC_COLUMN",
        "sqlExpression": "time column with spaces",
        "columnType": "BASE_AXIS",
        "timeGrain": "P1Y",
    }
    query_context = QueryContextFactory().create(
        datasource={
            "type": physical_dataset.type,
            "id": physical_dataset.id,
        },
        queries=[
            {
                "columns": [
                    "col1",
                    "time column with spaces",
                    truncated_column,
                ],
                "metrics": ["count"],
                "orderby": [["col1", True]],
                "row_limit": 1,
            }
        ],
        result_type=ChartDataResultType.FULL,
        force=True,
    )
    query_object = query_context.queries[0]
    df = query_context.get_df_payload(query_object)["df"]
    if query_object.datasource.database.backend == "sqlite":
        # sqlite returns string as timestamp column
        assert df["time column with spaces"][0] == "2002-01-03 00:00:00"
        assert df["I_AM_A_TRUNC_COLUMN"][0] == "2002-01-01 00:00:00"
    else:
        # other backends yield real datetimes; compare the date part only
        assert df["time column with spaces"][0].strftime("%Y-%m-%d") == "2002-01-03"
        assert df["I_AM_A_TRUNC_COLUMN"][0].strftime("%Y-%m-%d") == "2002-01-01"
@only_postgresql
def test_date_adhoc_column(app_context, physical_dataset):
    """An adhoc SQL expression that evaluates to a date type is still
    truncated by the axis time grain (PostgreSQL only)."""
    # sql expression returns date type
    shifted_column: AdhocColumn = {
        "label": "ADHOC COLUMN",
        "sqlExpression": "col6 + interval '20 year'",
        "columnType": "BASE_AXIS",
        "timeGrain": "P1Y",
    }
    query_context = QueryContextFactory().create(
        datasource={
            "type": physical_dataset.type,
            "id": physical_dataset.id,
        },
        queries=[
            {
                "columns": [shifted_column],
                "metrics": ["count"],
            }
        ],
        result_type=ChartDataResultType.FULL,
        force=True,
    )
    df = query_context.get_df_payload(query_context.queries[0])["df"]
    # Expected frame:
    #   ADHOC COLUMN  count
    # 0   2022-01-01     10
    assert df["ADHOC COLUMN"][0].strftime("%Y-%m-%d") == "2022-01-01"
    assert df["count"][0] == 10
@only_postgresql
def test_non_date_adhoc_column(app_context, physical_dataset):
    """An adhoc SQL expression that evaluates to a non-date type is grouped
    on the axis without any time-grain truncation (PostgreSQL only)."""
    # sql expression returns non-date type
    numeric_column: AdhocColumn = {
        "label": "ADHOC COLUMN",
        "sqlExpression": "col1 * 10",
        "columnType": "BASE_AXIS",
        "timeGrain": "P1Y",
    }
    # order by the quoted projection alias so row order is deterministic
    order_by_label = {
        "expressionType": "SQL",
        "sqlExpression": '"ADHOC COLUMN"',
    }
    query_context = QueryContextFactory().create(
        datasource={
            "type": physical_dataset.type,
            "id": physical_dataset.id,
        },
        queries=[
            {
                "columns": [numeric_column],
                "metrics": ["count"],
                "orderby": [
                    [
                        order_by_label,
                        True,
                    ]
                ],
            }
        ],
        result_type=ChartDataResultType.FULL,
        force=True,
    )
    df = query_context.get_df_payload(query_context.queries[0])["df"]
    assert df["ADHOC COLUMN"][0] == 0
    assert df["ADHOC COLUMN"][1] == 10

View File

@@ -30,7 +30,6 @@ from superset.utils.core import (
get_metric_names,
get_time_filter_status,
is_adhoc_metric,
NO_TIME_RANGE,
)
from tests.unit_tests.fixtures.datasets import get_dataset_mock