mirror of
https://github.com/apache/superset.git
synced 2026-05-30 04:39:20 +00:00
feat(sqlglot): Vertica dialect (#39969)
This commit is contained in:
@@ -20,5 +20,14 @@ from .dremio import Dremio
|
||||
from .firebolt import Firebolt, FireboltOld
|
||||
from .opensearch import OpenSearch
|
||||
from .pinot import Pinot
|
||||
from .vertica import Vertica
|
||||
|
||||
__all__ = ["DB2", "Dremio", "Firebolt", "FireboltOld", "OpenSearch", "Pinot"]
|
||||
__all__ = [
|
||||
"DB2",
|
||||
"Dremio",
|
||||
"Firebolt",
|
||||
"FireboltOld",
|
||||
"OpenSearch",
|
||||
"Pinot",
|
||||
"Vertica",
|
||||
]
|
||||
|
||||
87
superset/sql/dialects/vertica.py
Normal file
87
superset/sql/dialects/vertica.py
Normal file
@@ -0,0 +1,87 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
"""
|
||||
Vertica dialect.
|
||||
|
||||
Vertica is wire-compatible with PostgreSQL but provides additional analytical
|
||||
functions natively. This dialect extends the Postgres dialect to preserve
|
||||
Vertica-native functions that the Postgres generator would otherwise rewrite.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from sqlglot import exp
|
||||
from sqlglot.dialects.dialect import date_delta_sql
|
||||
from sqlglot.dialects.postgres import Postgres
|
||||
from sqlglot.helper import seq_get
|
||||
|
||||
|
||||
def _build_datediff(args: list[exp.Expression]) -> exp.DateDiff:
|
||||
# Vertica's signature is DATEDIFF(unit, start, end); the default sqlglot
|
||||
# parser assumes (end, start, unit), so we remap the positional args.
|
||||
return exp.DateDiff(
|
||||
this=seq_get(args, 2),
|
||||
expression=seq_get(args, 1),
|
||||
unit=exp.var(seq_get(args, 0).name) if seq_get(args, 0) else None,
|
||||
)
|
||||
|
||||
|
||||
class Vertica(Postgres):
|
||||
"""
|
||||
Vertica dialect.
|
||||
|
||||
Extends PostgreSQL by keeping functions that Vertica supports natively but
|
||||
Postgres does not (e.g. ``LAST_DAY``, ``DATEDIFF``, ``MEDIAN``, ``NVL2``).
|
||||
"""
|
||||
|
||||
class Parser(Postgres.Parser):
|
||||
FUNCTIONS = {
|
||||
**Postgres.Parser.FUNCTIONS,
|
||||
"DATEDIFF": _build_datediff,
|
||||
"TIMESTAMPDIFF": _build_datediff,
|
||||
}
|
||||
|
||||
class Generator(Postgres.Generator):
|
||||
# Vertica's LAST_DAY only accepts a date/timestamp; it does not take a
|
||||
# date part argument like Snowflake's variant.
|
||||
LAST_DAY_SUPPORTS_DATE_PART = False
|
||||
|
||||
# Vertica supports MEDIAN and NVL2 natively; Postgres does not, and the
|
||||
# inherited generator rewrites them into PERCENTILE_CONT and CASE
|
||||
# expressions respectively.
|
||||
SUPPORTS_MEDIAN = True
|
||||
NVL2_SUPPORTED = True
|
||||
|
||||
# Emit INTERVAL '<value>' <unit> (SQL-standard) instead of the
|
||||
# Postgres-style INTERVAL '<value> <unit>'. Vertica miscomputes the
|
||||
# combined-string form for MONTH/YEAR units (treats them as a fixed
|
||||
# number of days). See https://forum.vertica.com/discussion/229329/.
|
||||
SINGLE_STRING_INTERVAL = False
|
||||
|
||||
TRANSFORMS = {
|
||||
**Postgres.Generator.TRANSFORMS,
|
||||
# Postgres rewrites LAST_DAY into DATE_TRUNC + INTERVAL arithmetic
|
||||
# because it lacks the function. Vertica supports it natively, so
|
||||
# drop the rewrite and fall back to the base lastday_sql.
|
||||
exp.LastDay: lambda self, e: self.function_fallback_sql(e),
|
||||
# Postgres rewrites DATEDIFF into EXTRACT(epoch ...) / AGE() math.
|
||||
# Vertica's native form is DATEDIFF(unit, start, end), matching
|
||||
# Snowflake's signature.
|
||||
exp.DateDiff: date_delta_sql("DATEDIFF"),
|
||||
exp.TsOrDsDiff: date_delta_sql("DATEDIFF"),
|
||||
}
|
||||
@@ -45,7 +45,7 @@ from sqlglot.optimizer.scope import (
|
||||
)
|
||||
|
||||
from superset.exceptions import QueryClauseValidationException, SupersetParseError
|
||||
from superset.sql.dialects import DB2, Dremio, Firebolt, OpenSearch, Pinot
|
||||
from superset.sql.dialects import DB2, Dremio, Firebolt, OpenSearch, Pinot, Vertica
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from superset.models.core import Database
|
||||
@@ -113,7 +113,7 @@ SQLGLOT_DIALECTS = {
|
||||
# "taosws": ???
|
||||
"teradatasql": Dialects.TERADATA,
|
||||
"trino": Dialects.TRINO,
|
||||
"vertica": Dialects.POSTGRES,
|
||||
"vertica": Vertica,
|
||||
"yql": Dialects.CLICKHOUSE,
|
||||
}
|
||||
|
||||
|
||||
197
tests/unit_tests/sql/dialects/vertica_tests.py
Normal file
197
tests/unit_tests/sql/dialects/vertica_tests.py
Normal file
@@ -0,0 +1,197 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
from sqlglot import parse_one
|
||||
|
||||
from superset.sql.dialects.vertica import Vertica
|
||||
|
||||
|
||||
def test_last_day_round_trips_natively() -> None:
|
||||
"""
|
||||
Vertica supports LAST_DAY natively, unlike Postgres which rewrites it into
|
||||
DATE_TRUNC + INTERVAL arithmetic.
|
||||
"""
|
||||
sql = "SELECT LAST_DAY(DATE('2026-01-15'))"
|
||||
|
||||
ast = parse_one(sql, dialect=Vertica)
|
||||
regenerated = ast.sql(dialect=Vertica)
|
||||
|
||||
assert regenerated == "SELECT LAST_DAY(DATE('2026-01-15'))"
|
||||
|
||||
|
||||
def test_last_day_not_rewritten_like_postgres() -> None:
|
||||
"""
|
||||
Postgres rewrites LAST_DAY because it lacks the function. Verify Vertica
|
||||
does not inherit that rewrite.
|
||||
"""
|
||||
sql = "SELECT LAST_DAY(DATE('2026-01-15'))"
|
||||
|
||||
postgres_sql = parse_one(sql, dialect="postgres").sql(dialect="postgres")
|
||||
vertica_sql = parse_one(sql, dialect=Vertica).sql(dialect=Vertica)
|
||||
|
||||
assert "DATE_TRUNC" in postgres_sql
|
||||
assert "DATE_TRUNC" not in vertica_sql
|
||||
assert "LAST_DAY" in vertica_sql
|
||||
|
||||
|
||||
def test_translate_from_postgres_to_vertica() -> None:
|
||||
"""
|
||||
A LAST_DAY expression parsed from Postgres should generate the native
|
||||
Vertica form, not the Postgres rewrite.
|
||||
"""
|
||||
sql = "SELECT LAST_DAY(DATE('2026-01-15'))"
|
||||
|
||||
ast = parse_one(sql, dialect="postgres")
|
||||
regenerated = ast.sql(dialect=Vertica)
|
||||
|
||||
assert regenerated == "SELECT LAST_DAY(DATE('2026-01-15'))"
|
||||
|
||||
|
||||
def test_postgres_features_still_work() -> None:
|
||||
"""
|
||||
Vertica inherits from Postgres, so unrelated Postgres syntax should still
|
||||
parse and regenerate correctly.
|
||||
"""
|
||||
sql = "SELECT a::INT, b || c FROM t WHERE d ~ '^foo'"
|
||||
|
||||
ast = parse_one(sql, dialect=Vertica)
|
||||
regenerated = ast.sql(dialect=Vertica)
|
||||
|
||||
assert regenerated == "SELECT CAST(a AS INT), b || c FROM t WHERE d ~ '^foo'"
|
||||
|
||||
|
||||
def test_datediff_round_trips_natively() -> None:
|
||||
"""
|
||||
Vertica's DATEDIFF is ``DATEDIFF(unit, start, end)``. Postgres rewrites it
|
||||
into ``EXTRACT(epoch ...) / N`` arithmetic.
|
||||
"""
|
||||
sql = "SELECT DATEDIFF('day', a, b) FROM t"
|
||||
|
||||
ast = parse_one(sql, dialect=Vertica)
|
||||
regenerated = ast.sql(dialect=Vertica)
|
||||
|
||||
assert regenerated == "SELECT DATEDIFF(DAY, a, b) FROM t"
|
||||
|
||||
|
||||
def test_datediff_month_round_trips_natively() -> None:
|
||||
"""
|
||||
The MONTH unit exercises Postgres's ``AGE()`` rewrite branch, which Vertica
|
||||
does not need.
|
||||
"""
|
||||
sql = "SELECT DATEDIFF('month', a, b) FROM t"
|
||||
|
||||
ast = parse_one(sql, dialect=Vertica)
|
||||
regenerated = ast.sql(dialect=Vertica)
|
||||
|
||||
assert regenerated == "SELECT DATEDIFF(MONTH, a, b) FROM t"
|
||||
|
||||
|
||||
def test_datediff_not_rewritten_like_postgres() -> None:
|
||||
"""
|
||||
Postgres rewrites DATEDIFF; Vertica should keep the native call.
|
||||
"""
|
||||
sql = "SELECT DATEDIFF('day', a, b) FROM t"
|
||||
|
||||
postgres_sql = parse_one(sql, dialect="postgres").sql(dialect="postgres")
|
||||
vertica_sql = parse_one(sql, dialect=Vertica).sql(dialect=Vertica)
|
||||
|
||||
assert "DATEDIFF" not in postgres_sql
|
||||
assert "DATEDIFF(DAY" in vertica_sql
|
||||
|
||||
|
||||
def test_median_round_trips_natively() -> None:
|
||||
"""
|
||||
Vertica supports MEDIAN as a native analytic/aggregate function. Postgres
|
||||
rewrites it into PERCENTILE_CONT + WITHIN GROUP.
|
||||
"""
|
||||
sql = "SELECT MEDIAN(x) FROM t"
|
||||
|
||||
ast = parse_one(sql, dialect=Vertica)
|
||||
regenerated = ast.sql(dialect=Vertica)
|
||||
|
||||
assert regenerated == "SELECT MEDIAN(x) FROM t"
|
||||
|
||||
|
||||
def test_median_not_rewritten_like_postgres() -> None:
|
||||
sql = "SELECT MEDIAN(x) FROM t"
|
||||
|
||||
postgres_sql = parse_one(sql, dialect="postgres").sql(dialect="postgres")
|
||||
vertica_sql = parse_one(sql, dialect=Vertica).sql(dialect=Vertica)
|
||||
|
||||
assert "PERCENTILE_CONT" in postgres_sql
|
||||
assert "PERCENTILE_CONT" not in vertica_sql
|
||||
assert "MEDIAN" in vertica_sql
|
||||
|
||||
|
||||
def test_nvl2_round_trips_natively() -> None:
|
||||
"""
|
||||
Vertica supports NVL2 natively; Postgres rewrites it into a CASE.
|
||||
"""
|
||||
sql = "SELECT NVL2(a, b, c) FROM t"
|
||||
|
||||
ast = parse_one(sql, dialect=Vertica)
|
||||
regenerated = ast.sql(dialect=Vertica)
|
||||
|
||||
assert regenerated == "SELECT NVL2(a, b, c) FROM t"
|
||||
|
||||
|
||||
def test_nvl2_not_rewritten_like_postgres() -> None:
|
||||
sql = "SELECT NVL2(a, b, c) FROM t"
|
||||
|
||||
postgres_sql = parse_one(sql, dialect="postgres").sql(dialect="postgres")
|
||||
vertica_sql = parse_one(sql, dialect=Vertica).sql(dialect=Vertica)
|
||||
|
||||
assert "CASE" in postgres_sql
|
||||
assert "CASE" not in vertica_sql
|
||||
assert "NVL2" in vertica_sql
|
||||
|
||||
|
||||
def test_interval_uses_sql_standard_form() -> None:
|
||||
"""
|
||||
Vertica miscomputes month/year arithmetic when given the Postgres-style
|
||||
single-string interval (``INTERVAL '2 MONTH'``). The SQL-standard form
|
||||
``INTERVAL '2' MONTH`` is correct on all Vertica versions.
|
||||
See https://forum.vertica.com/discussion/229329/.
|
||||
"""
|
||||
sql = "SELECT date_col + INTERVAL '2 MONTH' FROM t"
|
||||
|
||||
ast = parse_one(sql, dialect=Vertica)
|
||||
regenerated = ast.sql(dialect=Vertica)
|
||||
|
||||
assert regenerated == "SELECT date_col + INTERVAL '2' MONTH FROM t"
|
||||
|
||||
|
||||
def test_interval_year_uses_sql_standard_form() -> None:
|
||||
sql = "SELECT date_col + INTERVAL '1 YEAR' FROM t"
|
||||
|
||||
ast = parse_one(sql, dialect=Vertica)
|
||||
regenerated = ast.sql(dialect=Vertica)
|
||||
|
||||
assert regenerated == "SELECT date_col + INTERVAL '1' YEAR FROM t"
|
||||
|
||||
|
||||
def test_interval_diverges_from_postgres() -> None:
|
||||
"""
|
||||
Postgres emits the combined-string form; Vertica should not.
|
||||
"""
|
||||
sql = "SELECT date_col + INTERVAL '2 MONTH' FROM t"
|
||||
|
||||
postgres_sql = parse_one(sql, dialect="postgres").sql(dialect="postgres")
|
||||
vertica_sql = parse_one(sql, dialect=Vertica).sql(dialect=Vertica)
|
||||
|
||||
assert "INTERVAL '2 MONTH'" in postgres_sql
|
||||
assert "INTERVAL '2' MONTH" in vertica_sql
|
||||
Reference in New Issue
Block a user