From 4311a15eb2bee9bc842396904e0bc3792c853449 Mon Sep 17 00:00:00 2001 From: Beto Dealmeida Date: Fri, 8 May 2026 13:34:34 -0400 Subject: [PATCH] feat(sqlglot): Vertica dialect (#39969) --- superset/sql/dialects/__init__.py | 11 +- superset/sql/dialects/vertica.py | 87 ++++++++ superset/sql/parse.py | 4 +- .../unit_tests/sql/dialects/vertica_tests.py | 197 ++++++++++++++++++ 4 files changed, 296 insertions(+), 3 deletions(-) create mode 100644 superset/sql/dialects/vertica.py create mode 100644 tests/unit_tests/sql/dialects/vertica_tests.py diff --git a/superset/sql/dialects/__init__.py b/superset/sql/dialects/__init__.py index 0334efb5f11..f7b82538115 100644 --- a/superset/sql/dialects/__init__.py +++ b/superset/sql/dialects/__init__.py @@ -20,5 +20,14 @@ from .dremio import Dremio from .firebolt import Firebolt, FireboltOld from .opensearch import OpenSearch from .pinot import Pinot +from .vertica import Vertica -__all__ = ["DB2", "Dremio", "Firebolt", "FireboltOld", "OpenSearch", "Pinot"] +__all__ = [ + "DB2", + "Dremio", + "Firebolt", + "FireboltOld", + "OpenSearch", + "Pinot", + "Vertica", +] diff --git a/superset/sql/dialects/vertica.py b/superset/sql/dialects/vertica.py new file mode 100644 index 00000000000..e26886f6ee8 --- /dev/null +++ b/superset/sql/dialects/vertica.py @@ -0,0 +1,87 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Vertica dialect. + +Vertica is wire-compatible with PostgreSQL but provides additional analytical +functions natively. This dialect extends the Postgres dialect to preserve +Vertica-native functions that the Postgres generator would otherwise rewrite. +""" + +from __future__ import annotations + +from sqlglot import exp +from sqlglot.dialects.dialect import date_delta_sql +from sqlglot.dialects.postgres import Postgres +from sqlglot.helper import seq_get + + +def _build_datediff(args: list[exp.Expression]) -> exp.DateDiff: + # Vertica's signature is DATEDIFF(unit, start, end); the default sqlglot + # parser assumes (end, start, unit), so we remap the positional args. + return exp.DateDiff( + this=seq_get(args, 2), + expression=seq_get(args, 1), + unit=exp.var(seq_get(args, 0).name) if seq_get(args, 0) else None, + ) + + +class Vertica(Postgres): + """ + Vertica dialect. + + Extends PostgreSQL by keeping functions that Vertica supports natively but + Postgres does not (e.g. ``LAST_DAY``, ``DATEDIFF``, ``MEDIAN``, ``NVL2``). + """ + + class Parser(Postgres.Parser): + FUNCTIONS = { + **Postgres.Parser.FUNCTIONS, + "DATEDIFF": _build_datediff, + "TIMESTAMPDIFF": _build_datediff, + } + + class Generator(Postgres.Generator): + # Vertica's LAST_DAY only accepts a date/timestamp; it does not take a + # date part argument like Snowflake's variant. + LAST_DAY_SUPPORTS_DATE_PART = False + + # Vertica supports MEDIAN and NVL2 natively; Postgres does not, and the + # inherited generator rewrites them into PERCENTILE_CONT and CASE + # expressions respectively. + SUPPORTS_MEDIAN = True + NVL2_SUPPORTED = True + + # Emit INTERVAL '' (SQL-standard) instead of the + # Postgres-style INTERVAL ' '. Vertica miscomputes the + # combined-string form for MONTH/YEAR units (treats them as a fixed + # number of days). See https://forum.vertica.com/discussion/229329/. + SINGLE_STRING_INTERVAL = False + + TRANSFORMS = { + **Postgres.Generator.TRANSFORMS, + # Postgres rewrites LAST_DAY into DATE_TRUNC + INTERVAL arithmetic + # because it lacks the function. Vertica supports it natively, so + # drop the rewrite and fall back to the base lastday_sql. + exp.LastDay: lambda self, e: self.function_fallback_sql(e), + # Postgres rewrites DATEDIFF into EXTRACT(epoch ...) / AGE() math. + # Vertica's native form is DATEDIFF(unit, start, end), matching + # Snowflake's signature. + exp.DateDiff: date_delta_sql("DATEDIFF"), + exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), + } diff --git a/superset/sql/parse.py b/superset/sql/parse.py index bb3ef5e1c4b..aab3a61c50e 100644 --- a/superset/sql/parse.py +++ b/superset/sql/parse.py @@ -45,7 +45,7 @@ from sqlglot.optimizer.scope import ( ) from superset.exceptions import QueryClauseValidationException, SupersetParseError -from superset.sql.dialects import DB2, Dremio, Firebolt, OpenSearch, Pinot +from superset.sql.dialects import DB2, Dremio, Firebolt, OpenSearch, Pinot, Vertica if TYPE_CHECKING: from superset.models.core import Database @@ -113,7 +113,7 @@ SQLGLOT_DIALECTS = { # "taosws": ??? "teradatasql": Dialects.TERADATA, "trino": Dialects.TRINO, - "vertica": Dialects.POSTGRES, + "vertica": Vertica, "yql": Dialects.CLICKHOUSE, } diff --git a/tests/unit_tests/sql/dialects/vertica_tests.py b/tests/unit_tests/sql/dialects/vertica_tests.py new file mode 100644 index 00000000000..bf39cf76cde --- /dev/null +++ b/tests/unit_tests/sql/dialects/vertica_tests.py @@ -0,0 +1,197 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from sqlglot import parse_one + +from superset.sql.dialects.vertica import Vertica + + +def test_last_day_round_trips_natively() -> None: + """ + Vertica supports LAST_DAY natively, unlike Postgres which rewrites it into + DATE_TRUNC + INTERVAL arithmetic. + """ + sql = "SELECT LAST_DAY(DATE('2026-01-15'))" + + ast = parse_one(sql, dialect=Vertica) + regenerated = ast.sql(dialect=Vertica) + + assert regenerated == "SELECT LAST_DAY(DATE('2026-01-15'))" + + +def test_last_day_not_rewritten_like_postgres() -> None: + """ + Postgres rewrites LAST_DAY because it lacks the function. Verify Vertica + does not inherit that rewrite. + """ + sql = "SELECT LAST_DAY(DATE('2026-01-15'))" + + postgres_sql = parse_one(sql, dialect="postgres").sql(dialect="postgres") + vertica_sql = parse_one(sql, dialect=Vertica).sql(dialect=Vertica) + + assert "DATE_TRUNC" in postgres_sql + assert "DATE_TRUNC" not in vertica_sql + assert "LAST_DAY" in vertica_sql + + +def test_translate_from_postgres_to_vertica() -> None: + """ + A LAST_DAY expression parsed from Postgres should generate the native + Vertica form, not the Postgres rewrite. + """ + sql = "SELECT LAST_DAY(DATE('2026-01-15'))" + + ast = parse_one(sql, dialect="postgres") + regenerated = ast.sql(dialect=Vertica) + + assert regenerated == "SELECT LAST_DAY(DATE('2026-01-15'))" + + +def test_postgres_features_still_work() -> None: + """ + Vertica inherits from Postgres, so unrelated Postgres syntax should still + parse and regenerate correctly. + """ + sql = "SELECT a::INT, b || c FROM t WHERE d ~ '^foo'" + + ast = parse_one(sql, dialect=Vertica) + regenerated = ast.sql(dialect=Vertica) + + assert regenerated == "SELECT CAST(a AS INT), b || c FROM t WHERE d ~ '^foo'" + + +def test_datediff_round_trips_natively() -> None: + """ + Vertica's DATEDIFF is ``DATEDIFF(unit, start, end)``. Postgres rewrites it + into ``EXTRACT(epoch ...) / N`` arithmetic. + """ + sql = "SELECT DATEDIFF('day', a, b) FROM t" + + ast = parse_one(sql, dialect=Vertica) + regenerated = ast.sql(dialect=Vertica) + + assert regenerated == "SELECT DATEDIFF(DAY, a, b) FROM t" + + +def test_datediff_month_round_trips_natively() -> None: + """ + The MONTH unit exercises Postgres's ``AGE()`` rewrite branch, which Vertica + does not need. + """ + sql = "SELECT DATEDIFF('month', a, b) FROM t" + + ast = parse_one(sql, dialect=Vertica) + regenerated = ast.sql(dialect=Vertica) + + assert regenerated == "SELECT DATEDIFF(MONTH, a, b) FROM t" + + +def test_datediff_not_rewritten_like_postgres() -> None: + """ + Postgres rewrites DATEDIFF; Vertica should keep the native call. + """ + sql = "SELECT DATEDIFF('day', a, b) FROM t" + + postgres_sql = parse_one(sql, dialect="postgres").sql(dialect="postgres") + vertica_sql = parse_one(sql, dialect=Vertica).sql(dialect=Vertica) + + assert "DATEDIFF" not in postgres_sql + assert "DATEDIFF(DAY" in vertica_sql + + +def test_median_round_trips_natively() -> None: + """ + Vertica supports MEDIAN as a native analytic/aggregate function. Postgres + rewrites it into PERCENTILE_CONT + WITHIN GROUP. + """ + sql = "SELECT MEDIAN(x) FROM t" + + ast = parse_one(sql, dialect=Vertica) + regenerated = ast.sql(dialect=Vertica) + + assert regenerated == "SELECT MEDIAN(x) FROM t" + + +def test_median_not_rewritten_like_postgres() -> None: + sql = "SELECT MEDIAN(x) FROM t" + + postgres_sql = parse_one(sql, dialect="postgres").sql(dialect="postgres") + vertica_sql = parse_one(sql, dialect=Vertica).sql(dialect=Vertica) + + assert "PERCENTILE_CONT" in postgres_sql + assert "PERCENTILE_CONT" not in vertica_sql + assert "MEDIAN" in vertica_sql + + +def test_nvl2_round_trips_natively() -> None: + """ + Vertica supports NVL2 natively; Postgres rewrites it into a CASE. + """ + sql = "SELECT NVL2(a, b, c) FROM t" + + ast = parse_one(sql, dialect=Vertica) + regenerated = ast.sql(dialect=Vertica) + + assert regenerated == "SELECT NVL2(a, b, c) FROM t" + + +def test_nvl2_not_rewritten_like_postgres() -> None: + sql = "SELECT NVL2(a, b, c) FROM t" + + postgres_sql = parse_one(sql, dialect="postgres").sql(dialect="postgres") + vertica_sql = parse_one(sql, dialect=Vertica).sql(dialect=Vertica) + + assert "CASE" in postgres_sql + assert "CASE" not in vertica_sql + assert "NVL2" in vertica_sql + + +def test_interval_uses_sql_standard_form() -> None: + """ + Vertica miscomputes month/year arithmetic when given the Postgres-style + single-string interval (``INTERVAL '2 MONTH'``). The SQL-standard form + ``INTERVAL '2' MONTH`` is correct on all Vertica versions. + See https://forum.vertica.com/discussion/229329/. + """ + sql = "SELECT date_col + INTERVAL '2 MONTH' FROM t" + + ast = parse_one(sql, dialect=Vertica) + regenerated = ast.sql(dialect=Vertica) + + assert regenerated == "SELECT date_col + INTERVAL '2' MONTH FROM t" + + +def test_interval_year_uses_sql_standard_form() -> None: + sql = "SELECT date_col + INTERVAL '1 YEAR' FROM t" + + ast = parse_one(sql, dialect=Vertica) + regenerated = ast.sql(dialect=Vertica) + + assert regenerated == "SELECT date_col + INTERVAL '1' YEAR FROM t" + + +def test_interval_diverges_from_postgres() -> None: + """ + Postgres emits the combined-string form; Vertica should not. + """ + sql = "SELECT date_col + INTERVAL '2 MONTH' FROM t" + + postgres_sql = parse_one(sql, dialect="postgres").sql(dialect="postgres") + vertica_sql = parse_one(sql, dialect=Vertica).sql(dialect=Vertica) + + assert "INTERVAL '2 MONTH'" in postgres_sql + assert "INTERVAL '2' MONTH" in vertica_sql