mirror of
https://github.com/apache/superset.git
synced 2026-04-28 12:34:23 +00:00
Compare commits
1 Commits
docs/testi
...
impala-dia
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
a5fb5dcd54 |
@@ -17,5 +17,6 @@
|
||||
|
||||
from .dremio import Dremio
|
||||
from .firebolt import Firebolt, FireboltOld
|
||||
from .impala import Impala
|
||||
|
||||
__all__ = ["Dremio", "Firebolt", "FireboltOld"]
|
||||
__all__ = ["Dremio", "Firebolt", "FireboltOld", "Impala"]
|
||||
|
||||
259
superset/sql/dialects/impala.py
Normal file
259
superset/sql/dialects/impala.py
Normal file
@@ -0,0 +1,259 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import typing as t
|
||||
|
||||
from sqlglot import exp, generator, parser
|
||||
from sqlglot.dialects.hive import Hive
|
||||
from sqlglot.helper import seq_get
|
||||
|
||||
|
||||
class Impala(Hive):
|
||||
"""
|
||||
A sqlglot dialect for Impala.
|
||||
|
||||
Impala is similar to Hive but with some key differences:
|
||||
- No support for LATERAL VIEW, use JOIN with UNNEST instead
|
||||
- Different date/time functions
|
||||
- No support for TRANSFORM
|
||||
- Limited support for certain Hive-specific features
|
||||
"""
|
||||
|
||||
class Parser(Hive.Parser):
|
||||
FUNCTIONS = {
|
||||
**Hive.Parser.FUNCTIONS,
|
||||
# Impala-specific functions
|
||||
"MONTHS_ADD": lambda args: exp.DateAdd(
|
||||
this=seq_get(args, 0),
|
||||
expression=seq_get(args, 1),
|
||||
unit=exp.Literal.string("MONTH"),
|
||||
),
|
||||
"MONTHS_SUB": lambda args: exp.DateSub(
|
||||
this=seq_get(args, 0),
|
||||
expression=seq_get(args, 1),
|
||||
unit=exp.Literal.string("MONTH"),
|
||||
),
|
||||
"YEARS_ADD": lambda args: exp.DateAdd(
|
||||
this=seq_get(args, 0),
|
||||
expression=seq_get(args, 1),
|
||||
unit=exp.Literal.string("YEAR"),
|
||||
),
|
||||
"YEARS_SUB": lambda args: exp.DateSub(
|
||||
this=seq_get(args, 0),
|
||||
expression=seq_get(args, 1),
|
||||
unit=exp.Literal.string("YEAR"),
|
||||
),
|
||||
"DAYS_ADD": lambda args: exp.DateAdd(
|
||||
this=seq_get(args, 0),
|
||||
expression=seq_get(args, 1),
|
||||
unit=exp.Literal.string("DAY"),
|
||||
),
|
||||
"DAYS_SUB": lambda args: exp.DateSub(
|
||||
this=seq_get(args, 0),
|
||||
expression=seq_get(args, 1),
|
||||
unit=exp.Literal.string("DAY"),
|
||||
),
|
||||
"WEEKS_ADD": lambda args: exp.DateAdd(
|
||||
this=seq_get(args, 0),
|
||||
expression=seq_get(args, 1),
|
||||
unit=exp.Literal.string("WEEK"),
|
||||
),
|
||||
"WEEKS_SUB": lambda args: exp.DateSub(
|
||||
this=seq_get(args, 0),
|
||||
expression=seq_get(args, 1),
|
||||
unit=exp.Literal.string("WEEK"),
|
||||
),
|
||||
# Impala uses different names for some functions
|
||||
"DATE_PART": lambda args: _parse_date_part(args),
|
||||
"EXTRACT": lambda args: _parse_extract(args),
|
||||
# Override Hive functions that Impala doesn't support
|
||||
"STR_TO_MAP": None, # Not supported in Impala
|
||||
"XPATH": None, # Not supported in Impala
|
||||
"XPATH_BOOLEAN": None,
|
||||
"XPATH_DOUBLE": None,
|
||||
"XPATH_FLOAT": None,
|
||||
"XPATH_INT": None,
|
||||
"XPATH_LONG": None,
|
||||
"XPATH_SHORT": None,
|
||||
"XPATH_STRING": None,
|
||||
}
|
||||
|
||||
NO_PAREN_FUNCTION_PARSERS = {
|
||||
**parser.Parser.NO_PAREN_FUNCTION_PARSERS,
|
||||
# Remove TRANSFORM as it's not supported in Impala
|
||||
}
|
||||
NO_PAREN_FUNCTION_PARSERS.pop("TRANSFORM", None)
|
||||
|
||||
def _parse_lateral(self) -> t.Optional[exp.Lateral]:
|
||||
# Impala doesn't support LATERAL VIEW, it uses different syntax
|
||||
# This prevents parsing LATERAL VIEW syntax
|
||||
return None
|
||||
|
||||
class Generator(Hive.Generator):
|
||||
# Impala-specific type mappings
|
||||
TYPE_MAPPING = {
|
||||
**Hive.Generator.TYPE_MAPPING,
|
||||
exp.DataType.Type.VARCHAR: "STRING", # Impala treats VARCHAR as STRING
|
||||
exp.DataType.Type.NVARCHAR: "STRING",
|
||||
exp.DataType.Type.CHAR: "STRING", # Impala treats CHAR as STRING
|
||||
exp.DataType.Type.NCHAR: "STRING",
|
||||
}
|
||||
|
||||
TRANSFORMS = {
|
||||
**Hive.Generator.TRANSFORMS,
|
||||
# Date/time functions
|
||||
exp.DateAdd: lambda self, e: _date_add_sql(self, e),
|
||||
exp.DateSub: lambda self, e: _date_sub_sql(self, e),
|
||||
# Impala doesn't support certain Hive features
|
||||
exp.StrToMap: lambda self, e: self.unsupported(
|
||||
"STR_TO_MAP is not supported in Impala"
|
||||
),
|
||||
exp.Transform: lambda self, e: self.unsupported(
|
||||
"TRANSFORM is not supported in Impala"
|
||||
),
|
||||
exp.QueryTransform: lambda self, e: self.unsupported(
|
||||
"TRANSFORM is not supported in Impala"
|
||||
),
|
||||
# Override LATERAL VIEW handling
|
||||
exp.Lateral: lambda self, e: _lateral_sql(self, e),
|
||||
# JSON functions have different names in Impala
|
||||
exp.JSONExtract: lambda self, e: self.func(
|
||||
"JSON_QUERY", e.this, e.expression
|
||||
),
|
||||
exp.JSONExtractScalar: lambda self, e: self.func(
|
||||
"JSON_VALUE", e.this, e.expression
|
||||
),
|
||||
# Impala uses different syntax for COLLECT_LIST/SET
|
||||
exp.ArrayAgg: lambda self, e: self.func(
|
||||
"GROUP_CONCAT",
|
||||
e.this.this if isinstance(e.this, exp.Order) else e.this,
|
||||
exp.Literal.string(","),
|
||||
),
|
||||
exp.ArrayUniqueAgg: lambda self, e: self.func(
|
||||
"GROUP_CONCAT",
|
||||
self.sql(exp.Distinct(expressions=[e.this])),
|
||||
exp.Literal.string(","),
|
||||
),
|
||||
}
|
||||
|
||||
def datatype_sql(self, expression: exp.DataType) -> str:
|
||||
# Impala treats CHAR/VARCHAR as STRING
|
||||
if expression.is_type("char", "varchar", "nchar", "nvarchar"):
|
||||
return "STRING"
|
||||
|
||||
return super().datatype_sql(expression)
|
||||
|
||||
def lateral_sql(self, expression: exp.Lateral) -> str:
|
||||
# Impala doesn't use LATERAL VIEW syntax
|
||||
# Instead, it uses regular JOIN with UNNEST
|
||||
if isinstance(expression.this, exp.Unnest):
|
||||
return self.sql(expression.this)
|
||||
return super().lateral_sql(expression)
|
||||
|
||||
|
||||
def _parse_date_part(args: t.List[exp.Expression]) -> exp.Expression:
|
||||
"""Parse DATE_PART function which extracts date parts."""
|
||||
part = seq_get(args, 0)
|
||||
date = seq_get(args, 1)
|
||||
|
||||
if isinstance(part, exp.Literal):
|
||||
part_name = part.name.upper()
|
||||
if part_name == "YEAR":
|
||||
return exp.Year(this=date)
|
||||
elif part_name == "MONTH":
|
||||
return exp.Month(this=date)
|
||||
elif part_name == "DAY":
|
||||
return exp.Day(this=date)
|
||||
elif part_name == "HOUR":
|
||||
return exp.Hour(this=date)
|
||||
elif part_name == "MINUTE":
|
||||
return exp.Minute(this=date)
|
||||
elif part_name == "SECOND":
|
||||
return exp.Second(this=date)
|
||||
|
||||
return exp.Extract(this=part, expression=date)
|
||||
|
||||
|
||||
def _parse_extract(args: t.List[exp.Expression]) -> exp.Expression:
|
||||
"""Parse EXTRACT function."""
|
||||
return exp.Extract(this=seq_get(args, 0), expression=seq_get(args, 1))
|
||||
|
||||
|
||||
def _date_add_sql(self: generator.Generator, expression: exp.DateAdd) -> str:
|
||||
"""Generate SQL for date addition in Impala."""
|
||||
unit = expression.text("unit").upper()
|
||||
|
||||
# Map generic units to Impala-specific functions
|
||||
unit_map = {
|
||||
"YEAR": "YEARS_ADD",
|
||||
"MONTH": "MONTHS_ADD",
|
||||
"WEEK": "WEEKS_ADD",
|
||||
"DAY": "DAYS_ADD",
|
||||
}
|
||||
|
||||
func_name = unit_map.get(unit, "DATE_ADD")
|
||||
|
||||
if func_name != "DATE_ADD":
|
||||
return self.func(func_name, expression.this, expression.expression)
|
||||
|
||||
# For other units, use DATE_ADD with INTERVAL
|
||||
return self.func(
|
||||
"DATE_ADD",
|
||||
expression.this,
|
||||
self.sql(exp.Interval(this=expression.expression, unit=expression.unit)),
|
||||
)
|
||||
|
||||
|
||||
def _date_sub_sql(self: generator.Generator, expression: exp.DateSub) -> str:
|
||||
"""Generate SQL for date subtraction in Impala."""
|
||||
unit = expression.text("unit").upper()
|
||||
|
||||
# Map generic units to Impala-specific functions
|
||||
unit_map = {
|
||||
"YEAR": "YEARS_SUB",
|
||||
"MONTH": "MONTHS_SUB",
|
||||
"WEEK": "WEEKS_SUB",
|
||||
"DAY": "DAYS_SUB",
|
||||
}
|
||||
|
||||
func_name = unit_map.get(unit, "DATE_SUB")
|
||||
|
||||
if func_name != "DATE_SUB":
|
||||
return self.func(func_name, expression.this, expression.expression)
|
||||
|
||||
# For other units, use DATE_SUB with INTERVAL
|
||||
return self.func(
|
||||
"DATE_SUB",
|
||||
expression.this,
|
||||
self.sql(exp.Interval(this=expression.expression, unit=expression.unit)),
|
||||
)
|
||||
|
||||
|
||||
def _lateral_sql(self: generator.Generator, expression: exp.Lateral) -> str:
|
||||
"""Generate SQL for LATERAL expressions in Impala."""
|
||||
# Impala doesn't support LATERAL VIEW syntax
|
||||
# It uses regular JOIN with UNNEST instead
|
||||
this = expression.this
|
||||
|
||||
if isinstance(this, exp.Unnest):
|
||||
# Just return the UNNEST expression without LATERAL VIEW
|
||||
return self.sql(this)
|
||||
|
||||
# For other cases, try to generate standard SQL
|
||||
return self.sql(this)
|
||||
@@ -44,7 +44,7 @@ from sqlglot.optimizer.scope import (
|
||||
)
|
||||
|
||||
from superset.exceptions import QueryClauseValidationException, SupersetParseError
|
||||
from superset.sql.dialects import Dremio, Firebolt
|
||||
from superset.sql.dialects import Dremio, Firebolt, Impala
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from superset.models.core import Database
|
||||
@@ -81,7 +81,7 @@ SQLGLOT_DIALECTS = {
|
||||
"hana": Dialects.POSTGRES,
|
||||
"hive": Dialects.HIVE,
|
||||
# "ibmi": ???
|
||||
# "impala": ???
|
||||
"impala": Impala,
|
||||
# "kustosql": ???
|
||||
# "kylin": ???
|
||||
"mariadb": Dialects.MYSQL,
|
||||
|
||||
319
tests/unit_tests/sql/dialects/impala_tests.py
Normal file
319
tests/unit_tests/sql/dialects/impala_tests.py
Normal file
@@ -0,0 +1,319 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import pytest
|
||||
from sqlglot import exp, parse_one
|
||||
|
||||
from superset.sql.dialects.impala import Impala
|
||||
|
||||
|
||||
def test_impala_date_add_functions() -> None:
|
||||
"""
|
||||
Test Impala-specific date addition functions.
|
||||
"""
|
||||
# Test MONTHS_ADD
|
||||
sql = "SELECT MONTHS_ADD(date_col, 3) FROM table1"
|
||||
ast = parse_one(sql, dialect=Impala)
|
||||
regenerated = ast.sql(dialect=Impala)
|
||||
assert regenerated == "SELECT MONTHS_ADD(date_col, 3) FROM table1"
|
||||
|
||||
# Test YEARS_ADD
|
||||
sql = "SELECT YEARS_ADD(date_col, 2) FROM table1"
|
||||
ast = parse_one(sql, dialect=Impala)
|
||||
regenerated = ast.sql(dialect=Impala)
|
||||
assert regenerated == "SELECT YEARS_ADD(date_col, 2) FROM table1"
|
||||
|
||||
# Test DAYS_ADD
|
||||
sql = "SELECT DAYS_ADD(date_col, 7) FROM table1"
|
||||
ast = parse_one(sql, dialect=Impala)
|
||||
regenerated = ast.sql(dialect=Impala)
|
||||
assert regenerated == "SELECT DAYS_ADD(date_col, 7) FROM table1"
|
||||
|
||||
# Test WEEKS_ADD
|
||||
sql = "SELECT WEEKS_ADD(date_col, 4) FROM table1"
|
||||
ast = parse_one(sql, dialect=Impala)
|
||||
regenerated = ast.sql(dialect=Impala)
|
||||
assert regenerated == "SELECT WEEKS_ADD(date_col, 4) FROM table1"
|
||||
|
||||
|
||||
def test_impala_date_sub_functions() -> None:
|
||||
"""
|
||||
Test Impala-specific date subtraction functions.
|
||||
"""
|
||||
# Test MONTHS_SUB
|
||||
sql = "SELECT MONTHS_SUB(date_col, 3) FROM table1"
|
||||
ast = parse_one(sql, dialect=Impala)
|
||||
regenerated = ast.sql(dialect=Impala)
|
||||
assert regenerated == "SELECT MONTHS_SUB(date_col, 3) FROM table1"
|
||||
|
||||
# Test YEARS_SUB
|
||||
sql = "SELECT YEARS_SUB(date_col, 2) FROM table1"
|
||||
ast = parse_one(sql, dialect=Impala)
|
||||
regenerated = ast.sql(dialect=Impala)
|
||||
assert regenerated == "SELECT YEARS_SUB(date_col, 2) FROM table1"
|
||||
|
||||
# Test DAYS_SUB
|
||||
sql = "SELECT DAYS_SUB(date_col, 7) FROM table1"
|
||||
ast = parse_one(sql, dialect=Impala)
|
||||
regenerated = ast.sql(dialect=Impala)
|
||||
assert regenerated == "SELECT DAYS_SUB(date_col, 7) FROM table1"
|
||||
|
||||
# Test WEEKS_SUB
|
||||
sql = "SELECT WEEKS_SUB(date_col, 4) FROM table1"
|
||||
ast = parse_one(sql, dialect=Impala)
|
||||
regenerated = ast.sql(dialect=Impala)
|
||||
assert regenerated == "SELECT WEEKS_SUB(date_col, 4) FROM table1"
|
||||
|
||||
|
||||
def test_impala_date_part_function() -> None:
|
||||
"""
|
||||
Test DATE_PART function parsing.
|
||||
"""
|
||||
# Test DATE_PART with YEAR
|
||||
sql = "SELECT DATE_PART('YEAR', date_col) FROM table1"
|
||||
ast = parse_one(sql, dialect=Impala)
|
||||
regenerated = ast.sql(dialect=Impala)
|
||||
assert regenerated == "SELECT YEAR(date_col) FROM table1"
|
||||
|
||||
# Test DATE_PART with MONTH
|
||||
sql = "SELECT DATE_PART('MONTH', date_col) FROM table1"
|
||||
ast = parse_one(sql, dialect=Impala)
|
||||
regenerated = ast.sql(dialect=Impala)
|
||||
assert regenerated == "SELECT MONTH(date_col) FROM table1"
|
||||
|
||||
# Test DATE_PART with DAY
|
||||
sql = "SELECT DATE_PART('DAY', date_col) FROM table1"
|
||||
ast = parse_one(sql, dialect=Impala)
|
||||
regenerated = ast.sql(dialect=Impala)
|
||||
assert regenerated == "SELECT DAY(date_col) FROM table1"
|
||||
|
||||
|
||||
def test_impala_data_types() -> None:
|
||||
"""
|
||||
Test that Impala treats VARCHAR/CHAR as STRING.
|
||||
"""
|
||||
# Test VARCHAR conversion
|
||||
sql = "CREATE TABLE test (col1 VARCHAR(100))"
|
||||
ast = parse_one(sql, dialect=Impala)
|
||||
regenerated = ast.sql(dialect=Impala)
|
||||
assert regenerated == "CREATE TABLE test (col1 STRING)"
|
||||
|
||||
# Test CHAR conversion
|
||||
sql = "CREATE TABLE test (col1 CHAR(10))"
|
||||
ast = parse_one(sql, dialect=Impala)
|
||||
regenerated = ast.sql(dialect=Impala)
|
||||
assert regenerated == "CREATE TABLE test (col1 STRING)"
|
||||
|
||||
# Test NVARCHAR conversion
|
||||
sql = "CREATE TABLE test (col1 NVARCHAR(50))"
|
||||
ast = parse_one(sql, dialect=Impala)
|
||||
regenerated = ast.sql(dialect=Impala)
|
||||
assert regenerated == "CREATE TABLE test (col1 STRING)"
|
||||
|
||||
|
||||
def test_impala_unsupported_functions() -> None:
|
||||
"""
|
||||
Test that unsupported Hive functions are handled properly.
|
||||
"""
|
||||
# STR_TO_MAP is not supported
|
||||
with pytest.raises(Exception) as exc_info:
|
||||
sql = "SELECT STR_TO_MAP('a:1,b:2', ',', ':') FROM table1"
|
||||
ast = parse_one(sql, dialect=Impala)
|
||||
ast.sql(dialect=Impala)
|
||||
assert "STR_TO_MAP is not supported" in str(exc_info.value)
|
||||
|
||||
# TRANSFORM is not supported
|
||||
with pytest.raises(Exception) as exc_info:
|
||||
sql = "SELECT TRANSFORM(col) USING 'script.py' FROM table1"
|
||||
ast = parse_one(sql, dialect=Impala)
|
||||
ast.sql(dialect=Impala)
|
||||
assert "TRANSFORM is not supported" in str(exc_info.value)
|
||||
|
||||
|
||||
def test_impala_json_functions() -> None:
|
||||
"""
|
||||
Test that JSON functions are mapped to Impala equivalents.
|
||||
"""
|
||||
# Test JSON extract scalar to JSON_VALUE
|
||||
sql = "SELECT GET_JSON_OBJECT(json_col, '$.field') FROM table1"
|
||||
ast = parse_one(sql, dialect=Impala)
|
||||
regenerated = ast.sql(dialect=Impala)
|
||||
assert regenerated == "SELECT JSON_VALUE(json_col, '$.field') FROM table1"
|
||||
|
||||
|
||||
def test_impala_aggregate_functions() -> None:
|
||||
"""
|
||||
Test that COLLECT_LIST/SET are mapped to GROUP_CONCAT.
|
||||
"""
|
||||
# Test COLLECT_LIST conversion
|
||||
sql = "SELECT COLLECT_LIST(col1) FROM table1 GROUP BY col2"
|
||||
ast = parse_one(sql, dialect=Impala)
|
||||
regenerated = ast.sql(dialect=Impala)
|
||||
assert regenerated == "SELECT GROUP_CONCAT(col1, ',') FROM table1 GROUP BY col2"
|
||||
|
||||
# Test COLLECT_SET conversion
|
||||
sql = "SELECT COLLECT_SET(col1) FROM table1 GROUP BY col2"
|
||||
ast = parse_one(sql, dialect=Impala)
|
||||
regenerated = ast.sql(dialect=Impala)
|
||||
assert (
|
||||
regenerated
|
||||
== "SELECT GROUP_CONCAT(DISTINCT col1, ',') FROM table1 GROUP BY col2"
|
||||
)
|
||||
|
||||
|
||||
def test_standard_date_add_sub() -> None:
|
||||
"""
|
||||
Test standard DATE_ADD/DATE_SUB with generic units.
|
||||
"""
|
||||
# Generic DATE_ADD (should be parsed and regenerated as-is)
|
||||
sql = "SELECT DATE_ADD(date_col, 5) FROM table1"
|
||||
ast = parse_one(sql, dialect=Impala)
|
||||
regenerated = ast.sql(dialect=Impala)
|
||||
assert regenerated == "SELECT DAYS_ADD(date_col, 5) FROM table1"
|
||||
|
||||
|
||||
def test_unnest_without_lateral_view() -> None:
|
||||
"""
|
||||
Test that UNNEST is handled without LATERAL VIEW syntax.
|
||||
"""
|
||||
# Impala uses JOIN with UNNEST instead of LATERAL VIEW
|
||||
sql = "SELECT * FROM table1, UNNEST(array_col) AS t(elem)"
|
||||
ast = parse_one(sql, dialect=Impala)
|
||||
regenerated = ast.sql(dialect=Impala)
|
||||
# Should not contain LATERAL VIEW
|
||||
assert "LATERAL VIEW" not in regenerated
|
||||
assert "UNNEST" in regenerated
|
||||
|
||||
|
||||
def test_extract_function() -> None:
|
||||
"""
|
||||
Test EXTRACT function.
|
||||
"""
|
||||
sql = "SELECT EXTRACT(YEAR FROM date_col) FROM table1"
|
||||
ast = parse_one(sql, dialect=Impala)
|
||||
regenerated = ast.sql(dialect=Impala)
|
||||
assert regenerated == "SELECT EXTRACT(YEAR FROM date_col) FROM table1"
|
||||
|
||||
sql = "SELECT EXTRACT(MONTH FROM date_col) FROM table1"
|
||||
ast = parse_one(sql, dialect=Impala)
|
||||
regenerated = ast.sql(dialect=Impala)
|
||||
assert regenerated == "SELECT EXTRACT(MONTH FROM date_col) FROM table1"
|
||||
|
||||
|
||||
def test_complex_query() -> None:
|
||||
"""
|
||||
Test a more complex query with multiple Impala-specific features.
|
||||
"""
|
||||
sql = """
|
||||
SELECT
|
||||
customer_id,
|
||||
GROUP_CONCAT(product_name, ',') as products,
|
||||
MONTHS_ADD(order_date, 1) as next_month,
|
||||
JSON_VALUE(order_details, '$.total') as total
|
||||
FROM orders
|
||||
WHERE YEARS_SUB(order_date, 1) > '2022-01-01'
|
||||
GROUP BY customer_id, order_date, order_details
|
||||
"""
|
||||
|
||||
ast = parse_one(sql, dialect=Impala)
|
||||
regenerated = ast.sql(dialect=Impala)
|
||||
|
||||
# Check key components are preserved
|
||||
assert "GROUP_CONCAT" in regenerated
|
||||
assert "MONTHS_ADD" in regenerated
|
||||
assert "JSON_VALUE" in regenerated
|
||||
assert "YEARS_SUB" in regenerated
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"func, expected_class, unit",
|
||||
[
|
||||
("MONTHS_ADD(x, 1)", exp.DateAdd, "MONTH"),
|
||||
("YEARS_SUB(x, 2)", exp.DateSub, "YEAR"),
|
||||
("DAYS_ADD(x, 3)", exp.DateAdd, "DAY"),
|
||||
("WEEKS_SUB(x, 4)", exp.DateSub, "WEEK"),
|
||||
],
|
||||
)
|
||||
def test_date_functions_parse_correctly(func, expected_class, unit):
|
||||
parsed = parse_one(func, read=Impala)
|
||||
assert isinstance(parsed, expected_class)
|
||||
assert parsed.text("unit").upper() == unit
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"sql, expected_expr",
|
||||
[
|
||||
("DATE_PART('year', d)", exp.Year),
|
||||
("DATE_PART('second', d)", exp.Second),
|
||||
("EXTRACT(year FROM d)", exp.Extract),
|
||||
],
|
||||
)
|
||||
def test_date_part_and_extract(sql, expected_expr):
|
||||
parsed = parse_one(sql, read=Impala)
|
||||
if expected_expr == exp.Extract:
|
||||
assert isinstance(parsed, expected_expr)
|
||||
else:
|
||||
assert isinstance(parsed, expected_expr)
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"expr, expected_sql",
|
||||
[
|
||||
(
|
||||
exp.DateAdd(
|
||||
this=exp.Column(this="x"),
|
||||
expression=exp.Literal.number(1),
|
||||
unit=exp.Literal.string("YEAR"),
|
||||
),
|
||||
"YEARS_ADD(x, 1)",
|
||||
),
|
||||
(
|
||||
exp.DateSub(
|
||||
this=exp.Column(this="y"),
|
||||
expression=exp.Literal.number(2),
|
||||
unit=exp.Literal.string("MONTH"),
|
||||
),
|
||||
"MONTHS_SUB(y, 2)",
|
||||
),
|
||||
],
|
||||
)
|
||||
def test_sql_generation_for_date_add_sub(expr, expected_sql):
|
||||
sql = expr.sql(dialect=Impala)
|
||||
assert sql == expected_sql
|
||||
|
||||
|
||||
def test_string_type_is_mapped():
|
||||
expr = exp.DataType.build("VARCHAR")
|
||||
sql = expr.sql(dialect=Impala)
|
||||
assert sql == "STRING"
|
||||
|
||||
|
||||
def test_unsupported_functions_raise():
|
||||
gen = Impala.Generator()
|
||||
with pytest.raises(Exception):
|
||||
gen.sql(exp.StrToMap(this=exp.Literal.string("x")))
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"sql",
|
||||
[
|
||||
"LATERAL VIEW explode(arr) t",
|
||||
],
|
||||
)
|
||||
def test_lateral_not_supported(sql):
|
||||
parsed = parse_one(sql, read=Impala)
|
||||
assert parsed is None or isinstance(parsed, exp.Expression)
|
||||
Reference in New Issue
Block a user