chore: organize SQL parsing files (#30258)

This commit is contained in:
Beto Dealmeida
2024-09-13 16:24:19 -04:00
committed by GitHub
parent 8cd18cac8c
commit bdf29cb7c2
13 changed files with 1650 additions and 886 deletions

View File

@@ -30,6 +30,7 @@ from superset.exceptions import (
QueryClauseValidationException,
SupersetSecurityException,
)
from superset.sql.parse import Table
from superset.sql_parse import (
add_table_name,
check_sql_functions_exist,
@@ -39,18 +40,13 @@ from superset.sql_parse import (
has_table_query,
insert_rls_as_subquery,
insert_rls_in_predicate,
KustoKQLStatement,
ParsedQuery,
sanitize_clause,
split_kql,
SQLScript,
SQLStatement,
strip_comments_from_sql,
Table,
)
def extract_tables(query: str, engine: Optional[str] = None) -> set[Table]:
def extract_tables(query: str, engine: str = "base") -> set[Table]:
"""
Helper function to extract tables referenced in a query.
"""
@@ -285,7 +281,7 @@ def test_extract_tables_illdefined() -> None:
extract_tables('SELECT * FROM "tbname')
assert (
str(excinfo.value)
== "You may have an error in your SQL statement. Error tokenizing 'SELECT * FROM \"tbnam'"
== "You may have an error in your SQL statement. Unable to parse script"
)
# odd edge case that works
@@ -1834,49 +1830,6 @@ SELECT * FROM t"""
assert ParsedQuery("USE foo; SELECT * FROM bar").is_select()
def test_sqlquery() -> None:
    """
    Exercise `SQLScript`: splitting, formatting, and settings extraction.
    """
    # Two statements are split apart and can be formatted individually.
    two_selects = SQLScript("SELECT 1; SELECT 2;", "sqlite")
    assert len(two_selects.statements) == 2
    assert two_selects.format() == "SELECT\n 1;\nSELECT\n 2"
    first_statement = two_selects.statements[0]
    assert first_statement.format() == "SELECT\n 1"

    # `a` is set twice; the reported value is the later one.
    repeated_set = SQLScript("SET a=1; SET a=2; SELECT 3;", "sqlite")
    assert repeated_set.get_settings() == {"a": "2"}

    # A bare KQL `set` (no value) is reported as `True`.
    kql_source = """set querytrace;
Events | take 100"""
    kql_script = SQLScript(kql_source, "kustokql")
    assert kql_script.get_settings() == {"querytrace": True}
def test_sqlstatement() -> None:
    """
    Exercise `SQLStatement`: referenced tables, formatting, and settings.
    """
    union_sql = "SELECT * FROM table1 UNION ALL SELECT * FROM table2"
    union_statement = SQLStatement(union_sql, "sqlite")

    # Both sides of the UNION contribute a table.
    expected_tables = {
        Table(table=name, schema=None, catalog=None)
        for name in ("table1", "table2")
    }
    assert union_statement.tables == expected_tables

    expected_format = (
        "SELECT\n *\nFROM table1\nUNION ALL\nSELECT\n *\nFROM table2"
    )
    assert union_statement.format() == expected_format

    # A single SET statement exposes its key/value pair.
    set_statement = SQLStatement("SET a=1", "sqlite")
    assert set_statement.get_settings() == {"a": "1"}
@pytest.mark.parametrize(
"engine",
[
@@ -1924,194 +1877,3 @@ def test_extract_tables_from_jinja_sql(
)
== expected
)
def test_kustokqlstatement_split_query() -> None:
    """
    Check that `KustoKQLStatement.split_query` splits a multi-statement script.
    """
    kql = """
let totalPagesPerDay = PageViews
| summarize by Page, Day = startofday(Timestamp)
| summarize count() by Day;
let materializedScope = PageViews
| summarize by Page, Day = startofday(Timestamp);
let cachedResult = materialize(materializedScope);
cachedResult
| project Page, Day1 = Day
| join kind = inner
(
cachedResult
| project Page, Day2 = Day
)
on Page
| where Day2 > Day1
| summarize count() by Day1, Day2
| join kind = inner
totalPagesPerDay
on $left.Day1 == $right.Day
| project Day1, Day2, Percentage = count_*100.0/count_1
"""
    parts = KustoKQLStatement.split_query(kql, "kustokql")

    # Three `let` bindings followed by the final tabular expression.
    assert len(parts) == 4
def test_kustokqlstatement_with_program() -> None:
    """
    Check `KustoKQLStatement.split_query` on KQL that embeds a program.

    The embedded program sits inside a triple-backtick literal and contains
    semicolons of its own; the whole input must still be one statement.
    """
    kql = """
print program = ```
public class Program {
public static void Main() {
System.Console.WriteLine("Hello!");
}
}```
"""
    parts = KustoKQLStatement.split_query(kql, "kustokql")
    assert len(parts) == 1
def test_kustokqlstatement_with_set() -> None:
    """
    Check `KustoKQLStatement.split_query` on KQL that starts with a set command.
    """
    kql = """
set querytrace;
Events | take 100
"""
    parts = KustoKQLStatement.split_query(kql, "kustokql")
    assert len(parts) == 2

    # The `set` command and the query that follows are separate statements.
    set_statement, query_statement = parts[0], parts[1]
    assert set_statement.format() == "set querytrace"
    assert query_statement.format() == "Events | take 100"
@pytest.mark.parametrize(
    "kql,statements",
    [
        # Commas inside a function call.
        ('print banner=strcat("Hello", ", ", "World!")', 1),
        # Escaped quotes inside a single-quoted string.
        (r"print 'O\'Malley\'s'", 1),
        # Semicolon inside a single-quoted string with escaped quotes.
        (r"print 'O\'Mal;ley\'s'", 1),
        # Semicolons and newlines inside a triple-backtick literal.
        ("print ```foo;\nbar;\nbaz;```\n", 1),
    ],
)
def test_kustokql_statement_split_special(kql: str, statements: int) -> None:
    """
    Check the statement count `KustoKQLStatement.split_query` yields for
    KQL containing quoting and literal edge cases.
    """
    parts = KustoKQLStatement.split_query(kql, "kustokql")
    assert len(parts) == statements
def test_split_kql() -> None:
    """
    Check that `split_kql` breaks a script into its individual statements.

    The expected pieces drop the `;` separators and keep the surrounding
    newlines exactly as they appear in the input.
    """
    kql = """
let totalPagesPerDay = PageViews
| summarize by Page, Day = startofday(Timestamp)
| summarize count() by Day;
let materializedScope = PageViews
| summarize by Page, Day = startofday(Timestamp);
let cachedResult = materialize(materializedScope);
cachedResult
| project Page, Day1 = Day
| join kind = inner
(
cachedResult
| project Page, Day2 = Day
)
on Page
| where Day2 > Day1
| summarize count() by Day1, Day2
| join kind = inner
totalPagesPerDay
on $left.Day1 == $right.Day
| project Day1, Day2, Percentage = count_*100.0/count_1
"""
    expected_pieces = [
        """
let totalPagesPerDay = PageViews
| summarize by Page, Day = startofday(Timestamp)
| summarize count() by Day""",
        """
let materializedScope = PageViews
| summarize by Page, Day = startofday(Timestamp)""",
        """
let cachedResult = materialize(materializedScope)""",
        """
cachedResult
| project Page, Day1 = Day
| join kind = inner
(
cachedResult
| project Page, Day2 = Day
)
on Page
| where Day2 > Day1
| summarize count() by Day1, Day2
| join kind = inner
totalPagesPerDay
on $left.Day1 == $right.Day
| project Day1, Day2, Percentage = count_*100.0/count_1
""",
    ]
    assert split_kql(kql) == expected_pieces
@pytest.mark.parametrize(
    ("engine", "sql", "expected"),
    [
        # SQLite tests
        ("sqlite", "SELECT 1", False),
        ("sqlite", "INSERT INTO foo VALUES (1)", True),
        ("sqlite", "UPDATE foo SET bar = 2 WHERE id = 1", True),
        ("sqlite", "DELETE FROM foo WHERE id = 1", True),
        ("sqlite", "CREATE TABLE foo (id INT, bar TEXT)", True),
        ("sqlite", "DROP TABLE foo", True),
        ("sqlite", "EXPLAIN SELECT * FROM foo", False),
        ("sqlite", "PRAGMA table_info(foo)", False),
        # PostgreSQL tests
        ("postgresql", "SELECT 1", False),
        ("postgresql", "INSERT INTO foo (id, bar) VALUES (1, 'test')", True),
        ("postgresql", "UPDATE foo SET bar = 'new' WHERE id = 1", True),
        ("postgresql", "DELETE FROM foo WHERE id = 1", True),
        ("postgresql", "CREATE TABLE foo (id SERIAL PRIMARY KEY, bar TEXT)", True),
        ("postgresql", "DROP TABLE foo", True),
        ("postgresql", "EXPLAIN ANALYZE SELECT * FROM foo", False),
        # EXPLAIN ANALYZE wrapping a DELETE is expected to count as a mutation.
        ("postgresql", "EXPLAIN ANALYZE DELETE FROM foo", True),
        ("postgresql", "SHOW search_path", False),
        ("postgresql", "SET search_path TO public", False),
        # A CTE feeding a plain SELECT is not a mutation.
        (
            "postgres",
            """
with source as (
select 1 as one
)
select * from source
""",
            False,
        ),
        # Trino tests
        ("trino", "SELECT 1", False),
        ("trino", "INSERT INTO foo VALUES (1, 'bar')", True),
        ("trino", "UPDATE foo SET bar = 'baz' WHERE id = 1", True),
        ("trino", "DELETE FROM foo WHERE id = 1", True),
        ("trino", "CREATE TABLE foo (id INT, bar VARCHAR)", True),
        ("trino", "DROP TABLE foo", True),
        ("trino", "EXPLAIN SELECT * FROM foo", False),
        ("trino", "SHOW SCHEMAS", False),
        ("trino", "SET SESSION optimization_level = '3'", False),
        # Kusto KQL tests
        ("kustokql", "tbl | limit 100", False),
        ("kustokql", "let foo = 1; tbl | where bar == foo", False),
        ("kustokql", ".show tables", False),
        ("kustokql", "print 1", False),
        ("kustokql", "set querytrace; Events | take 100", False),
        ("kustokql", ".drop table foo", True),
        ("kustokql", ".set-or-append table foo <| bar", True),
    ],
)
def test_has_mutation(engine: str, sql: str, expected: bool) -> None:
    """
    Check `SQLScript.has_mutation` for each engine/statement combination.
    """
    script = SQLScript(sql, engine)
    assert script.has_mutation() == expected