mirror of
https://github.com/apache/superset.git
synced 2026-04-12 20:57:55 +00:00
feat: apply RLS conservatively (#38683)
This commit is contained in:
382
tests/unit_tests/models/test_virtual_dataset_format.py
Normal file
382
tests/unit_tests/models/test_virtual_dataset_format.py
Normal file
@@ -0,0 +1,382 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
|
||||
"""
Tests for virtual dataset SQL handling with RLS and sqlglot formatting.

Two issues are covered:

1. **Unnecessary sqlglot round-trip** – ``get_from_clause()`` used to call
   ``format()`` even when no RLS rules applied, which could silently rewrite
   dialect-specific SQL (e.g. ``NVL`` → ``COALESCE`` on Redshift).

2. **RLS subquery alias mismatch** – ``RLSAsSubqueryTransformer`` used the
   fully-qualified table name (``"schema.table"``) as the subquery alias,
   which broke column references that used just the table name
   (``table.column``), producing Redshift errors like
   ``column "X" does not exist in virtual_table``.
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from flask import Flask
|
||||
from sqlalchemy.sql.elements import TextClause
|
||||
|
||||
from superset.models.helpers import ExploreMixin
|
||||
from superset.sql.parse import RLSMethod, SQLStatement, Table
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Fixtures
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@pytest.fixture
def virtual_datasource() -> MagicMock:
    """
    Create a mock datasource that behaves like a virtual dataset.

    The mock is spec'd on ``ExploreMixin`` and has the real
    ``ExploreMixin.get_from_clause`` bound to it, so tests exercise the
    actual implementation while the database and engine spec stay mocked.

    :returns: a ``MagicMock`` configured with a ``redshift`` engine, no CTE
        query, no catalog, and ``public`` as both schema and default schema
    """
    datasource = MagicMock(spec=ExploreMixin)

    # Wire up real methods from ExploreMixin: ``__get__`` binds the plain
    # function to the mock so that ``self`` inside it is the mock itself.
    datasource.get_from_clause = ExploreMixin.get_from_clause.__get__(datasource)
    datasource.text = lambda sql: TextClause(sql)

    # Mock the database and db_engine_spec
    datasource.db_engine_spec.engine = "redshift"
    datasource.db_engine_spec.get_cte_query.return_value = None
    datasource.db_engine_spec.cte_alias = "__cte"
    datasource.database.get_default_schema.return_value = "public"
    datasource.catalog = None
    datasource.schema = "public"

    return datasource
|
||||
|
||||
|
||||
def _set_virtual_sql(datasource: MagicMock, sql: str) -> None:
|
||||
"""
|
||||
Configure the mock datasource to return the given SQL.
|
||||
"""
|
||||
datasource.get_rendered_sql.return_value = sql
|
||||
|
||||
|
||||
def _get_subquery_sql(datasource: MagicMock) -> str:
|
||||
"""
|
||||
Run get_from_clause and extract the inner SQL from the virtual_table alias.
|
||||
"""
|
||||
from_clause, _ = datasource.get_from_clause(template_processor=None)
|
||||
return str(from_clause.element).strip()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 1. format() should only run when RLS predicates were actually applied
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestVirtualDatasetNoRLS:
    """
    When no RLS predicates apply, the virtual dataset SQL must not be
    round-tripped through sqlglot's formatter.

    Every test patches ``superset.models.helpers.apply_rls`` to return
    ``False``. The ``mock_apply_rls`` argument is injected by that ``patch``
    decorator and ``app`` is a fixture defined outside this file; neither is
    referenced in the test bodies.
    """

    @patch("superset.models.helpers.apply_rls", return_value=False)
    def test_sql_preserved_when_no_rls(
        self,
        mock_apply_rls: MagicMock,
        virtual_datasource: MagicMock,
        app: Flask,
    ) -> None:
        """
        The original SQL should be used verbatim when apply_rls returns False.
        """
        original_sql = "SELECT pen_id, is_green FROM public.pens"
        _set_virtual_sql(virtual_datasource, original_sql)

        inner_sql = _get_subquery_sql(virtual_datasource)
        assert inner_sql == original_sql

    @patch("superset.models.helpers.apply_rls", return_value=False)
    def test_redshift_nvl_preserved_when_no_rls(
        self,
        mock_apply_rls: MagicMock,
        virtual_datasource: MagicMock,
        app: Flask,
    ) -> None:
        """
        sqlglot rewrites ``NVL(a, b)`` → ``COALESCE(a, b)`` for Redshift.
        Without the fix the inner SQL would be silently rewritten.
        """
        original_sql = "SELECT pen_id, NVL(burn_flag, false) FROM pens"
        _set_virtual_sql(virtual_datasource, original_sql)

        inner_sql = _get_subquery_sql(virtual_datasource)
        assert "NVL" in inner_sql
        assert "COALESCE" not in inner_sql

    @patch("superset.models.helpers.apply_rls", return_value=False)
    def test_redshift_current_timestamp_preserved_when_no_rls(
        self,
        mock_apply_rls: MagicMock,
        virtual_datasource: MagicMock,
        app: Flask,
    ) -> None:
        """
        sqlglot rewrites ``current_timestamp`` → ``GETDATE()`` for Redshift.
        """
        original_sql = "SELECT pen_id, current_timestamp FROM pens"
        _set_virtual_sql(virtual_datasource, original_sql)

        inner_sql = _get_subquery_sql(virtual_datasource)
        assert "current_timestamp" in inner_sql
        assert "GETDATE" not in inner_sql

    @patch("superset.models.helpers.apply_rls", return_value=False)
    def test_redshift_cast_syntax_preserved_when_no_rls(
        self,
        mock_apply_rls: MagicMock,
        virtual_datasource: MagicMock,
        app: Flask,
    ) -> None:
        """
        sqlglot rewrites Redshift ``::`` cast syntax to ``CAST(... AS ...)``.
        """
        original_sql = (
            "SELECT pen_name::varchar(256) AS name, is_green FROM public.pens"
        )
        _set_virtual_sql(virtual_datasource, original_sql)

        inner_sql = _get_subquery_sql(virtual_datasource)
        assert "::varchar(256)" in inner_sql
|
||||
|
||||
|
||||
class TestVirtualDatasetWithRLS:
    """
    When RLS predicates are applied, the SQL must be regenerated via
    sqlglot to serialize the AST modifications.
    """

    @patch("superset.models.helpers.apply_rls", return_value=True)
    def test_sql_reformatted_when_rls_applied(
        self,
        mock_apply_rls: MagicMock,
        virtual_datasource: MagicMock,
        app: Flask,
    ) -> None:
        """
        When apply_rls returns True, the SQL should be regenerated through
        sqlglot's format() to serialize the AST modifications.

        ``apply_rls`` is patched, so no predicate is actually injected; the
        test only checks that the emitted SQL differs from the input text.
        """
        original_sql = "SELECT pen_id, is_green FROM public.pens"
        _set_virtual_sql(virtual_datasource, original_sql)

        inner_sql = _get_subquery_sql(virtual_datasource)

        # After format(), sqlglot pretty-prints (adds newlines, etc.)
        assert inner_sql != original_sql
        # But the column must still be present
        assert "is_green" in inner_sql
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 2. apply_rls() return value
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestApplyRlsReturnValue:
    """
    Test that apply_rls correctly reports whether predicates were applied.
    """

    @staticmethod
    def _pens_statement() -> MagicMock:
        """
        Build a mock statement with one table that qualifies to ``public.pens``.
        """
        mock_table = MagicMock()
        mock_table.qualify.return_value = Table("pens", "public", None)

        statement = MagicMock()
        statement.tables = [mock_table]
        return statement

    @staticmethod
    def _run_apply_rls(statement: MagicMock) -> bool:
        """
        Call ``apply_rls`` on ``statement`` against a mocked database.

        The call uses no catalog and the ``public`` schema.
        """
        # Imported lazily, as in the original tests, rather than at module
        # import time.
        from superset.utils.rls import apply_rls

        database = MagicMock()
        database.db_engine_spec.get_rls_method.return_value = MagicMock()
        database.get_default_catalog.return_value = None

        return apply_rls(
            database=database,
            catalog=None,
            schema="public",
            parsed_statement=statement,
        )

    def test_returns_false_when_no_tables(self, app: Flask) -> None:
        """
        apply_rls should return False when the statement has no tables.
        """
        statement = MagicMock()
        statement.tables = []

        result = self._run_apply_rls(statement)
        assert result is False

    @patch("superset.utils.rls.get_predicates_for_table")
    def test_returns_false_when_predicates_empty(
        self,
        mock_get_predicates: MagicMock,
        app: Flask,
    ) -> None:
        """
        apply_rls should return False when tables exist but have no RLS rules.
        """
        mock_get_predicates.return_value = []

        statement = self._pens_statement()

        result = self._run_apply_rls(statement)
        assert result is False

    @patch("superset.utils.rls.get_predicates_for_table")
    def test_returns_true_when_predicates_exist(
        self,
        mock_get_predicates: MagicMock,
        app: Flask,
    ) -> None:
        """
        apply_rls should return True when RLS predicates are found.
        """
        mock_get_predicates.return_value = ["user_id = 42"]

        statement = self._pens_statement()
        statement.parse_predicate.return_value = MagicMock()

        result = self._run_apply_rls(statement)
        assert result is True
        statement.apply_rls.assert_called_once()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# 3. RLS subquery alias must use just the table name, not schema-qualified
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestRLSSubqueryAlias:
    """
    When RLSAsSubqueryTransformer replaces a table with a filtered subquery,
    the subquery alias must match what column references in the query expect.

    If the SQL says ``SELECT pens.col FROM public.pens``, the subquery
    must be aliased as ``"pens"`` (not ``"public.pens"``), otherwise
    ``pens.col`` won't resolve and Redshift returns::

        column "col" does not exist in virtual_table
    """

    @staticmethod
    def _format_with_subquery_rls(sql: str, table: Table) -> str:
        """
        Apply the predicate ``user_id = 1`` to ``table`` and return the SQL.

        The statement is parsed with the ``redshift`` engine, RLS is applied
        with ``RLSMethod.AS_SUBQUERY`` (no catalog, ``public`` schema), and
        the result of ``format()`` is returned.
        """
        statement = SQLStatement(sql, engine="redshift")
        predicate = statement.parse_predicate("user_id = 1")
        statement.apply_rls(
            None,
            "public",
            {table: [predicate]},
            RLSMethod.AS_SUBQUERY,
        )
        return statement.format()

    def test_table_qualified_columns_no_alias(self, app: Flask) -> None:
        """
        Column references like ``pens.col`` must resolve after RLS wraps
        ``public.pens`` in a subquery.
        """
        result = self._format_with_subquery_rls(
            "SELECT pens.pen_id, pens.is_green FROM public.pens",
            Table("pens", "public", None),
        )

        # The subquery alias must be just "pens", not "public.pens"
        assert 'AS "pens"' in result
        assert 'AS "public.pens"' not in result
        assert "pens.is_green" in result

    def test_catalog_schema_qualified_table_no_alias(self, app: Flask) -> None:
        """
        Even with a catalog-qualified table, the subquery alias should be
        just the table name so that ``table.col`` references still work.
        """
        result = self._format_with_subquery_rls(
            "SELECT pens.pen_id, pens.is_green FROM mycat.public.pens",
            Table("pens", "public", "mycat"),
        )

        assert 'AS "pens"' in result
        assert 'AS "mycat.public.pens"' not in result

    def test_explicit_alias_preserved(self, app: Flask) -> None:
        """
        When the table already has an explicit alias, it should be reused.
        """
        result = self._format_with_subquery_rls(
            "SELECT p.pen_id, p.is_green FROM public.pens p",
            Table("pens", "public", None),
        )

        assert "AS p" in result
        assert "p.is_green" in result

    def test_unqualified_columns_work(self, app: Flask) -> None:
        """
        Unqualified column references should work regardless of the alias.
        """
        result = self._format_with_subquery_rls(
            "SELECT pen_id, is_green FROM public.pens",
            Table("pens", "public", None),
        )

        assert "is_green" in result
        assert "WHERE" in result  # RLS predicate applied
|
||||
Reference in New Issue
Block a user