fix(embedded): prevent double RLS application in virtual datasets (#37395)

This commit is contained in:
Yuriy Krasilnikov
2026-03-12 16:12:59 +03:00
committed by GitHub
parent a9def2fc15
commit 09e9c6a522
6 changed files with 610 additions and 4 deletions

View File

@@ -0,0 +1,286 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Tests for double RLS application in virtual datasets (Issue #37359).
This module tests that guest user RLS filters are applied only once
when querying virtual datasets, not both in the underlying table SQL
and the outer query.
"""
from __future__ import annotations
from unittest.mock import MagicMock, patch
import pytest
from flask import Flask
from sqlalchemy.sql.elements import TextClause
from superset.connectors.sqla.models import BaseDatasource
@pytest.fixture
def mock_datasource() -> MagicMock:
"""Create a mock datasource for testing."""
datasource = MagicMock(spec=BaseDatasource)
datasource.get_template_processor.return_value = MagicMock()
datasource.get_template_processor.return_value.process_template = lambda x: x
datasource.text = lambda x: TextClause(x)
return datasource
def test_rls_filters_include_guest_when_enabled(
mock_datasource: MagicMock,
app: Flask,
) -> None:
"""
Test that RLS filters include guest filters when enabled.
When include_global_guest_rls=True and EMBEDDED_SUPERSET is enabled,
both regular and guest RLS filters should be returned.
"""
regular_filter = MagicMock()
regular_filter.clause = "col1 = 'value1'"
regular_filter.group_key = None
guest_rule = {"clause": "col2 = 'value2'"}
with (
patch(
"superset.connectors.sqla.models.security_manager.get_rls_filters",
return_value=[regular_filter],
),
patch(
"superset.connectors.sqla.models.security_manager.get_guest_rls_filters",
return_value=[guest_rule],
),
patch(
"superset.connectors.sqla.models.is_feature_enabled",
return_value=True,
),
):
# Call with include_global_guest_rls=True
filters = BaseDatasource.get_sqla_row_level_filters(
mock_datasource, include_global_guest_rls=True
)
# Should include both regular and guest RLS
assert len(filters) == 2
filter_strs = [str(f) for f in filters]
assert any("col1" in s for s in filter_strs)
assert any("col2" in s for s in filter_strs)
def test_rls_filters_exclude_guest_when_requested(
mock_datasource: MagicMock,
app: Flask,
) -> None:
"""
Test that RLS filters exclude guest filters when requested.
Issue #37359: When analyzing underlying tables in virtual datasets,
guest RLS should be excluded to prevent double application.
"""
regular_filter = MagicMock()
regular_filter.clause = "col1 = 'value1'"
regular_filter.group_key = None
guest_rule = {"clause": "col2 = 'value2'"}
with (
patch(
"superset.connectors.sqla.models.security_manager.get_rls_filters",
return_value=[regular_filter],
),
patch(
"superset.connectors.sqla.models.security_manager.get_guest_rls_filters",
return_value=[guest_rule],
),
patch(
"superset.connectors.sqla.models.is_feature_enabled",
return_value=True,
),
):
# Call internal API with include_global_guest_rls=False
filters = BaseDatasource.get_sqla_row_level_filters(
mock_datasource, include_global_guest_rls=False
)
# Should include only regular RLS, not guest RLS
assert len(filters) == 1
filter_strs = [str(f) for f in filters]
assert any("col1" in s for s in filter_strs)
assert not any("col2" in s for s in filter_strs)
def test_rls_filters_include_guest_by_default(
mock_datasource: MagicMock,
app: Flask,
) -> None:
"""
Test that RLS filters include guest filters by default.
The default behavior (include_global_guest_rls=True) ensures backwards
compatibility with existing code.
"""
regular_filter = MagicMock()
regular_filter.clause = "col1 = 'value1'"
regular_filter.group_key = None
guest_rule = {"clause": "col2 = 'value2'"}
with (
patch(
"superset.connectors.sqla.models.security_manager.get_rls_filters",
return_value=[regular_filter],
),
patch(
"superset.connectors.sqla.models.security_manager.get_guest_rls_filters",
return_value=[guest_rule],
),
patch(
"superset.connectors.sqla.models.is_feature_enabled",
return_value=True,
),
):
# Call internal API with default include_global_guest_rls=True
filters = BaseDatasource.get_sqla_row_level_filters(mock_datasource)
# Should include both regular and guest RLS
assert len(filters) == 2
def test_regular_rls_always_included(
mock_datasource: MagicMock,
app: Flask,
) -> None:
"""
Test that regular (non-guest) RLS is always included.
Even when include_global_guest_rls=False, regular RLS filters must still
be applied to underlying tables in virtual datasets.
"""
regular_filter = MagicMock()
regular_filter.clause = "tenant_id = 123"
regular_filter.group_key = None
with (
patch(
"superset.connectors.sqla.models.security_manager.get_rls_filters",
return_value=[regular_filter],
),
patch(
"superset.connectors.sqla.models.security_manager.get_guest_rls_filters",
return_value=[],
),
patch(
"superset.connectors.sqla.models.is_feature_enabled",
return_value=True,
),
):
# Call internal API with include_global_guest_rls=False
filters = BaseDatasource.get_sqla_row_level_filters(
mock_datasource, include_global_guest_rls=False
)
# Regular RLS should still be included
assert len(filters) == 1
assert "tenant_id" in str(filters[0])
def test_guest_rls_skipped_when_feature_disabled(
mock_datasource: MagicMock,
app: Flask,
) -> None:
"""
Test that guest RLS is skipped when EMBEDDED_SUPERSET is disabled.
This verifies that the feature flag is respected regardless of
the include_global_guest_rls parameter.
"""
regular_filter = MagicMock()
regular_filter.clause = "col1 = 'value1'"
regular_filter.group_key = None
guest_rule = {"clause": "col2 = 'value2'"}
with (
patch(
"superset.connectors.sqla.models.security_manager.get_rls_filters",
return_value=[regular_filter],
),
patch(
"superset.connectors.sqla.models.security_manager.get_guest_rls_filters",
return_value=[guest_rule],
),
patch(
"superset.connectors.sqla.models.is_feature_enabled",
return_value=False, # Feature disabled
),
):
# Even with include_global_guest_rls=True, feature flag takes precedence
filters = BaseDatasource.get_sqla_row_level_filters(
mock_datasource, include_global_guest_rls=True
)
# Should include only regular RLS
assert len(filters) == 1
assert not any("col2" in str(f) for f in filters)
def test_filter_grouping_preserved(
mock_datasource: MagicMock,
app: Flask,
) -> None:
"""
Test that filter grouping logic is preserved in internal method.
Filters with the same group_key should be ORed together, while
different groups are ANDed.
"""
filter1 = MagicMock()
filter1.clause = "region = 'US'"
filter1.group_key = "region_group"
filter2 = MagicMock()
filter2.clause = "region = 'EU'"
filter2.group_key = "region_group"
filter3 = MagicMock()
filter3.clause = "active = true"
filter3.group_key = None
with (
patch(
"superset.connectors.sqla.models.security_manager.get_rls_filters",
return_value=[filter1, filter2, filter3],
),
patch(
"superset.connectors.sqla.models.security_manager.get_guest_rls_filters",
return_value=[],
),
patch(
"superset.connectors.sqla.models.is_feature_enabled",
return_value=False,
),
):
filters = BaseDatasource.get_sqla_row_level_filters(
mock_datasource, include_global_guest_rls=False
)
# Should have 2 filters: one ungrouped, one grouped (ORed)
assert len(filters) == 2

View File

@@ -0,0 +1,298 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Tests for guest RLS scoping in virtual dataset scenarios.
Verifies that dataset-scoped guest RLS rules are correctly applied
when querying through virtual datasets, and that global (unscoped)
guest rules are not duplicated across inner and outer queries.
"""
from __future__ import annotations
from typing import Callable
from unittest.mock import MagicMock, patch
from flask import Flask
from pytest_mock import MockerFixture
from sqlalchemy.sql.elements import TextClause
from superset.connectors.sqla.models import BaseDatasource
from superset.security.guest_token import (
GuestToken,
GuestTokenResourceType,
GuestTokenRlsRule,
GuestUser,
)
from superset.sql.parse import Table
from superset.utils.rls import get_predicates_for_table
def _make_datasource(dataset_id: int) -> MagicMock:
datasource = MagicMock(spec=BaseDatasource)
datasource.get_template_processor.return_value = MagicMock()
datasource.get_template_processor.return_value.process_template = lambda x: x
datasource.text = lambda x: TextClause(x)
datasource.data = {"id": dataset_id}
datasource.is_rls_supported = True
return datasource
def _make_guest_user(rules: list[GuestTokenRlsRule]) -> GuestUser:
token: GuestToken = {
"user": {},
"resources": [
{"type": GuestTokenResourceType.DASHBOARD, "id": "test-dashboard-uuid"}
],
"rls_rules": rules,
"iat": 10,
"exp": 20,
}
return GuestUser(token=token, roles=[])
def _guest_rls_filter(
guest_user: GuestUser,
) -> Callable[[MagicMock], list[GuestTokenRlsRule]]:
"""Replicate real get_guest_rls_filters logic from manager.py:2709-2714."""
def _filter(dataset: MagicMock) -> list[GuestTokenRlsRule]:
return [
rule
for rule in guest_user.rls
if not rule.get("dataset")
or str(rule.get("dataset")) == str(dataset.data["id"])
]
return _filter
def test_scoped_guest_rule_preserved_in_virtual_dataset(app: Flask) -> None:
"""
Dataset-scoped guest RLS rule is preserved when querying through
a virtual dataset.
Path A (inner SQL): include_global_guest_rls=False skips only global rules,
but dataset-scoped rules matching PD are kept.
Path B (outer query): get_guest_rls_filters(VD) doesn't match because
rule.dataset=42 != VD.id=99, but that's OK because
the rule was already applied in Path A.
"""
pd_id = 42
vd_id = 99
scoped_rule = GuestTokenRlsRule(dataset=str(pd_id), clause="tenant_id = 5")
guest_user = _make_guest_user(rules=[scoped_rule])
mock_pd = _make_datasource(pd_id)
mock_vd = _make_datasource(vd_id)
with (
patch(
"superset.connectors.sqla.models.security_manager.get_rls_filters",
return_value=[],
),
patch(
"superset.connectors.sqla.models.security_manager.get_guest_rls_filters",
wraps=_guest_rls_filter(guest_user),
),
patch(
"superset.connectors.sqla.models.is_feature_enabled",
return_value=True,
),
):
# PATH A: what get_predicates_for_table() calls on PD
inner_filters = BaseDatasource.get_sqla_row_level_filters(
mock_pd, include_global_guest_rls=False
)
# PATH B: what get_sqla_query() calls on VD
outer_filters = BaseDatasource.get_sqla_row_level_filters(
mock_vd, include_global_guest_rls=True
)
inner_has_rule = any("tenant_id" in str(f) for f in inner_filters)
outer_has_rule = any("tenant_id" in str(f) for f in outer_filters)
# Scoped rule must appear in inner SQL (PD matches rule.dataset)
assert inner_has_rule, (
f"Dataset-scoped guest rule 'tenant_id = 5' (scoped to PD "
f"id={pd_id}) should be applied in inner SQL. "
f"inner_filters={[str(f) for f in inner_filters]}"
)
# Scoped rule should NOT appear in outer SQL (VD.id != rule.dataset)
assert not outer_has_rule, (
f"Dataset-scoped guest rule should not match VD id={vd_id}. "
f"outer_filters={[str(f) for f in outer_filters]}"
)
def test_unscoped_guest_rule_duplicated_in_virtual_dataset(app: Flask) -> None:
"""
Unscoped (global) guest rules match ANY dataset — these are the ones
that get duplicated in virtual datasets (the original issue #37359).
The fix should selectively filter only unscoped rules from inner SQL,
not all guest rules.
"""
pd_id = 42
vd_id = 99
unscoped_rule = GuestTokenRlsRule(dataset=None, clause="org_id = 1")
guest_user = _make_guest_user(rules=[unscoped_rule])
mock_pd = _make_datasource(pd_id)
mock_vd = _make_datasource(vd_id)
with (
patch(
"superset.connectors.sqla.models.security_manager.get_rls_filters",
return_value=[],
),
patch(
"superset.connectors.sqla.models.security_manager.get_guest_rls_filters",
wraps=_guest_rls_filter(guest_user),
),
patch(
"superset.connectors.sqla.models.is_feature_enabled",
return_value=True,
),
):
# Both paths with guest RLS enabled (before the fix)
inner_filters = BaseDatasource.get_sqla_row_level_filters(
mock_pd, include_global_guest_rls=True
)
outer_filters = BaseDatasource.get_sqla_row_level_filters(
mock_vd, include_global_guest_rls=True
)
inner_has_rule = any("org_id" in str(f) for f in inner_filters)
outer_has_rule = any("org_id" in str(f) for f in outer_filters)
assert inner_has_rule, "Unscoped rule should match PD"
assert outer_has_rule, "Unscoped rule should match VD"
def _make_datasource_with_real_rls(dataset_id: int) -> MagicMock:
"""Create a mock datasource with real get_sqla_row_level_filters."""
datasource = _make_datasource(dataset_id)
# Bind real BaseDatasource method so RLS logic executes against mocked
# security_manager rather than returning MagicMock auto-stub
datasource.get_sqla_row_level_filters = (
lambda **kwargs: BaseDatasource.get_sqla_row_level_filters(datasource, **kwargs)
)
return datasource
def test_scoped_guest_rule_preserved_through_get_predicates_for_table(
app: Flask,
mocker: MockerFixture,
) -> None:
"""
Dataset-scoped guest RLS rules are preserved when
get_predicates_for_table() calls get_sqla_row_level_filters()
with include_global_guest_rls=False.
Exercises the full path: get_predicates_for_table() → DB lookup →
get_sqla_row_level_filters(include_global_guest_rls=False) →
get_guest_rls_filters() returns scoped rule → predicate included.
"""
from sqlalchemy.dialects import sqlite
pd_id = 42
scoped_rule = GuestTokenRlsRule(dataset=str(pd_id), clause="tenant_id = 5")
guest_user = _make_guest_user(rules=[scoped_rule])
mock_pd = _make_datasource_with_real_rls(pd_id)
database = mocker.MagicMock()
database.get_dialect.return_value = sqlite.dialect()
db = mocker.patch("superset.utils.rls.db")
db.session.query().filter().one_or_none.return_value = mock_pd
with (
patch(
"superset.connectors.sqla.models.security_manager.get_rls_filters",
return_value=[],
),
patch(
"superset.connectors.sqla.models.security_manager.get_guest_rls_filters",
wraps=_guest_rls_filter(guest_user),
),
patch(
"superset.connectors.sqla.models.is_feature_enabled",
return_value=True,
),
):
table = Table("physical_table", "public", "examples")
predicates = get_predicates_for_table(table, database, "examples")
assert any("tenant_id" in p for p in predicates), (
f"SECURITY BYPASS via get_predicates_for_table: "
f"dataset-scoped guest rule 'tenant_id = 5' (scoped to PD "
f"id={pd_id}) not found in predicates. Got: {predicates}"
)
def test_global_guest_rule_excluded_through_get_predicates_for_table(
app: Flask,
mocker: MockerFixture,
) -> None:
"""
Global (unscoped) guest RLS rules are excluded when
get_predicates_for_table() calls get_sqla_row_level_filters()
with include_global_guest_rls=False.
This prevents double application: global guest rules match any dataset,
so they would appear both in inner SQL (underlying table) and outer query
(virtual dataset). The fix excludes them from the inner SQL path.
"""
from sqlalchemy.dialects import sqlite
pd_id = 42
global_rule = GuestTokenRlsRule(dataset=None, clause="org_id = 1")
guest_user = _make_guest_user(rules=[global_rule])
mock_pd = _make_datasource_with_real_rls(pd_id)
database = mocker.MagicMock()
database.get_dialect.return_value = sqlite.dialect()
db = mocker.patch("superset.utils.rls.db")
db.session.query().filter().one_or_none.return_value = mock_pd
with (
patch(
"superset.connectors.sqla.models.security_manager.get_rls_filters",
return_value=[],
),
patch(
"superset.connectors.sqla.models.security_manager.get_guest_rls_filters",
wraps=_guest_rls_filter(guest_user),
),
patch(
"superset.connectors.sqla.models.is_feature_enabled",
return_value=True,
),
):
table = Table("physical_table", "public", "examples")
predicates = get_predicates_for_table(table, database, "examples")
assert not any("org_id" in p for p in predicates), (
f"Global guest rule 'org_id = 1' should be excluded from "
f"get_predicates_for_table() to prevent double application "
f"in virtual datasets. Got: {predicates}"
)

View File

@@ -326,3 +326,6 @@ def test_get_predicates_for_table(mocker: MockerFixture) -> None:
table = Table("t1", "public", "examples")
assert get_predicates_for_table(table, database, "examples") == ["c1 = 1"]
dataset.get_sqla_row_level_filters.assert_called_once_with(
include_global_guest_rls=False
)