diff --git a/superset/connectors/sqla/models.py b/superset/connectors/sqla/models.py index 1514a944e98..b49e9ba4d33 100644 --- a/superset/connectors/sqla/models.py +++ b/superset/connectors/sqla/models.py @@ -740,15 +740,19 @@ class BaseDatasource( def get_sqla_row_level_filters( self, template_processor: Optional[BaseTemplateProcessor] = None, + include_global_guest_rls: bool = True, ) -> list[TextClause]: """ Return the appropriate row level security filters for this table and the - current user. A custom username can be passed when the user is not present in the - Flask global namespace. + current user. :param template_processor: The template processor to apply to the filters. + :param include_global_guest_rls: Whether to include global (unscoped) guest + RLS filters. Set to False for underlying tables in virtual datasets to + prevent double application of global guest rules. Dataset-scoped guest + rules are always included regardless of this parameter. :returns: A list of SQL clauses to be ANDed together. - """ # noqa: E501 + """ template_processor = template_processor or self.get_template_processor() all_filters: list[TextClause] = [] @@ -765,6 +769,8 @@ class BaseDatasource( if is_feature_enabled("EMBEDDED_SUPERSET"): for rule in security_manager.get_guest_rls_filters(self): + if not include_global_guest_rls and not rule.get("dataset"): + continue clause = self.text( f"({template_processor.process_template(rule['clause'])})" ) diff --git a/superset/models/helpers.py b/superset/models/helpers.py index f6f28f63850..704b0f4c56a 100644 --- a/superset/models/helpers.py +++ b/superset/models/helpers.py @@ -903,6 +903,7 @@ class ExploreMixin: # pylint: disable=too-many-public-methods def get_sqla_row_level_filters( self, template_processor: Optional[BaseTemplateProcessor] = None, # pylint: disable=unused-argument + include_global_guest_rls: bool = True, # pylint: disable=unused-argument ) -> list[TextClause]: # TODO: We should refactor this mixin and remove this method # as it exists in the BaseDatasource and is not applicable diff --git a/superset/utils/rls.py b/superset/utils/rls.py index 43f7da9ffe8..05f8ea9058c 100644 --- a/superset/utils/rls.py +++ b/superset/utils/rls.py @@ -98,6 +98,16 @@ def get_predicates_for_table( if not dataset: return [] + # Exclude global (unscoped) guest RLS to prevent double application in + # virtual datasets. Global guest rules will be applied to the outer query + # via get_sqla_row_level_filters() on the virtual dataset itself. + # Dataset-scoped guest rules are still included here because they target + # this specific physical dataset and won't match on the outer query. + # Note: this path is also used by SQL Lab (sql_lab.py, executor.py) via + # apply_rls(). Guest users with the default Public role cannot access SQL Lab + # (PUBLIC_EXCLUDED_VIEW_MENUS in security/manager.py). If the guest role is + # extended to include SQL Lab access, global guest RLS predicates for + # underlying tables would be skipped here. return [ str( predicate.compile( @@ -105,7 +115,9 @@ def get_predicates_for_table( compile_kwargs={"literal_binds": True}, ) ) - for predicate in dataset.get_sqla_row_level_filters() + for predicate in dataset.get_sqla_row_level_filters( + include_global_guest_rls=False + ) ] diff --git a/tests/unit_tests/models/test_double_rls_virtual_dataset.py b/tests/unit_tests/models/test_double_rls_virtual_dataset.py new file mode 100644 index 00000000000..161f1adf7cc --- /dev/null +++ b/tests/unit_tests/models/test_double_rls_virtual_dataset.py @@ -0,0 +1,286 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +Tests for double RLS application in virtual datasets (Issue #37359). + +This module tests that guest user RLS filters are applied only once +when querying virtual datasets, not both in the underlying table SQL +and the outer query. +""" + +from __future__ import annotations + +from unittest.mock import MagicMock, patch + +import pytest +from flask import Flask +from sqlalchemy.sql.elements import TextClause + +from superset.connectors.sqla.models import BaseDatasource + + +@pytest.fixture +def mock_datasource() -> MagicMock: + """Create a mock datasource for testing.""" + datasource = MagicMock(spec=BaseDatasource) + datasource.get_template_processor.return_value = MagicMock() + datasource.get_template_processor.return_value.process_template = lambda x: x + datasource.text = lambda x: TextClause(x) + return datasource + + +def test_rls_filters_include_guest_when_enabled( + mock_datasource: MagicMock, + app: Flask, +) -> None: + """ + Test that RLS filters include guest filters when enabled. + + When include_global_guest_rls=True and EMBEDDED_SUPERSET is enabled, + both regular and guest RLS filters should be returned. + """ + regular_filter = MagicMock() + regular_filter.clause = "col1 = 'value1'" + regular_filter.group_key = None + + guest_rule = {"clause": "col2 = 'value2'"} + + with ( + patch( + "superset.connectors.sqla.models.security_manager.get_rls_filters", + return_value=[regular_filter], + ), + patch( + "superset.connectors.sqla.models.security_manager.get_guest_rls_filters", + return_value=[guest_rule], + ), + patch( + "superset.connectors.sqla.models.is_feature_enabled", + return_value=True, + ), + ): + # Call with include_global_guest_rls=True + filters = BaseDatasource.get_sqla_row_level_filters( + mock_datasource, include_global_guest_rls=True + ) + + # Should include both regular and guest RLS + assert len(filters) == 2 + filter_strs = [str(f) for f in filters] + assert any("col1" in s for s in filter_strs) + assert any("col2" in s for s in filter_strs) + + +def test_rls_filters_exclude_guest_when_requested( + mock_datasource: MagicMock, + app: Flask, +) -> None: + """ + Test that RLS filters exclude guest filters when requested. + + Issue #37359: When analyzing underlying tables in virtual datasets, + guest RLS should be excluded to prevent double application. + """ + regular_filter = MagicMock() + regular_filter.clause = "col1 = 'value1'" + regular_filter.group_key = None + + guest_rule = {"clause": "col2 = 'value2'"} + + with ( + patch( + "superset.connectors.sqla.models.security_manager.get_rls_filters", + return_value=[regular_filter], + ), + patch( + "superset.connectors.sqla.models.security_manager.get_guest_rls_filters", + return_value=[guest_rule], + ), + patch( + "superset.connectors.sqla.models.is_feature_enabled", + return_value=True, + ), + ): + # Call internal API with include_global_guest_rls=False + filters = BaseDatasource.get_sqla_row_level_filters( + mock_datasource, include_global_guest_rls=False + ) + + # Should include only regular RLS, not guest RLS + assert len(filters) == 1 + filter_strs = [str(f) for f in filters] + assert any("col1" in s for s in filter_strs) + assert not any("col2" in s for s in filter_strs) + + +def test_rls_filters_include_guest_by_default( + mock_datasource: MagicMock, + app: Flask, +) -> None: + """ + Test that RLS filters include guest filters by default. + + The default behavior (include_global_guest_rls=True) ensures backwards + compatibility with existing code. + """ + regular_filter = MagicMock() + regular_filter.clause = "col1 = 'value1'" + regular_filter.group_key = None + + guest_rule = {"clause": "col2 = 'value2'"} + + with ( + patch( + "superset.connectors.sqla.models.security_manager.get_rls_filters", + return_value=[regular_filter], + ), + patch( + "superset.connectors.sqla.models.security_manager.get_guest_rls_filters", + return_value=[guest_rule], + ), + patch( + "superset.connectors.sqla.models.is_feature_enabled", + return_value=True, + ), + ): + # Call internal API with default include_global_guest_rls=True + filters = BaseDatasource.get_sqla_row_level_filters(mock_datasource) + + # Should include both regular and guest RLS + assert len(filters) == 2 + + +def test_regular_rls_always_included( + mock_datasource: MagicMock, + app: Flask, +) -> None: + """ + Test that regular (non-guest) RLS is always included. + + Even when include_global_guest_rls=False, regular RLS filters must still + be applied to underlying tables in virtual datasets. + """ + regular_filter = MagicMock() + regular_filter.clause = "tenant_id = 123" + regular_filter.group_key = None + + with ( + patch( + "superset.connectors.sqla.models.security_manager.get_rls_filters", + return_value=[regular_filter], + ), + patch( + "superset.connectors.sqla.models.security_manager.get_guest_rls_filters", + return_value=[], + ), + patch( + "superset.connectors.sqla.models.is_feature_enabled", + return_value=True, + ), + ): + # Call internal API with include_global_guest_rls=False + filters = BaseDatasource.get_sqla_row_level_filters( + mock_datasource, include_global_guest_rls=False + ) + + # Regular RLS should still be included + assert len(filters) == 1 + assert "tenant_id" in str(filters[0]) + + +def test_guest_rls_skipped_when_feature_disabled( + mock_datasource: MagicMock, + app: Flask, +) -> None: + """ + Test that guest RLS is skipped when EMBEDDED_SUPERSET is disabled. + + This verifies that the feature flag is respected regardless of + the include_global_guest_rls parameter. + """ + regular_filter = MagicMock() + regular_filter.clause = "col1 = 'value1'" + regular_filter.group_key = None + + guest_rule = {"clause": "col2 = 'value2'"} + + with ( + patch( + "superset.connectors.sqla.models.security_manager.get_rls_filters", + return_value=[regular_filter], + ), + patch( + "superset.connectors.sqla.models.security_manager.get_guest_rls_filters", + return_value=[guest_rule], + ), + patch( + "superset.connectors.sqla.models.is_feature_enabled", + return_value=False, # Feature disabled + ), + ): + # Even with include_global_guest_rls=True, feature flag takes precedence + filters = BaseDatasource.get_sqla_row_level_filters( + mock_datasource, include_global_guest_rls=True + ) + + # Should include only regular RLS + assert len(filters) == 1 + assert not any("col2" in str(f) for f in filters) + + +def test_filter_grouping_preserved( + mock_datasource: MagicMock, + app: Flask, +) -> None: + """ + Test that filter grouping logic is preserved in internal method. + + Filters with the same group_key should be ORed together, while + different groups are ANDed. + """ + filter1 = MagicMock() + filter1.clause = "region = 'US'" + filter1.group_key = "region_group" + + filter2 = MagicMock() + filter2.clause = "region = 'EU'" + filter2.group_key = "region_group" + + filter3 = MagicMock() + filter3.clause = "active = true" + filter3.group_key = None + + with ( + patch( + "superset.connectors.sqla.models.security_manager.get_rls_filters", + return_value=[filter1, filter2, filter3], + ), + patch( + "superset.connectors.sqla.models.security_manager.get_guest_rls_filters", + return_value=[], + ), + patch( + "superset.connectors.sqla.models.is_feature_enabled", + return_value=False, + ), + ): + filters = BaseDatasource.get_sqla_row_level_filters( + mock_datasource, include_global_guest_rls=False + ) + + # Should have 2 filters: one ungrouped, one grouped (ORed) + assert len(filters) == 2 diff --git a/tests/unit_tests/security/guest_rls_test.py b/tests/unit_tests/security/guest_rls_test.py new file mode 100644 index 00000000000..6210570d66f --- /dev/null +++ b/tests/unit_tests/security/guest_rls_test.py @@ -0,0 +1,298 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +""" +Tests for guest RLS scoping in virtual dataset scenarios. + +Verifies that dataset-scoped guest RLS rules are correctly applied +when querying through virtual datasets, and that global (unscoped) +guest rules are not duplicated across inner and outer queries. +""" + +from __future__ import annotations + +from typing import Callable +from unittest.mock import MagicMock, patch + +from flask import Flask +from pytest_mock import MockerFixture +from sqlalchemy.sql.elements import TextClause + +from superset.connectors.sqla.models import BaseDatasource +from superset.security.guest_token import ( + GuestToken, + GuestTokenResourceType, + GuestTokenRlsRule, + GuestUser, +) +from superset.sql.parse import Table +from superset.utils.rls import get_predicates_for_table + + +def _make_datasource(dataset_id: int) -> MagicMock: + datasource = MagicMock(spec=BaseDatasource) + datasource.get_template_processor.return_value = MagicMock() + datasource.get_template_processor.return_value.process_template = lambda x: x + datasource.text = lambda x: TextClause(x) + datasource.data = {"id": dataset_id} + datasource.is_rls_supported = True + return datasource + + +def _make_guest_user(rules: list[GuestTokenRlsRule]) -> GuestUser: + token: GuestToken = { + "user": {}, + "resources": [ + {"type": GuestTokenResourceType.DASHBOARD, "id": "test-dashboard-uuid"} + ], + "rls_rules": rules, + "iat": 10, + "exp": 20, + } + return GuestUser(token=token, roles=[]) + + +def _guest_rls_filter( + guest_user: GuestUser, +) -> Callable[[MagicMock], list[GuestTokenRlsRule]]: + """Replicate real get_guest_rls_filters logic from manager.py:2709-2714.""" + + def _filter(dataset: MagicMock) -> list[GuestTokenRlsRule]: + return [ + rule + for rule in guest_user.rls + if not rule.get("dataset") + or str(rule.get("dataset")) == str(dataset.data["id"]) + ] + + return _filter + + +def test_scoped_guest_rule_preserved_in_virtual_dataset(app: Flask) -> None: + """ + Dataset-scoped guest RLS rule is preserved when querying through + a virtual dataset. + + Path A (inner SQL): include_global_guest_rls=False skips only global rules, + but dataset-scoped rules matching PD are kept. + Path B (outer query): get_guest_rls_filters(VD) doesn't match because + rule.dataset=42 != VD.id=99, but that's OK because + the rule was already applied in Path A. + """ + pd_id = 42 + vd_id = 99 + + scoped_rule = GuestTokenRlsRule(dataset=str(pd_id), clause="tenant_id = 5") + guest_user = _make_guest_user(rules=[scoped_rule]) + + mock_pd = _make_datasource(pd_id) + mock_vd = _make_datasource(vd_id) + + with ( + patch( + "superset.connectors.sqla.models.security_manager.get_rls_filters", + return_value=[], + ), + patch( + "superset.connectors.sqla.models.security_manager.get_guest_rls_filters", + wraps=_guest_rls_filter(guest_user), + ), + patch( + "superset.connectors.sqla.models.is_feature_enabled", + return_value=True, + ), + ): + # PATH A: what get_predicates_for_table() calls on PD + inner_filters = BaseDatasource.get_sqla_row_level_filters( + mock_pd, include_global_guest_rls=False + ) + + # PATH B: what get_sqla_query() calls on VD + outer_filters = BaseDatasource.get_sqla_row_level_filters( + mock_vd, include_global_guest_rls=True + ) + + inner_has_rule = any("tenant_id" in str(f) for f in inner_filters) + outer_has_rule = any("tenant_id" in str(f) for f in outer_filters) + + # Scoped rule must appear in inner SQL (PD matches rule.dataset) + assert inner_has_rule, ( + f"Dataset-scoped guest rule 'tenant_id = 5' (scoped to PD " + f"id={pd_id}) should be applied in inner SQL. " + f"inner_filters={[str(f) for f in inner_filters]}" + ) + # Scoped rule should NOT appear in outer SQL (VD.id != rule.dataset) + assert not outer_has_rule, ( + f"Dataset-scoped guest rule should not match VD id={vd_id}. " + f"outer_filters={[str(f) for f in outer_filters]}" + ) + + +def test_unscoped_guest_rule_duplicated_in_virtual_dataset(app: Flask) -> None: + """ + Unscoped (global) guest rules match ANY dataset — these are the ones + that get duplicated in virtual datasets (the original issue #37359). + + The fix should selectively filter only unscoped rules from inner SQL, + not all guest rules. + """ + pd_id = 42 + vd_id = 99 + + unscoped_rule = GuestTokenRlsRule(dataset=None, clause="org_id = 1") + guest_user = _make_guest_user(rules=[unscoped_rule]) + + mock_pd = _make_datasource(pd_id) + mock_vd = _make_datasource(vd_id) + + with ( + patch( + "superset.connectors.sqla.models.security_manager.get_rls_filters", + return_value=[], + ), + patch( + "superset.connectors.sqla.models.security_manager.get_guest_rls_filters", + wraps=_guest_rls_filter(guest_user), + ), + patch( + "superset.connectors.sqla.models.is_feature_enabled", + return_value=True, + ), + ): + # Both paths with guest RLS enabled (before the fix) + inner_filters = BaseDatasource.get_sqla_row_level_filters( + mock_pd, include_global_guest_rls=True + ) + outer_filters = BaseDatasource.get_sqla_row_level_filters( + mock_vd, include_global_guest_rls=True + ) + + inner_has_rule = any("org_id" in str(f) for f in inner_filters) + outer_has_rule = any("org_id" in str(f) for f in outer_filters) + + assert inner_has_rule, "Unscoped rule should match PD" + assert outer_has_rule, "Unscoped rule should match VD" + + +def _make_datasource_with_real_rls(dataset_id: int) -> MagicMock: + """Create a mock datasource with real get_sqla_row_level_filters.""" + datasource = _make_datasource(dataset_id) + # Bind real BaseDatasource method so RLS logic executes against mocked + # security_manager rather than returning MagicMock auto-stub + datasource.get_sqla_row_level_filters = ( + lambda **kwargs: BaseDatasource.get_sqla_row_level_filters(datasource, **kwargs) + ) + return datasource + + +def test_scoped_guest_rule_preserved_through_get_predicates_for_table( + app: Flask, + mocker: MockerFixture, +) -> None: + """ + Dataset-scoped guest RLS rules are preserved when + get_predicates_for_table() calls get_sqla_row_level_filters() + with include_global_guest_rls=False. + + Exercises the full path: get_predicates_for_table() → DB lookup → + get_sqla_row_level_filters(include_global_guest_rls=False) → + get_guest_rls_filters() returns scoped rule → predicate included. + """ + from sqlalchemy.dialects import sqlite + + pd_id = 42 + scoped_rule = GuestTokenRlsRule(dataset=str(pd_id), clause="tenant_id = 5") + guest_user = _make_guest_user(rules=[scoped_rule]) + + mock_pd = _make_datasource_with_real_rls(pd_id) + + database = mocker.MagicMock() + database.get_dialect.return_value = sqlite.dialect() + db = mocker.patch("superset.utils.rls.db") + db.session.query().filter().one_or_none.return_value = mock_pd + + with ( + patch( + "superset.connectors.sqla.models.security_manager.get_rls_filters", + return_value=[], + ), + patch( + "superset.connectors.sqla.models.security_manager.get_guest_rls_filters", + wraps=_guest_rls_filter(guest_user), + ), + patch( + "superset.connectors.sqla.models.is_feature_enabled", + return_value=True, + ), + ): + table = Table("physical_table", "public", "examples") + predicates = get_predicates_for_table(table, database, "examples") + + assert any("tenant_id" in p for p in predicates), ( + f"SECURITY BYPASS via get_predicates_for_table: " + f"dataset-scoped guest rule 'tenant_id = 5' (scoped to PD " + f"id={pd_id}) not found in predicates. Got: {predicates}" + ) + + +def test_global_guest_rule_excluded_through_get_predicates_for_table( + app: Flask, + mocker: MockerFixture, +) -> None: + """ + Global (unscoped) guest RLS rules are excluded when + get_predicates_for_table() calls get_sqla_row_level_filters() + with include_global_guest_rls=False. + + This prevents double application: global guest rules match any dataset, + so they would appear both in inner SQL (underlying table) and outer query + (virtual dataset). The fix excludes them from the inner SQL path. + """ + from sqlalchemy.dialects import sqlite + + pd_id = 42 + global_rule = GuestTokenRlsRule(dataset=None, clause="org_id = 1") + guest_user = _make_guest_user(rules=[global_rule]) + + mock_pd = _make_datasource_with_real_rls(pd_id) + + database = mocker.MagicMock() + database.get_dialect.return_value = sqlite.dialect() + db = mocker.patch("superset.utils.rls.db") + db.session.query().filter().one_or_none.return_value = mock_pd + + with ( + patch( + "superset.connectors.sqla.models.security_manager.get_rls_filters", + return_value=[], + ), + patch( + "superset.connectors.sqla.models.security_manager.get_guest_rls_filters", + wraps=_guest_rls_filter(guest_user), + ), + patch( + "superset.connectors.sqla.models.is_feature_enabled", + return_value=True, + ), + ): + table = Table("physical_table", "public", "examples") + predicates = get_predicates_for_table(table, database, "examples") + + assert not any("org_id" in p for p in predicates), ( + f"Global guest rule 'org_id = 1' should be excluded from " + f"get_predicates_for_table() to prevent double application " + f"in virtual datasets. Got: {predicates}" + ) diff --git a/tests/unit_tests/sql_lab_test.py b/tests/unit_tests/sql_lab_test.py index 9fd3a0ac0e2..f9e3f50cefe 100644 --- a/tests/unit_tests/sql_lab_test.py +++ b/tests/unit_tests/sql_lab_test.py @@ -326,3 +326,6 @@ def test_get_predicates_for_table(mocker: MockerFixture) -> None: table = Table("t1", "public", "examples") assert get_predicates_for_table(table, database, "examples") == ["c1 = 1"] + dataset.get_sqla_row_level_filters.assert_called_once_with( + include_global_guest_rls=False + )