def get_hidden_columns_for_table(
    table: Table,
    database: Database,
    rules: list[DataAccessRule] | None = None,
) -> set[str]:
    """
    Return the names of the columns that must be hidden for a table.

    The CLS rules applying to the table are obtained from
    ``get_cls_rules_for_table``; every column whose action equals
    ``CLSAction.HIDE`` ends up in the result.

    Args:
        table: The fully qualified Table object
        database: The Database object
        rules: Optional list of rules to check, forwarded unchanged to
            ``get_cls_rules_for_table`` (defaults to current user's rules)

    Returns:
        Set of column names that should be hidden; empty when no column
        carries the "hide" action.
    """
    actions_by_column = get_cls_rules_for_table(table, database, rules)
    return {
        name
        for name, action in actions_by_column.items()
        if action == CLSAction.HIDE
    }
def filter_columns_by_cls(
    columns: list[dict[str, Any]],
    table: Table,
    database: Database,
    column_name_key: str = "column_name",
) -> list[dict[str, Any]]:
    """
    Drop the column dictionaries whose column is hidden for the table.

    Intended for column metadata coming from database reflection or
    dataset APIs.

    Args:
        columns: List of column dictionaries
        table: The fully qualified Table object
        database: The Database object
        column_name_key: The key in the column dict that contains the column name

    Returns:
        The input list itself when the ``DATA_ACCESS_RULES`` feature flag is
        off or nothing is hidden; otherwise a new list without the hidden
        columns.
    """
    if is_feature_enabled("DATA_ACCESS_RULES"):
        hidden = get_hidden_columns_for_table(table, database)
        if hidden:
            # Entries lacking the key yield None, which is kept unless
            # None itself is in the hidden set.
            return [
                entry
                for entry in columns
                if entry.get(column_name_key) not in hidden
            ]
    return columns
# Tests for get_hidden_columns_for_table
def test_get_hidden_columns_for_table_no_hidden(app_context: None):
    """CLS actions other than "hide" must not yield any hidden column."""
    from superset.data_access_rules.utils import get_hidden_columns_for_table

    db = MagicMock()
    db.database_name = "mydb"

    access_rule = MagicMock(spec=DataAccessRule)
    access_rule.rule_dict = {
        "allowed": [
            {
                "database": "mydb",
                "schema": "public",
                "table": "users",
                "cls": {"email": "mask", "phone": "hash"},  # No "hide" actions
            }
        ],
        "denied": [],
    }

    users = Table(table="users", schema="public", catalog=None)
    result = get_hidden_columns_for_table(users, db, rules=[access_rule])
    assert result == set()


def test_get_hidden_columns_for_table_with_hidden(app_context: None):
    """Only the columns marked "hide" are reported as hidden."""
    from superset.data_access_rules.utils import get_hidden_columns_for_table

    db = MagicMock()
    db.database_name = "mydb"

    access_rule = MagicMock(spec=DataAccessRule)
    access_rule.rule_dict = {
        "allowed": [
            {
                "database": "mydb",
                "schema": "public",
                "table": "users",
                "cls": {"email": "mask", "ssn": "hide", "password": "hide"},
            }
        ],
        "denied": [],
    }

    users = Table(table="users", schema="public", catalog=None)
    result = get_hidden_columns_for_table(users, db, rules=[access_rule])
    assert result == {"ssn", "password"}


def test_get_hidden_columns_for_table_denied_access(app_context: None):
    """A table listed under "denied" reports no hidden columns."""
    from superset.data_access_rules.utils import get_hidden_columns_for_table

    db = MagicMock()
    db.database_name = "mydb"

    access_rule = MagicMock(spec=DataAccessRule)
    access_rule.rule_dict = {
        "allowed": [],
        "denied": [
            {
                "database": "mydb",
                "schema": "public",
                "table": "users",
            }
        ],
    }

    users = Table(table="users", schema="public", catalog=None)
    result = get_hidden_columns_for_table(users, db, rules=[access_rule])
    # Denied access means no CLS rules are returned
    assert result == set()


# Tests for filter_columns_by_cls
def test_filter_columns_by_cls_no_hidden(app_context: None):
    """Without any CLS entry the column list comes back untouched."""
    from superset.data_access_rules.utils import filter_columns_by_cls

    db = MagicMock()
    db.database_name = "mydb"

    columns = [
        {"column_name": "id", "type": "INTEGER"},
        {"column_name": "name", "type": "VARCHAR"},
        {"column_name": "email", "type": "VARCHAR"},
    ]

    access_rule = MagicMock(spec=DataAccessRule)
    access_rule.rule_dict = {
        "allowed": [
            {"database": "mydb", "schema": "public", "table": "users"}
        ],
        "denied": [],
    }

    users = Table(table="users", schema="public", catalog=None)

    with patch(
        "superset.data_access_rules.utils.is_feature_enabled",
        return_value=True,
    ), patch(
        "superset.data_access_rules.utils.get_user_rules",
        return_value=[access_rule],
    ):
        kept = filter_columns_by_cls(columns, users, db)
        assert len(kept) == 3
        assert kept == columns


def test_filter_columns_by_cls_with_hidden(app_context: None):
    """A column marked "hide" is removed; the others survive."""
    from superset.data_access_rules.utils import filter_columns_by_cls

    db = MagicMock()
    db.database_name = "mydb"

    columns = [
        {"column_name": "id", "type": "INTEGER"},
        {"column_name": "name", "type": "VARCHAR"},
        {"column_name": "email", "type": "VARCHAR"},
        {"column_name": "ssn", "type": "VARCHAR"},
    ]

    access_rule = MagicMock(spec=DataAccessRule)
    access_rule.rule_dict = {
        "allowed": [
            {
                "database": "mydb",
                "schema": "public",
                "table": "users",
                "cls": {"ssn": "hide"},
            }
        ],
        "denied": [],
    }

    users = Table(table="users", schema="public", catalog=None)

    with patch(
        "superset.data_access_rules.utils.is_feature_enabled",
        return_value=True,
    ), patch(
        "superset.data_access_rules.utils.get_user_rules",
        return_value=[access_rule],
    ):
        kept = filter_columns_by_cls(columns, users, db)
        assert len(kept) == 3
        kept_names = [entry["column_name"] for entry in kept]
        assert "ssn" not in kept_names
        assert "id" in kept_names
        assert "name" in kept_names
        assert "email" in kept_names


def test_filter_columns_by_cls_feature_disabled(app_context: None):
    """With the feature flag off, no filtering takes place."""
    from superset.data_access_rules.utils import filter_columns_by_cls

    db = MagicMock()
    db.database_name = "mydb"

    columns = [
        {"column_name": "id", "type": "INTEGER"},
        {"column_name": "ssn", "type": "VARCHAR"},
    ]

    users = Table(table="users", schema="public", catalog=None)

    with patch(
        "superset.data_access_rules.utils.is_feature_enabled",
        return_value=False,
    ):
        # Even if there would be hidden columns, they are not filtered
        kept = filter_columns_by_cls(columns, users, db)
        assert len(kept) == 2
        assert kept == columns


def test_filter_columns_by_cls_custom_key(app_context: None):
    """The column name can be read from a caller-supplied dict key."""
    from superset.data_access_rules.utils import filter_columns_by_cls

    db = MagicMock()
    db.database_name = "mydb"

    # Columns with different key structure (like from SQL Lab table metadata)
    columns = [
        {"name": "id", "type": "INTEGER"},
        {"name": "ssn", "type": "VARCHAR"},
    ]

    access_rule = MagicMock(spec=DataAccessRule)
    access_rule.rule_dict = {
        "allowed": [
            {
                "database": "mydb",
                "schema": "public",
                "table": "users",
                "cls": {"ssn": "hide"},
            }
        ],
        "denied": [],
    }

    users = Table(table="users", schema="public", catalog=None)

    with patch(
        "superset.data_access_rules.utils.is_feature_enabled",
        return_value=True,
    ), patch(
        "superset.data_access_rules.utils.get_user_rules",
        return_value=[access_rule],
    ):
        kept = filter_columns_by_cls(
            columns, users, db, column_name_key="name"
        )
        assert len(kept) == 1
        assert kept[0]["name"] == "id"