mirror of
https://github.com/apache/superset.git
synced 2026-05-30 04:39:20 +00:00
fix(odps): address review feedback - security, recursion, typing, tests
- Move security check before ODPS partition detection (auth before backend calls) - Wrap is_odps_partitioned_table in try/except with warning log and fallback - Replace OdpsBaseEngineSpec.get_table_metadata body with NotImplementedError - Fix select_star signature: engine: Engine -> dialect: Dialect (matches base) - Update Optional[X] -> X | None for modern Python typing - Remove broken __eq__ that violated frozen dataclass hash contract - Fix Partition docstring typos and __str__ description - Add warning log when ODPS URI does not match expected pattern - Add tests/unit_tests/db_engine_specs/test_odps.py with 7 unit tests Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -286,6 +286,11 @@ class DatabaseDAO(BaseDAO[Database]):
|
||||
partition_fields = [partition.name for partition in partition_spec]
|
||||
return True, partition_fields
|
||||
return False, []
|
||||
logger.warning(
|
||||
"ODPS sqlalchemy_uri did not match the expected pattern; "
|
||||
"unable to determine partition info for table %r",
|
||||
table_name,
|
||||
)
|
||||
return False, []
|
||||
|
||||
|
||||
|
||||
@@ -1081,14 +1081,23 @@ class DatabaseRestApi(BaseSupersetModelRestApi):
|
||||
raise InvalidPayloadSchemaError(ex) from ex
|
||||
table_name = str(parameters["name"])
|
||||
table = Table(table_name, parameters["schema"], parameters["catalog"])
|
||||
is_partitioned_table, partition_fields = DatabaseDAO.is_odps_partitioned_table(
|
||||
database, table_name
|
||||
)
|
||||
try:
|
||||
security_manager.raise_for_access(database=database, table=table)
|
||||
except SupersetSecurityException as ex:
|
||||
# instead of raising 403, raise 404 to hide table existence
|
||||
raise TableNotFoundException("No such table") from ex
|
||||
try:
|
||||
is_partitioned_table, partition_fields = (
|
||||
DatabaseDAO.is_odps_partitioned_table(database, table_name)
|
||||
)
|
||||
except Exception as ex: # pylint: disable=broad-except
|
||||
logger.warning(
|
||||
"Error determining ODPS partition info for table %s: %s; "
|
||||
"falling back to non-partitioned path",
|
||||
table_name,
|
||||
error_msg_from_exception(ex),
|
||||
)
|
||||
is_partitioned_table, partition_fields = False, []
|
||||
partition = Partition(is_partitioned_table, partition_fields)
|
||||
if is_partitioned_table:
|
||||
from superset.db_engine_specs.odps import OdpsEngineSpec
|
||||
|
||||
@@ -17,10 +17,10 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
from typing import Any, Optional, TYPE_CHECKING
|
||||
from typing import Any, TYPE_CHECKING
|
||||
|
||||
from sqlalchemy import select, text
|
||||
from sqlalchemy.engine.base import Engine
|
||||
from sqlalchemy.engine import Dialect
|
||||
|
||||
from superset.databases.schemas import (
|
||||
TableMetadataColumnsResponse,
|
||||
@@ -47,7 +47,7 @@ class OdpsBaseEngineSpec(BaseEngineSpec):
|
||||
cls,
|
||||
database: Database,
|
||||
table: Table,
|
||||
partition: Optional[Partition] = None,
|
||||
partition: Partition | None = None,
|
||||
) -> TableMetadataResponse:
|
||||
"""
|
||||
Returns basic table metadata
|
||||
@@ -56,7 +56,7 @@ class OdpsBaseEngineSpec(BaseEngineSpec):
|
||||
:param partition: A Table partition info
|
||||
:return: Basic table metadata
|
||||
"""
|
||||
return cls.get_table_metadata(database, table, partition)
|
||||
raise NotImplementedError
|
||||
|
||||
|
||||
class OdpsEngineSpec(BasicParametersMixin, OdpsBaseEngineSpec):
|
||||
@@ -66,7 +66,7 @@ class OdpsEngineSpec(BasicParametersMixin, OdpsBaseEngineSpec):
|
||||
|
||||
@classmethod
|
||||
def get_table_metadata(
|
||||
cls, database: Any, table: Table, partition: Optional[Partition] = None
|
||||
cls, database: Any, table: Table, partition: Partition | None = None
|
||||
) -> TableMetadataResponse:
|
||||
"""
|
||||
Get table metadata information, including type, pk, fks.
|
||||
@@ -112,7 +112,7 @@ class OdpsEngineSpec(BasicParametersMixin, OdpsBaseEngineSpec):
|
||||
"selectStar": cls.select_star(
|
||||
database=database,
|
||||
table=table,
|
||||
engine=engine,
|
||||
dialect=engine.dialect,
|
||||
limit=100,
|
||||
show_cols=False,
|
||||
indent=True,
|
||||
@@ -131,13 +131,13 @@ class OdpsEngineSpec(BasicParametersMixin, OdpsBaseEngineSpec):
|
||||
cls,
|
||||
database: Database,
|
||||
table: Table,
|
||||
engine: Engine,
|
||||
dialect: Dialect,
|
||||
limit: int = 100,
|
||||
show_cols: bool = False,
|
||||
indent: bool = True,
|
||||
latest_partition: bool = True,
|
||||
cols: list[ResultSetColumnType] | None = None,
|
||||
partition: Optional[Partition] = None,
|
||||
partition: Partition | None = None,
|
||||
) -> str:
|
||||
"""
|
||||
Generate a "SELECT * from [schema.]table_name" query with appropriate limit.
|
||||
@@ -147,7 +147,7 @@ class OdpsEngineSpec(BasicParametersMixin, OdpsBaseEngineSpec):
|
||||
:param partition: The table's partition info
|
||||
:param database: Database instance
|
||||
:param table: Table instance
|
||||
:param engine: SqlAlchemy Engine instance
|
||||
:param dialect: SqlAlchemy Dialect instance
|
||||
:param limit: limit to impose on query
|
||||
:param show_cols: Show columns in query; otherwise use "*"
|
||||
:param indent: Add indentation to query
|
||||
@@ -163,7 +163,7 @@ class OdpsEngineSpec(BasicParametersMixin, OdpsBaseEngineSpec):
|
||||
|
||||
if show_cols:
|
||||
fields = cls._get_fields(cols)
|
||||
full_table_name = cls.quote_table(table, engine.dialect)
|
||||
full_table_name = cls.quote_table(table, dialect)
|
||||
qry = select(fields).select_from(text(full_table_name))
|
||||
if database.backend == "odps":
|
||||
if (
|
||||
|
||||
@@ -326,10 +326,10 @@ class Table:
|
||||
class Partition:
|
||||
"""
|
||||
Partition object, with two attribute keys:
|
||||
ispartitioned_table and partition_comlumn,
|
||||
is_partitioned_table and partition_column,
|
||||
used to provide partition information
|
||||
Here is an example of an object:
|
||||
{"ispartitioned_table":true,"partition_column":["month","day"]}
|
||||
{"is_partitioned_table": true, "partition_column": ["month", "day"]}
|
||||
"""
|
||||
|
||||
is_partitioned_table: bool
|
||||
@@ -337,7 +337,7 @@ class Partition:
|
||||
|
||||
def __str__(self) -> str:
|
||||
"""
|
||||
Return the partition columns of table name.
|
||||
Return a string representation of the Partition object.
|
||||
"""
|
||||
partition_column_str = (
|
||||
", ".join(map(str, self.partition_column))
|
||||
@@ -349,9 +349,6 @@ class Partition:
|
||||
f"partition_column=[{partition_column_str}])"
|
||||
)
|
||||
|
||||
def __eq__(self, other: Any) -> bool:
|
||||
return str(self) == str(other)
|
||||
|
||||
|
||||
# To avoid unnecessary parsing/formatting of queries, the statement has the concept of
|
||||
# an "internal representation", which is the AST of the SQL statement. For most of the
|
||||
|
||||
184
tests/unit_tests/db_engine_specs/test_odps.py
Normal file
184
tests/unit_tests/db_engine_specs/test_odps.py
Normal file
@@ -0,0 +1,184 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from superset.db_engine_specs.odps import OdpsBaseEngineSpec, OdpsEngineSpec
|
||||
from superset.sql.parse import Partition, Table
|
||||
|
||||
|
||||
def test_odps_base_engine_spec_get_table_metadata_raises() -> None:
|
||||
"""OdpsBaseEngineSpec.get_table_metadata must not be called directly."""
|
||||
with pytest.raises(NotImplementedError):
|
||||
OdpsBaseEngineSpec.get_table_metadata(
|
||||
database=MagicMock(),
|
||||
table=Table("my_table", None, None),
|
||||
)
|
||||
|
||||
|
||||
def test_odps_engine_spec_select_star_no_partition() -> None:
|
||||
"""select_star for a non-partitioned ODPS table produces a plain SELECT *."""
|
||||
database = MagicMock()
|
||||
database.backend = "odps"
|
||||
database.get_columns.return_value = []
|
||||
database.compile_sqla_query.return_value = "SELECT * FROM my_table LIMIT 100"
|
||||
dialect = MagicMock()
|
||||
|
||||
sql = OdpsEngineSpec.select_star(
|
||||
database=database,
|
||||
table=Table("my_table", None, None),
|
||||
dialect=dialect,
|
||||
limit=100,
|
||||
show_cols=False,
|
||||
indent=False,
|
||||
latest_partition=False,
|
||||
partition=None,
|
||||
)
|
||||
|
||||
assert "SELECT" in sql
|
||||
assert "my_table" in sql
|
||||
|
||||
|
||||
def test_odps_engine_spec_select_star_with_partition() -> None:
|
||||
"""select_star for a partitioned ODPS table adds a WHERE clause."""
|
||||
database = MagicMock()
|
||||
database.backend = "odps"
|
||||
database.get_columns.return_value = []
|
||||
database.compile_sqla_query.return_value = (
|
||||
"SELECT * FROM my_table WHERE CAST(month AS STRING) LIKE '%' LIMIT 100"
|
||||
)
|
||||
dialect = MagicMock()
|
||||
partition = Partition(is_partitioned_table=True, partition_column=["month"])
|
||||
|
||||
sql = OdpsEngineSpec.select_star(
|
||||
database=database,
|
||||
table=Table("my_table", None, None),
|
||||
dialect=dialect,
|
||||
limit=100,
|
||||
show_cols=False,
|
||||
indent=False,
|
||||
latest_partition=False,
|
||||
partition=partition,
|
||||
)
|
||||
|
||||
assert "WHERE" in sql
|
||||
|
||||
|
||||
def test_is_odps_partitioned_table_non_odps_backend() -> None:
|
||||
"""Returns (False, []) immediately for non-ODPS databases; no network call made."""
|
||||
from superset.daos.database import DatabaseDAO
|
||||
|
||||
database = MagicMock()
|
||||
database.backend = "postgresql"
|
||||
|
||||
result = DatabaseDAO.is_odps_partitioned_table(database, "some_table")
|
||||
|
||||
assert result == (False, [])
|
||||
|
||||
|
||||
def test_is_odps_partitioned_table_missing_pyodps() -> None:
|
||||
"""Returns (False, []) with a warning when pyodps is not installed."""
|
||||
from superset.daos.database import DatabaseDAO
|
||||
|
||||
database = MagicMock()
|
||||
database.backend = "odps"
|
||||
database.sqlalchemy_uri = (
|
||||
"odps://mykey:mysecret@myproject/?endpoint=http://service.odps.test"
|
||||
)
|
||||
database.password = "mysecret" # noqa: S105
|
||||
|
||||
with patch.dict("sys.modules", {"odps": None}):
|
||||
result = DatabaseDAO.is_odps_partitioned_table(database, "some_table")
|
||||
|
||||
assert result == (False, [])
|
||||
|
||||
|
||||
def test_is_odps_partitioned_table_uri_no_match(
|
||||
caplog: pytest.LogCaptureFixture,
|
||||
) -> None:
|
||||
"""Logs a warning and returns (False, []) when the URI doesn't match the pattern."""
|
||||
from superset.daos.database import DatabaseDAO
|
||||
|
||||
database = MagicMock()
|
||||
database.backend = "odps"
|
||||
database.sqlalchemy_uri = "odps://invalid-uri-format"
|
||||
database.password = "secret" # noqa: S105
|
||||
|
||||
with patch("superset.daos.database.DatabaseDAO.is_odps_partitioned_table.__func__"):
|
||||
pass
|
||||
|
||||
import logging
|
||||
|
||||
with caplog.at_level(logging.WARNING, logger="superset.daos.database"):
|
||||
result = DatabaseDAO.is_odps_partitioned_table(database, "some_table")
|
||||
|
||||
assert result == (False, [])
|
||||
assert "did not match" in caplog.text
|
||||
|
||||
|
||||
def test_is_odps_partitioned_table_partitioned(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
"""Returns (True, [field_names]) for a partitioned ODPS table."""
|
||||
from superset.daos.database import DatabaseDAO
|
||||
|
||||
database = MagicMock()
|
||||
database.backend = "odps"
|
||||
database.sqlalchemy_uri = (
|
||||
"odps://mykey:mysecret@myproject/?endpoint=http://service.odps.test"
|
||||
)
|
||||
database.password = "mysecret" # noqa: S105
|
||||
|
||||
mock_partition = MagicMock()
|
||||
mock_partition.name = "month"
|
||||
mock_table = MagicMock()
|
||||
mock_table.exist_partition = True
|
||||
mock_table.table_schema.partitions = [mock_partition]
|
||||
|
||||
mock_odps_client = MagicMock()
|
||||
mock_odps_client.get_table.return_value = mock_table
|
||||
mock_odps_class = MagicMock(return_value=mock_odps_client)
|
||||
|
||||
with patch.dict("sys.modules", {"odps": MagicMock(ODPS=mock_odps_class)}):
|
||||
with patch("superset.daos.database.ODPS", mock_odps_class, create=True):
|
||||
result = DatabaseDAO.is_odps_partitioned_table(database, "my_table")
|
||||
|
||||
assert result == (True, ["month"])
|
||||
|
||||
|
||||
def test_is_odps_partitioned_table_not_partitioned(
|
||||
monkeypatch: pytest.MonkeyPatch,
|
||||
) -> None:
|
||||
"""Returns (False, []) for a non-partitioned ODPS table."""
|
||||
from superset.daos.database import DatabaseDAO
|
||||
|
||||
database = MagicMock()
|
||||
database.backend = "odps"
|
||||
database.sqlalchemy_uri = (
|
||||
"odps://mykey:mysecret@myproject/?endpoint=http://service.odps.test"
|
||||
)
|
||||
database.password = "mysecret" # noqa: S105
|
||||
|
||||
mock_table = MagicMock()
|
||||
mock_table.exist_partition = False
|
||||
mock_odps_client = MagicMock()
|
||||
mock_odps_client.get_table.return_value = mock_table
|
||||
mock_odps_class = MagicMock(return_value=mock_odps_client)
|
||||
|
||||
with patch("superset.daos.database.ODPS", mock_odps_class, create=True):
|
||||
result = DatabaseDAO.is_odps_partitioned_table(database, "my_table")
|
||||
|
||||
assert result == (False, [])
|
||||
Reference in New Issue
Block a user