fix(odps): address review feedback - security, recursion, typing, tests

- Move security check before ODPS partition detection (auth before backend calls)
- Wrap is_odps_partitioned_table in try/except with warning log and fallback
- Replace OdpsBaseEngineSpec.get_table_metadata body with NotImplementedError
- Fix select_star signature: engine: Engine -> dialect: Dialect (matches base)
- Update Optional[X] -> X | None for modern Python typing
- Remove broken __eq__ that violated frozen dataclass hash contract
- Fix Partition docstring typos and __str__ description
- Add warning log when ODPS URI does not match expected pattern
- Add tests/unit_tests/db_engine_specs/test_odps.py with 7 unit tests

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Claude Code
2026-03-11 14:00:48 -07:00
parent 23b8d323a3
commit 925e20bb04
5 changed files with 214 additions and 19 deletions

View File

@@ -286,6 +286,11 @@ class DatabaseDAO(BaseDAO[Database]):
partition_fields = [partition.name for partition in partition_spec]
return True, partition_fields
return False, []
logger.warning(
"ODPS sqlalchemy_uri did not match the expected pattern; "
"unable to determine partition info for table %r",
table_name,
)
return False, []

View File

@@ -1081,14 +1081,23 @@ class DatabaseRestApi(BaseSupersetModelRestApi):
raise InvalidPayloadSchemaError(ex) from ex
table_name = str(parameters["name"])
table = Table(table_name, parameters["schema"], parameters["catalog"])
is_partitioned_table, partition_fields = DatabaseDAO.is_odps_partitioned_table(
database, table_name
)
try:
security_manager.raise_for_access(database=database, table=table)
except SupersetSecurityException as ex:
# instead of raising 403, raise 404 to hide table existence
raise TableNotFoundException("No such table") from ex
try:
is_partitioned_table, partition_fields = (
DatabaseDAO.is_odps_partitioned_table(database, table_name)
)
except Exception as ex: # pylint: disable=broad-except
logger.warning(
"Error determining ODPS partition info for table %s: %s; "
"falling back to non-partitioned path",
table_name,
error_msg_from_exception(ex),
)
is_partitioned_table, partition_fields = False, []
partition = Partition(is_partitioned_table, partition_fields)
if is_partitioned_table:
from superset.db_engine_specs.odps import OdpsEngineSpec

View File

@@ -17,10 +17,10 @@
from __future__ import annotations
import logging
from typing import Any, Optional, TYPE_CHECKING
from typing import Any, TYPE_CHECKING
from sqlalchemy import select, text
from sqlalchemy.engine.base import Engine
from sqlalchemy.engine import Dialect
from superset.databases.schemas import (
TableMetadataColumnsResponse,
@@ -47,7 +47,7 @@ class OdpsBaseEngineSpec(BaseEngineSpec):
cls,
database: Database,
table: Table,
partition: Optional[Partition] = None,
partition: Partition | None = None,
) -> TableMetadataResponse:
"""
Returns basic table metadata
@@ -56,7 +56,7 @@ class OdpsBaseEngineSpec(BaseEngineSpec):
:param partition: A Table partition info
:return: Basic table metadata
"""
return cls.get_table_metadata(database, table, partition)
raise NotImplementedError
class OdpsEngineSpec(BasicParametersMixin, OdpsBaseEngineSpec):
@@ -66,7 +66,7 @@ class OdpsEngineSpec(BasicParametersMixin, OdpsBaseEngineSpec):
@classmethod
def get_table_metadata(
cls, database: Any, table: Table, partition: Optional[Partition] = None
cls, database: Any, table: Table, partition: Partition | None = None
) -> TableMetadataResponse:
"""
Get table metadata information, including type, pk, fks.
@@ -112,7 +112,7 @@ class OdpsEngineSpec(BasicParametersMixin, OdpsBaseEngineSpec):
"selectStar": cls.select_star(
database=database,
table=table,
engine=engine,
dialect=engine.dialect,
limit=100,
show_cols=False,
indent=True,
@@ -131,13 +131,13 @@ class OdpsEngineSpec(BasicParametersMixin, OdpsBaseEngineSpec):
cls,
database: Database,
table: Table,
engine: Engine,
dialect: Dialect,
limit: int = 100,
show_cols: bool = False,
indent: bool = True,
latest_partition: bool = True,
cols: list[ResultSetColumnType] | None = None,
partition: Optional[Partition] = None,
partition: Partition | None = None,
) -> str:
"""
Generate a "SELECT * from [schema.]table_name" query with appropriate limit.
@@ -147,7 +147,7 @@ class OdpsEngineSpec(BasicParametersMixin, OdpsBaseEngineSpec):
:param partition: The table's partition info
:param database: Database instance
:param table: Table instance
:param engine: SqlAlchemy Engine instance
:param dialect: SqlAlchemy Dialect instance
:param limit: limit to impose on query
:param show_cols: Show columns in query; otherwise use "*"
:param indent: Add indentation to query
@@ -163,7 +163,7 @@ class OdpsEngineSpec(BasicParametersMixin, OdpsBaseEngineSpec):
if show_cols:
fields = cls._get_fields(cols)
full_table_name = cls.quote_table(table, engine.dialect)
full_table_name = cls.quote_table(table, dialect)
qry = select(fields).select_from(text(full_table_name))
if database.backend == "odps":
if (

View File

@@ -326,10 +326,10 @@ class Table:
class Partition:
"""
Partition object, with two attribute keys:
ispartitioned_table and partition_comlumn,
is_partitioned_table and partition_column,
used to provide partition information
Here is an example of an object:
{"ispartitioned_table":true,"partition_column":["month","day"]}
{"is_partitioned_table": true, "partition_column": ["month", "day"]}
"""
is_partitioned_table: bool
@@ -337,7 +337,7 @@ class Partition:
def __str__(self) -> str:
"""
Return the partition columns of table name.
Return a string representation of the Partition object.
"""
partition_column_str = (
", ".join(map(str, self.partition_column))
@@ -349,9 +349,6 @@ class Partition:
f"partition_column=[{partition_column_str}])"
)
def __eq__(self, other: Any) -> bool:
return str(self) == str(other)
# To avoid unnecessary parsing/formatting of queries, the statement has the concept of
# an "internal representation", which is the AST of the SQL statement. For most of the

View File

@@ -0,0 +1,184 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from unittest.mock import MagicMock, patch
import pytest
from superset.db_engine_specs.odps import OdpsBaseEngineSpec, OdpsEngineSpec
from superset.sql.parse import Partition, Table
def test_odps_base_engine_spec_get_table_metadata_raises() -> None:
"""OdpsBaseEngineSpec.get_table_metadata must not be called directly."""
with pytest.raises(NotImplementedError):
OdpsBaseEngineSpec.get_table_metadata(
database=MagicMock(),
table=Table("my_table", None, None),
)
def test_odps_engine_spec_select_star_no_partition() -> None:
"""select_star for a non-partitioned ODPS table produces a plain SELECT *."""
database = MagicMock()
database.backend = "odps"
database.get_columns.return_value = []
database.compile_sqla_query.return_value = "SELECT * FROM my_table LIMIT 100"
dialect = MagicMock()
sql = OdpsEngineSpec.select_star(
database=database,
table=Table("my_table", None, None),
dialect=dialect,
limit=100,
show_cols=False,
indent=False,
latest_partition=False,
partition=None,
)
assert "SELECT" in sql
assert "my_table" in sql
def test_odps_engine_spec_select_star_with_partition() -> None:
"""select_star for a partitioned ODPS table adds a WHERE clause."""
database = MagicMock()
database.backend = "odps"
database.get_columns.return_value = []
database.compile_sqla_query.return_value = (
"SELECT * FROM my_table WHERE CAST(month AS STRING) LIKE '%' LIMIT 100"
)
dialect = MagicMock()
partition = Partition(is_partitioned_table=True, partition_column=["month"])
sql = OdpsEngineSpec.select_star(
database=database,
table=Table("my_table", None, None),
dialect=dialect,
limit=100,
show_cols=False,
indent=False,
latest_partition=False,
partition=partition,
)
assert "WHERE" in sql
def test_is_odps_partitioned_table_non_odps_backend() -> None:
"""Returns (False, []) immediately for non-ODPS databases; no network call made."""
from superset.daos.database import DatabaseDAO
database = MagicMock()
database.backend = "postgresql"
result = DatabaseDAO.is_odps_partitioned_table(database, "some_table")
assert result == (False, [])
def test_is_odps_partitioned_table_missing_pyodps() -> None:
"""Returns (False, []) with a warning when pyodps is not installed."""
from superset.daos.database import DatabaseDAO
database = MagicMock()
database.backend = "odps"
database.sqlalchemy_uri = (
"odps://mykey:mysecret@myproject/?endpoint=http://service.odps.test"
)
database.password = "mysecret" # noqa: S105
with patch.dict("sys.modules", {"odps": None}):
result = DatabaseDAO.is_odps_partitioned_table(database, "some_table")
assert result == (False, [])
def test_is_odps_partitioned_table_uri_no_match(
caplog: pytest.LogCaptureFixture,
) -> None:
"""Logs a warning and returns (False, []) when the URI doesn't match the pattern."""
from superset.daos.database import DatabaseDAO
database = MagicMock()
database.backend = "odps"
database.sqlalchemy_uri = "odps://invalid-uri-format"
database.password = "secret" # noqa: S105
with patch("superset.daos.database.DatabaseDAO.is_odps_partitioned_table.__func__"):
pass
import logging
with caplog.at_level(logging.WARNING, logger="superset.daos.database"):
result = DatabaseDAO.is_odps_partitioned_table(database, "some_table")
assert result == (False, [])
assert "did not match" in caplog.text
def test_is_odps_partitioned_table_partitioned(monkeypatch: pytest.MonkeyPatch) -> None:
"""Returns (True, [field_names]) for a partitioned ODPS table."""
from superset.daos.database import DatabaseDAO
database = MagicMock()
database.backend = "odps"
database.sqlalchemy_uri = (
"odps://mykey:mysecret@myproject/?endpoint=http://service.odps.test"
)
database.password = "mysecret" # noqa: S105
mock_partition = MagicMock()
mock_partition.name = "month"
mock_table = MagicMock()
mock_table.exist_partition = True
mock_table.table_schema.partitions = [mock_partition]
mock_odps_client = MagicMock()
mock_odps_client.get_table.return_value = mock_table
mock_odps_class = MagicMock(return_value=mock_odps_client)
with patch.dict("sys.modules", {"odps": MagicMock(ODPS=mock_odps_class)}):
with patch("superset.daos.database.ODPS", mock_odps_class, create=True):
result = DatabaseDAO.is_odps_partitioned_table(database, "my_table")
assert result == (True, ["month"])
def test_is_odps_partitioned_table_not_partitioned(
monkeypatch: pytest.MonkeyPatch,
) -> None:
"""Returns (False, []) for a non-partitioned ODPS table."""
from superset.daos.database import DatabaseDAO
database = MagicMock()
database.backend = "odps"
database.sqlalchemy_uri = (
"odps://mykey:mysecret@myproject/?endpoint=http://service.odps.test"
)
database.password = "mysecret" # noqa: S105
mock_table = MagicMock()
mock_table.exist_partition = False
mock_odps_client = MagicMock()
mock_odps_client.get_table.return_value = mock_table
mock_odps_class = MagicMock(return_value=mock_odps_client)
with patch("superset.daos.database.ODPS", mock_odps_class, create=True):
result = DatabaseDAO.is_odps_partitioned_table(database, "my_table")
assert result == (False, [])