feat(SIP-95): new endpoint for table metadata (#28122)

This commit is contained in:
Beto Dealmeida
2024-04-25 12:23:49 -04:00
committed by GitHub
parent 52f8734662
commit 6cf681df68
71 changed files with 1048 additions and 513 deletions

View File

@@ -136,6 +136,7 @@ class DatabaseRestApi(BaseSupersetModelRestApi):
RouteMethod.RELATED,
"tables",
"table_metadata",
"table_metadata_deprecated",
"table_extra_metadata",
"table_extra_metadata_deprecated",
"select_star",
@@ -722,10 +723,10 @@ class DatabaseRestApi(BaseSupersetModelRestApi):
@statsd_metrics
@event_logger.log_this_with_context(
action=lambda self, *args, **kwargs: f"{self.__class__.__name__}"
f".table_metadata",
f".table_metadata_deprecated",
log_to_statsd=False,
)
def table_metadata(
def table_metadata_deprecated(
self, database: Database, table_name: str, schema_name: str
) -> FlaskResponse:
"""Get database table metadata.
@@ -766,16 +767,16 @@ class DatabaseRestApi(BaseSupersetModelRestApi):
500:
$ref: '#/components/responses/500'
"""
self.incr_stats("init", self.table_metadata.__name__)
self.incr_stats("init", self.table_metadata_deprecated.__name__)
try:
table_info = get_table_metadata(database, table_name, schema_name)
table_info = get_table_metadata(database, Table(table_name, schema_name))
except SQLAlchemyError as ex:
self.incr_stats("error", self.table_metadata.__name__)
self.incr_stats("error", self.table_metadata_deprecated.__name__)
return self.response_422(error_msg_from_exception(ex))
except SupersetException as ex:
return self.response(ex.status, message=ex.message)
self.incr_stats("success", self.table_metadata.__name__)
self.incr_stats("success", self.table_metadata_deprecated.__name__)
return self.response(200, **table_info)
@expose("/<int:pk>/table_extra/<path:table_name>/<schema_name>/", methods=("GET",))
@@ -844,7 +845,86 @@ class DatabaseRestApi(BaseSupersetModelRestApi):
payload = database.db_engine_spec.get_extra_table_metadata(database, table)
return self.response(200, **payload)
@expose("/<int:pk>/table_metadata/extra/", methods=("GET",))
@expose("/<int:pk>/table_metadata/", methods=["GET"])
@protect()
@statsd_metrics
@event_logger.log_this_with_context(
    action=lambda self, *args, **kwargs: f"{self.__class__.__name__}"
    f".table_metadata",
    log_to_statsd=False,
)
def table_metadata(self, pk: int) -> FlaskResponse:
    """
    Get metadata for a given table.

    Optionally, a schema and a catalog can be passed, if different from the default
    ones.
    ---
    get:
      summary: Get table metadata
      description: >-
        Metadata associated with the table (columns, indexes, etc.)
      parameters:
      - in: path
        schema:
          type: integer
        name: pk
        required: true
        description: The database id
      - in: query
        schema:
          type: string
        name: name
        required: true
        description: Table name
      - in: query
        schema:
          type: string
        name: schema
        description: >-
          Optional table schema, if not passed default schema will be used
      - in: query
        schema:
          type: string
        name: catalog
        description: >-
          Optional table catalog, if not passed default catalog will be used
      responses:
        200:
          description: Table metadata information
          content:
            application/json:
              schema:
                $ref: "#/components/schemas/TableMetadataResponseSchema"
        401:
          $ref: '#/components/responses/401'
        404:
          $ref: '#/components/responses/404'
        500:
          $ref: '#/components/responses/500'
    """
    self.incr_stats("init", self.table_metadata.__name__)

    database = DatabaseDAO.find_by_id(pk)
    if database is None:
        raise DatabaseNotFoundException("No such database")

    # The table reference comes from the query string; the keys read below
    # ("name", "schema", "catalog") are the ones the OpenAPI block documents.
    # NOTE(review): assumes QualifiedTableSchema does not remap them via
    # `data_key` -- confirm against the schema definition.
    try:
        parameters = QualifiedTableSchema().load(request.args)
    except ValidationError as ex:
        raise InvalidPayloadSchemaError(ex) from ex

    table = Table(parameters["name"], parameters["schema"], parameters["catalog"])
    try:
        security_manager.raise_for_access(database=database, table=table)
    except SupersetSecurityException as ex:
        # instead of raising 403, raise 404 to hide table existence
        raise TableNotFoundException("No such table") from ex

    payload = database.db_engine_spec.get_table_metadata(database, table)

    return self.response(200, **payload)
@expose("/<int:pk>/table_metadata/extra/", methods=["GET"])
@protect()
@statsd_metrics
@event_logger.log_this_with_context(
@@ -978,7 +1058,8 @@ class DatabaseRestApi(BaseSupersetModelRestApi):
self.incr_stats("init", self.select_star.__name__)
try:
result = database.select_star(
table_name, schema_name, latest_partition=True
Table(table_name, schema_name),
latest_partition=True,
)
except NoSuchTableError:
self.incr_stats("error", self.select_star.__name__)

View File

@@ -17,11 +17,13 @@
# pylint: disable=unused-argument, too-many-lines
from __future__ import annotations
import inspect
import json
import os
import re
from typing import Any
from typing import Any, TypedDict
from flask import current_app
from flask_babel import lazy_gettext as _
@@ -581,6 +583,49 @@ class DatabaseTestConnectionSchema(DatabaseParametersSchemaMixin, Schema):
ssh_tunnel = fields.Nested(DatabaseSSHTunnel, allow_none=True)
# Typed shape of the "options" entry carried by each foreign-key / index
# record (see TableMetadataForeignKeysIndexesResponse.options).
# NOTE(review): every option is declared as bool here; values such as
# ondelete/onupdate are presumably reflected as strings by SQLAlchemy --
# confirm before relying on these annotations.
class TableMetadataOptionsResponse(TypedDict):
deferrable: bool
initially: bool
match: bool
ondelete: bool
onupdate: bool
# Typed shape of one column entry in the table-metadata payload
# (element type of TableMetadataResponse.columns).
# Declared with total=False, so every key below is optional.
class TableMetadataColumnsResponse(TypedDict, total=False):
keys: list[str]
longType: str
name: str
type: str
duplicates_constraint: str | None
comment: str | None
# Typed shape shared by foreign-key and index records in the table-metadata
# payload (element type of TableMetadataResponse.foreignKeys and .indexes).
# NOTE(review): the referred_* keys look specific to foreign keys, yet the
# dict is total, so index records are also typed as having them -- confirm
# whether indexes should get their own (or a non-total) type.
class TableMetadataForeignKeysIndexesResponse(TypedDict):
column_names: list[str]
name: str
options: TableMetadataOptionsResponse
referred_columns: list[str]
referred_schema: str
referred_table: str
type: str
# Typed shape of the primary-key record in the table-metadata payload
# (type of TableMetadataResponse.primaryKey).
class TableMetadataPrimaryKeyResponse(TypedDict):
column_names: list[str]
name: str
type: str
# Typed shape of the full table-metadata payload: table name, column
# entries, foreign keys, indexes, primary key, a SELECT * statement and an
# optional table comment.
class TableMetadataResponse(TypedDict):
name: str
columns: list[TableMetadataColumnsResponse]
foreignKeys: list[TableMetadataForeignKeysIndexesResponse]
indexes: list[TableMetadataForeignKeysIndexesResponse]
primaryKey: TableMetadataPrimaryKeyResponse
selectStar: str
comment: str | None
class TableMetadataOptionsResponseSchema(Schema):
deferrable = fields.Bool()
initially = fields.Bool()

View File

@@ -14,19 +14,29 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from typing import Any, Optional, Union
from __future__ import annotations
from typing import Any, TYPE_CHECKING
from sqlalchemy.engine.url import make_url, URL
from superset.commands.database.exceptions import DatabaseInvalidError
from superset.sql_parse import Table
if TYPE_CHECKING:
from superset.databases.schemas import (
TableMetadataColumnsResponse,
TableMetadataForeignKeysIndexesResponse,
TableMetadataResponse,
)
def get_foreign_keys_metadata(
database: Any,
table_name: str,
schema_name: Optional[str],
) -> list[dict[str, Any]]:
foreign_keys = database.get_foreign_keys(table_name, schema_name)
table: Table,
) -> list[TableMetadataForeignKeysIndexesResponse]:
foreign_keys = database.get_foreign_keys(table)
for fk in foreign_keys:
fk["column_names"] = fk.pop("constrained_columns")
fk["type"] = "fk"
@@ -34,9 +44,10 @@ def get_foreign_keys_metadata(
def get_indexes_metadata(
database: Any, table_name: str, schema_name: Optional[str]
) -> list[dict[str, Any]]:
indexes = database.get_indexes(table_name, schema_name)
database: Any,
table: Table,
) -> list[TableMetadataForeignKeysIndexesResponse]:
indexes = database.get_indexes(table)
for idx in indexes:
idx["type"] = "index"
return indexes
@@ -51,30 +62,27 @@ def get_col_type(col: dict[Any, Any]) -> str:
return dtype
def get_table_metadata(
database: Any, table_name: str, schema_name: Optional[str]
) -> dict[str, Any]:
def get_table_metadata(database: Any, table: Table) -> TableMetadataResponse:
"""
Get table metadata information, including type, pk, fks.
This function raises SQLAlchemyError when a schema is not found.
:param database: The database model
:param table_name: Table name
:param schema_name: schema name
:param table: Table instance
:return: Dict table metadata ready for API response
"""
keys = []
columns = database.get_columns(table_name, schema_name)
primary_key = database.get_pk_constraint(table_name, schema_name)
columns = database.get_columns(table)
primary_key = database.get_pk_constraint(table)
if primary_key and primary_key.get("constrained_columns"):
primary_key["column_names"] = primary_key.pop("constrained_columns")
primary_key["type"] = "pk"
keys += [primary_key]
foreign_keys = get_foreign_keys_metadata(database, table_name, schema_name)
indexes = get_indexes_metadata(database, table_name, schema_name)
foreign_keys = get_foreign_keys_metadata(database, table)
indexes = get_indexes_metadata(database, table)
keys += foreign_keys + indexes
payload_columns: list[dict[str, Any]] = []
table_comment = database.get_table_comment(table_name, schema_name)
payload_columns: list[TableMetadataColumnsResponse] = []
table_comment = database.get_table_comment(table)
for col in columns:
dtype = get_col_type(col)
payload_columns.append(
@@ -87,11 +95,10 @@ def get_table_metadata(
}
)
return {
"name": table_name,
"name": table.table,
"columns": payload_columns,
"selectStar": database.select_star(
table_name,
schema=schema_name,
table,
indent=True,
cols=columns,
latest_partition=True,
@@ -103,7 +110,7 @@ def get_table_metadata(
}
def make_url_safe(raw_url: Union[str, URL]) -> URL:
def make_url_safe(raw_url: str | URL) -> URL:
"""
Wrapper for SQLAlchemy's make_url(), which tends to raise too detailed of
errors, which inevitably find their way into server logs. ArgumentErrors