refactor: external metadata fetch API (#16193)

* refactor: external metadata api

* fix comments

* fix ut

* fix fe lint

* fix UT

* fix UT

(cherry picked from commit 6cd15d54a0)
This commit is contained in:
Yongjie Zhao
2021-08-13 13:56:42 +01:00
committed by Ville Brofeldt
parent 6d19de8f66
commit 7f799ed00a
7 changed files with 188 additions and 49 deletions

View File

@@ -16,6 +16,7 @@
* specific language governing permissions and limitations * specific language governing permissions and limitations
* under the License. * under the License.
*/ */
import rison from 'rison';
import React from 'react'; import React from 'react';
import PropTypes from 'prop-types'; import PropTypes from 'prop-types';
import { Row, Col } from 'src/common/components'; import { Row, Col } from 'src/common/components';
@@ -485,11 +486,19 @@ class DatasourceEditor extends React.PureComponent {
syncMetadata() { syncMetadata() {
const { datasource } = this.state; const { datasource } = this.state;
const endpoint = `/datasource/external_metadata_by_name/${ const params = {
datasource.type || datasource.datasource_type datasource_type: datasource.type || datasource.datasource_type,
}/${datasource.database.database_name || datasource.database.name}/${ database_name:
datasource.schema datasource.database.database_name || datasource.database.name,
}/${datasource.table_name}/`; schema_name: datasource.schema,
table_name: datasource.table_name,
};
const endpoint = `/datasource/external_metadata_by_name/?q=${rison.encode(
// rison can't encode the undefined value
Object.keys(params).map(key =>
params[key] === undefined ? null : params[key],
),
)}`;
this.setState({ metadataLoading: true }); this.setState({ metadataLoading: true });
SupersetClient.get({ endpoint }) SupersetClient.get({ endpoint })

View File

@@ -18,6 +18,7 @@ from contextlib import closing
from typing import Dict, List, Optional, TYPE_CHECKING from typing import Dict, List, Optional, TYPE_CHECKING
from flask_babel import lazy_gettext as _ from flask_babel import lazy_gettext as _
from sqlalchemy.exc import NoSuchTableError
from sqlalchemy.sql.type_api import TypeEngine from sqlalchemy.sql.type_api import TypeEngine
from superset.errors import ErrorLevel, SupersetError, SupersetErrorType from superset.errors import ErrorLevel, SupersetError, SupersetErrorType
@@ -41,6 +42,10 @@ def get_physical_table_metadata(
db_dialect = database.get_dialect() db_dialect = database.get_dialect()
# ensure empty schema # ensure empty schema
_schema_name = schema_name if schema_name else None _schema_name = schema_name if schema_name else None
# Table does not exist or is not visible to a connection.
if not database.has_table_by_name(table_name, schema=_schema_name):
raise NoSuchTableError
cols = database.get_columns(table_name, schema=_schema_name) cols = database.get_columns(table_name, schema=_schema_name)
for col in cols: for col in cols:
try: try:

View File

@@ -171,7 +171,7 @@ class SupersetAppInitializer:
DatabaseView, DatabaseView,
ExcelToDatabaseView, ExcelToDatabaseView,
) )
from superset.views.datasource import Datasource from superset.views.datasource.views import Datasource
from superset.views.dynamic_plugins import DynamicPluginsView from superset.views.dynamic_plugins import DynamicPluginsView
from superset.views.key_value import KV from superset.views.key_value import KV
from superset.views.log.api import LogRestApi from superset.views.log.api import LogRestApi

View File

@@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

View File

@@ -0,0 +1,54 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from typing import Any
from marshmallow import fields, post_load, Schema
from typing_extensions import TypedDict
class ExternalMetadataParams(TypedDict):
    """Typed shape of the parameters that identify a table to inspect.

    This is the dictionary produced by ``ExternalMetadataSchema`` after
    loading; all four keys are declared as strings.
    """

    # Kind of datasource being looked up (the tests in this change use "table").
    datasource_type: str
    # Name of the database the table belongs to.
    database_name: str
    # Schema that contains the table.
    schema_name: str
    # Name of the table whose metadata is requested.
    table_name: str
# Description of the rison-encoded ``q`` query argument: every expected key is
# mapped to the type name "string".
get_external_metadata_schema = dict.fromkeys(
    ("datasource_type", "database_name", "schema_name", "table_name"),
    "string",
)
class ExternalMetadataSchema(Schema):
    """Validate the parameters that identify a table and normalise them.

    ``datasource_type``, ``database_name`` and ``table_name`` are required.
    ``schema_name`` may be omitted or sent as an explicit null; in both cases
    it is normalised to an empty string after loading.
    """

    datasource_type = fields.Str(required=True)
    database_name = fields.Str(required=True)
    # Not required, and ``allow_none=True`` lets an explicit null through
    # validation, so the loaded value can be missing or ``None``.
    schema_name = fields.Str(allow_none=True)
    table_name = fields.Str(required=True)

    # pylint: disable=no-self-use,unused-argument
    @post_load
    def normalize(
        self, data: ExternalMetadataParams, **kwargs: Any,
    ) -> ExternalMetadataParams:
        """Return the loaded data as a fully populated ``ExternalMetadataParams``.

        :param data: the validated payload; ``schema_name`` may be absent
            or ``None``
        :param kwargs: extra arguments supplied by the ``post_load`` hook
            (unused)
        :returns: the four parameters, with ``schema_name`` always a string
        """
        return ExternalMetadataParams(
            datasource_type=data["datasource_type"],
            database_name=data["database_name"],
            # ``or ""`` instead of a ``.get`` default: a default only covers a
            # missing key, while an explicit null would otherwise come through
            # as ``None`` and break the declared ``str`` type.
            schema_name=data.get("schema_name") or "",
            table_name=data["table_name"],
        )

View File

@@ -16,23 +16,39 @@
# under the License. # under the License.
import json import json
from collections import Counter from collections import Counter
from typing import Any
from flask import request from flask import request
from flask_appbuilder import expose from flask_appbuilder import expose
from flask_appbuilder.api import rison
from flask_appbuilder.security.decorators import has_access_api from flask_appbuilder.security.decorators import has_access_api
from flask_babel import _ from flask_babel import _
from marshmallow import ValidationError
from sqlalchemy.exc import NoSuchTableError
from sqlalchemy.orm.exc import NoResultFound
from superset import app, db, event_logger from superset import app, db, event_logger
from superset.connectors.connector_registry import ConnectorRegistry from superset.connectors.connector_registry import ConnectorRegistry
from superset.connectors.sqla.utils import get_physical_table_metadata from superset.connectors.sqla.utils import get_physical_table_metadata
from superset.datasets.commands.exceptions import DatasetForbiddenError from superset.datasets.commands.exceptions import (
DatasetForbiddenError,
DatasetNotFoundError,
)
from superset.exceptions import SupersetException, SupersetSecurityException from superset.exceptions import SupersetException, SupersetSecurityException
from superset.models.core import Database from superset.models.core import Database
from superset.typing import FlaskResponse from superset.typing import FlaskResponse
from superset.views.base import check_ownership from superset.views.base import (
api,
from ..utils.core import parse_js_uri_path_item BaseSupersetView,
from .base import api, BaseSupersetView, handle_api_exception, json_error_response check_ownership,
handle_api_exception,
json_error_response,
)
from superset.views.datasource.schemas import (
ExternalMetadataParams,
ExternalMetadataSchema,
get_external_metadata_schema,
)
class Datasource(BaseSupersetView): class Datasource(BaseSupersetView):
@@ -122,45 +138,43 @@ class Datasource(BaseSupersetView):
return json_error_response(str(ex), status=400) return json_error_response(str(ex), status=400)
return self.json_response(external_metadata) return self.json_response(external_metadata)
@expose( @expose("/external_metadata_by_name/")
"/external_metadata_by_name/<datasource_type>/<database_name>/"
"<schema_name>/<table_name>/"
)
@has_access_api @has_access_api
@api @api
@handle_api_exception @handle_api_exception
def external_metadata_by_name( @rison(get_external_metadata_schema)
self, def external_metadata_by_name(self, **kwargs: Any) -> FlaskResponse:
datasource_type: str,
database_name: str,
schema_name: str,
table_name: str,
) -> FlaskResponse:
"""Gets table metadata from the source system and SQLAlchemy inspector""" """Gets table metadata from the source system and SQLAlchemy inspector"""
database_name = parse_js_uri_path_item(database_name) or "" try:
schema_name = parse_js_uri_path_item(schema_name, eval_undefined=True) or "" params: ExternalMetadataParams = (
table_name = parse_js_uri_path_item(table_name) or "" ExternalMetadataSchema().load(kwargs.get("rison"))
)
except ValidationError as err:
return json_error_response(str(err), status=400)
datasource = ConnectorRegistry.get_datasource_by_name( datasource = ConnectorRegistry.get_datasource_by_name(
session=db.session, session=db.session,
datasource_type=datasource_type, datasource_type=params["datasource_type"],
database_name=database_name, database_name=params["database_name"],
schema=schema_name, schema=params["schema_name"],
datasource_name=table_name, datasource_name=params["table_name"],
) )
try: try:
if datasource is not None: if datasource is not None:
# Get columns from Superset metadata
external_metadata = datasource.external_metadata() external_metadata = datasource.external_metadata()
else: else:
# Use the SQLAlchemy inspector to get columns # Use the SQLAlchemy inspector to get columns
database = ( database = (
db.session.query(Database) db.session.query(Database)
.filter_by(database_name=database_name) .filter_by(database_name=params["database_name"])
.one() .one()
) )
external_metadata = get_physical_table_metadata( external_metadata = get_physical_table_metadata(
database=database, table_name=table_name, schema_name=schema_name, database=database,
table_name=params["table_name"],
schema_name=params["schema_name"],
) )
except SupersetException as ex: except (NoResultFound, NoSuchTableError):
return json_error_response(str(ex), status=400) raise DatasetNotFoundError
return self.json_response(external_metadata) return self.json_response(external_metadata)

View File

@@ -19,6 +19,7 @@ import json
from contextlib import contextmanager from contextlib import contextmanager
from unittest import mock from unittest import mock
import prison
import pytest import pytest
from superset import app, ConnectorRegistry, db from superset import app, ConnectorRegistry, db
@@ -90,11 +91,15 @@ class TestDatasource(SupersetTestCase):
def test_external_metadata_by_name_for_physical_table(self): def test_external_metadata_by_name_for_physical_table(self):
self.login(username="admin") self.login(username="admin")
tbl = self.get_table(name="birth_names") tbl = self.get_table(name="birth_names")
# empty schema need to be represented by undefined params = prison.dumps(
url = ( {
f"/datasource/external_metadata_by_name/table/" "datasource_type": "table",
f"{tbl.database.database_name}/undefined/{tbl.table_name}/" "database_name": tbl.database.database_name,
"schema_name": tbl.schema,
"table_name": tbl.table_name,
}
) )
url = f"/datasource/external_metadata_by_name/?q={params}"
resp = self.get_json_resp(url) resp = self.get_json_resp(url)
col_names = {o.get("name") for o in resp} col_names = {o.get("name") for o in resp}
self.assertEqual( self.assertEqual(
@@ -112,33 +117,69 @@ class TestDatasource(SupersetTestCase):
session.add(table) session.add(table)
session.commit() session.commit()
table = self.get_table(name="dummy_sql_table") tbl = self.get_table(name="dummy_sql_table")
# empty schema need to be represented by undefined params = prison.dumps(
url = ( {
f"/datasource/external_metadata_by_name/table/" "datasource_type": "table",
f"{table.database.database_name}/undefined/{table.table_name}/" "database_name": tbl.database.database_name,
"schema_name": tbl.schema,
"table_name": tbl.table_name,
}
) )
url = f"/datasource/external_metadata_by_name/?q={params}"
resp = self.get_json_resp(url) resp = self.get_json_resp(url)
assert {o.get("name") for o in resp} == {"intcol", "strcol"} assert {o.get("name") for o in resp} == {"intcol", "strcol"}
session.delete(table) session.delete(tbl)
session.commit() session.commit()
def test_external_metadata_by_name_from_sqla_inspector(self): def test_external_metadata_by_name_from_sqla_inspector(self):
self.login(username="admin") self.login(username="admin")
example_database = get_example_database() example_database = get_example_database()
with create_test_table_context(example_database): with create_test_table_context(example_database):
url = ( params = prison.dumps(
f"/datasource/external_metadata_by_name/table/" {
f"{example_database.database_name}/undefined/test_table/" "datasource_type": "table",
"database_name": example_database.database_name,
"table_name": "test_table",
}
) )
url = f"/datasource/external_metadata_by_name/?q={params}"
resp = self.get_json_resp(url) resp = self.get_json_resp(url)
col_names = {o.get("name") for o in resp} col_names = {o.get("name") for o in resp}
self.assertEqual(col_names, {"first", "second"}) self.assertEqual(col_names, {"first", "second"})
url = ( # No databases found
f"/datasource/external_metadata_by_name/table/" f"foobar/undefined/foobar/" params = prison.dumps(
{"datasource_type": "table", "database_name": "foo", "table_name": "bar",}
) )
resp = self.get_json_resp(url, raise_on_error=False) url = f"/datasource/external_metadata_by_name/?q={params}"
resp = self.client.get(url)
self.assertEqual(resp.status_code, DatasetNotFoundError.status)
self.assertEqual(
json.loads(resp.data.decode("utf-8")).get("error"),
DatasetNotFoundError.message,
)
# No table found
params = prison.dumps(
{
"datasource_type": "table",
"database_name": example_database.database_name,
"table_name": "fooooooooobarrrrrr",
}
)
url = f"/datasource/external_metadata_by_name/?q={params}"
resp = self.client.get(url)
self.assertEqual(resp.status_code, DatasetNotFoundError.status)
self.assertEqual(
json.loads(resp.data.decode("utf-8")).get("error"),
DatasetNotFoundError.message,
)
# invalid query params
params = prison.dumps({"datasource_type": "table",})
url = f"/datasource/external_metadata_by_name/?q={params}"
resp = self.get_json_resp(url)
self.assertIn("error", resp) self.assertIn("error", resp)
def test_external_metadata_for_virtual_table_template_params(self): def test_external_metadata_for_virtual_table_template_params(self):