feat(bigquery): show materialized views (#34766)

This commit is contained in:
Beto Dealmeida
2025-08-26 11:21:43 -04:00
committed by GitHub
parent 97b35a4640
commit cb24737825
10 changed files with 302 additions and 30 deletions

View File

@@ -104,6 +104,7 @@ import {
PicCenterOutlined,
PlusCircleOutlined,
PlusOutlined,
ProfileOutlined,
QuestionCircleOutlined,
ReloadOutlined,
RightOutlined,
@@ -242,6 +243,7 @@ const AntdIcons = {
PicCenterOutlined,
PlusCircleOutlined,
PlusOutlined,
ProfileOutlined,
ReloadOutlined,
QuestionCircleOutlined,
RightOutlined,

View File

@@ -29,7 +29,7 @@ import {
} from 'spec/helpers/testing-library';
import { api } from 'src/hooks/apiResources/queryApi';
import fetchMock from 'fetch-mock';
import TableSelector, { TableSelectorMultiple } from '.';
import TableSelector, { TableSelectorMultiple, TableOption } from '.';
const createProps = (props = {}) => ({
database: {
@@ -259,3 +259,34 @@ test('table multi select retain all the values selected', async () => {
expect(selections[0]).toHaveTextContent('table_b');
expect(selections[1]).toHaveTextContent('table_c');
});
/**
 * Renders TableOption once for each table type (regular table, view and
 * materialized view) and checks that an element with the `anticon` class is
 * present in the rendered output of every one of them.
 */
test('TableOption renders correct icons for different table types', () => {
  // One fixture per table type, in the order they are rendered below.
  const fixtures = [
    // Regular table
    { value: 'test_table', type: 'table', label: 'test_table' },
    // View
    { value: 'test_view', type: 'view', label: 'test_view' },
    // Materialized view
    {
      value: 'test_materialized_view',
      type: 'materialized_view',
      label: 'test_materialized_view',
    },
  ];

  fixtures.forEach(fixture => {
    // Query inside the container returned by this particular render call.
    const { container } = render(<TableOption table={fixture} />);
    expect(container.querySelector('.anticon')).toBeInTheDocument();
  });
});

View File

@@ -126,9 +126,11 @@ export const TableOption = ({ table }: { table: Table }) => {
return (
<TableLabel title={value}>
{type === 'view' ? (
<Icons.EyeOutlined iconSize="m" />
<Icons.FunctionOutlined iconSize="m" />
) : type === 'materialized_view' ? (
<Icons.ProfileOutlined iconSize="m" />
) : (
<Icons.InsertRowAboveOutlined iconSize="m" />
<Icons.TableOutlined iconSize="m" />
)}
{extra?.certification && (
<CertifiedBadge

View File

@@ -94,6 +94,25 @@ class TablesDatabaseCommand(BaseCommand):
),
)
# Get materialized views if the database supports them
materialized_views = security_manager.get_datasources_accessible_by_user(
database=self._model,
catalog=self._catalog_name,
schema=self._schema_name,
datasource_names=sorted(
DatasourceName(table.table, table.schema, table.catalog)
for table in (
self._model.get_all_materialized_view_names_in_schema(
catalog=self._catalog_name,
schema=self._schema_name,
force=self._force,
cache=self._model.table_cache_enabled,
cache_timeout=self._model.table_cache_timeout,
)
)
),
)
extra_dict_by_name = {
table.name: table.extra_dict
for table in (
@@ -131,11 +150,21 @@ class TablesDatabaseCommand(BaseCommand):
"type": "view",
}
for view in views
]
+ [
{
"value": mv.table,
"type": "materialized_view",
}
for mv in materialized_views
],
key=lambda item: item["value"],
)
payload = {"count": len(tables) + len(views), "result": options}
payload = {
"count": len(tables) + len(views) + len(materialized_views),
"result": options,
}
return payload
except SupersetException:
raise

View File

@@ -1493,6 +1493,18 @@ class BaseEngineSpec: # pylint: disable=too-many-public-methods
views = {re.sub(f"^{schema}\\.", "", view) for view in views}
return views
@classmethod
def get_materialized_view_names(
    cls,
    database: Database,
    inspector: Inspector,
    schema: str | None,
) -> set[str]:
    """
    Get all materialized views.

    This base implementation reports none: it ignores every argument and
    always returns an empty set.

    :param database: not used by this implementation
    :param inspector: not used by this implementation
    :param schema: not used by this implementation
    :return: an empty set
    """
    no_materialized_views: set[str] = set()
    return no_materialized_views
@classmethod
def get_indexes(
cls,

View File

@@ -828,3 +828,97 @@ class BigQueryEngineSpec(BaseEngineSpec): # pylint: disable=too-many-public-met
# If for some reason we get an exception, for example, no new line
# We will return the original exception
return exception
@classmethod
def get_materialized_view_names(
    cls,
    database: Database,
    inspector: Inspector,
    schema: str | None,
) -> set[str]:
    """
    Get all materialized views from BigQuery.

    The names are read from the dataset's ``INFORMATION_SCHEMA.TABLES`` view,
    keeping only the rows whose ``table_type`` is ``MATERIALIZED VIEW``.

    :param database: database used to resolve the default catalog and to open
        the raw connection the query runs on
    :param inspector: not used by this implementation
    :param schema: schema (dataset) to inspect; a falsy value yields an empty set
    :return: names of the materialized views found; an empty set when no schema
        is given, when the catalog/schema cannot be quoted safely, or when the
        query fails (the failure is logged as a warning, not raised)
    """
    if not schema:
        return set()

    catalog = database.get_default_catalog()

    # The path below is interpolated between backticks, so a backtick in either
    # part would end the quoted identifier early and change the statement.
    # Refuse such values before any SQL is built or any connection is opened.
    if any("`" in str(part) for part in (catalog, schema) if part):
        logger.warning(
            "Unable to fetch materialized views for schema %s: "
            "identifier contains a backtick",
            schema,
        )
        return set()

    # Construct the query to get materialized views from INFORMATION_SCHEMA
    if catalog:
        information_schema = f"`{catalog}.{schema}.INFORMATION_SCHEMA.TABLES`"
    else:
        information_schema = f"`{schema}.INFORMATION_SCHEMA.TABLES`"

    # Identifiers cannot be bound as query parameters, hence the string
    # formatting; the values were checked above.
    query = f"""
        SELECT table_name
        FROM {information_schema}
        WHERE table_type = 'MATERIALIZED VIEW'
    """  # noqa: S608

    materialized_views: set[str] = set()
    try:
        with database.get_raw_connection(catalog=catalog, schema=schema) as conn:
            cursor = conn.cursor()
            cursor.execute(query)
            materialized_views = {row[0] for row in cursor.fetchall()}
    except Exception:
        # If we can't fetch materialized views, return empty set
        logger.warning(
            "Unable to fetch materialized views for schema %s",
            schema,
            exc_info=True,
        )

    return materialized_views
@classmethod
def get_view_names(
    cls,
    database: Database,
    inspector: Inspector,
    schema: str | None,
) -> set[str]:
    """
    Get all views from BigQuery, excluding materialized views.

    The names are read from the dataset's ``INFORMATION_SCHEMA.TABLES`` view,
    keeping only the rows whose ``table_type`` is ``VIEW`` so that materialized
    views can be reported separately.

    :param database: database used to resolve the default catalog and to open
        the raw connection the query runs on
    :param inspector: only used when falling back to the parent implementation
    :param schema: schema (dataset) to inspect; a falsy value yields an empty set
    :return: names of the regular views found. When the catalog/schema cannot
        be quoted safely, or the query fails, the result of the parent class's
        ``get_view_names`` is returned instead (a warning is logged).
    """
    if not schema:
        return set()

    catalog = database.get_default_catalog()

    # The path below is interpolated between backticks, so a backtick in either
    # part would end the quoted identifier early and change the statement.
    # Skip the hand-built SQL for such values and use the default lookup.
    if any("`" in str(part) for part in (catalog, schema) if part):
        logger.warning(
            "Unable to fetch views for schema %s, falling back to default",
            schema,
        )
        return super().get_view_names(database, inspector, schema)

    # Construct the query to get regular views from INFORMATION_SCHEMA
    if catalog:
        information_schema = f"`{catalog}.{schema}.INFORMATION_SCHEMA.TABLES`"
    else:
        information_schema = f"`{schema}.INFORMATION_SCHEMA.TABLES`"

    # Identifiers cannot be bound as query parameters, hence the string
    # formatting; the values were checked above.
    query = f"""
        SELECT table_name
        FROM {information_schema}
        WHERE table_type = 'VIEW'
    """  # noqa: S608

    views: set[str] = set()
    try:
        with database.get_raw_connection(catalog=catalog, schema=schema) as conn:
            cursor = conn.cursor()
            cursor.execute(query)
            views = {row[0] for row in cursor.fetchall()}
    except Exception:
        # If we can't fetch views, fall back to the default implementation
        logger.warning(
            "Unable to fetch views for schema %s, falling back to default",
            schema,
            exc_info=True,
        )
        return super().get_view_names(database, inspector, schema)

    return views

View File

@@ -930,6 +930,44 @@ class Database(Model, AuditMixinNullable, ImportExportMixin): # pylint: disable
except Exception as ex:
raise self.db_engine_spec.get_dbapi_mapped_exception(ex) from ex
@cache_util.memoized_func(
    key="db:{self.id}:catalog:{catalog}:schema:{schema}:materialized_view_list",
    cache=cache_manager.cache,
)
def get_all_materialized_view_names_in_schema(
    self,
    catalog: str | None,
    schema: str,
) -> set[Table]:
    """Get all materialized views in the specified schema.

    Asks the database's engine spec for the materialized view names and wraps
    each one in a ``Table`` together with the given schema and catalog.

    Parameters need to be passed as keyword arguments. ``cache``,
    ``cache_timeout`` and ``force`` are not part of this signature; they are
    referenced in the ``cache_util.memoized_func`` decorator.

    :param catalog: optional catalog name
    :param schema: schema name
    :param cache: whether cache is enabled for the function
    :param cache_timeout: timeout in seconds for the cache
    :param force: whether to force refresh the cache
    :return: set of materialized views
    :raises Exception: any failure is re-raised as the exception returned by
        the engine spec's ``get_dbapi_mapped_exception``
    """
    try:
        with self.get_inspector(catalog=catalog, schema=schema) as inspector:
            names = self.db_engine_spec.get_materialized_view_names(
                database=self,
                inspector=inspector,
                schema=schema,
            )
            return {Table(name, schema, catalog) for name in names}
    except Exception as ex:
        raise self.db_engine_spec.get_dbapi_mapped_exception(ex) from ex

    # NOTE(review): every path above returns or raises, so this looks reachable
    # only if the inspector context manager suppresses an exception - confirm.
    return set()
@contextmanager
def get_inspector(
self,

View File

@@ -1224,6 +1224,9 @@ class TestTablesDatabaseCommand(SupersetTestCase):
patch.object(
database, "get_all_view_names_in_schema", return_value=[]
) as mock_get_all_view_names,
patch.object(
database, "get_all_materialized_view_names_in_schema", return_value=[]
) as mock_get_all_materialized_view_names,
):
command = TablesDatabaseCommand(database.id, None, "schema_name", False)
command.run()
@@ -1243,3 +1246,10 @@ class TestTablesDatabaseCommand(SupersetTestCase):
cache=database.table_cache_enabled,
cache_timeout=database.table_cache_timeout,
)
mock_get_all_materialized_view_names.assert_called_once_with(
catalog="default_catalog",
schema="schema_name",
force=False,
cache=database.table_cache_enabled,
cache_timeout=database.table_cache_timeout,
)

View File

@@ -42,6 +42,7 @@ def database_with_catalog(mocker: MockerFixture) -> MagicMock:
database.get_all_view_names_in_schema.return_value = {
("view1", "schema1", "catalog1"),
}
database.get_all_materialized_view_names_in_schema.return_value = set()
DatabaseDAO = mocker.patch("superset.commands.database.tables.DatabaseDAO") # noqa: N806
DatabaseDAO.find_by_id.return_value = database
@@ -66,6 +67,7 @@ def database_without_catalog(mocker: MockerFixture) -> MagicMock:
database.get_all_view_names_in_schema.return_value = {
("view1", "schema1", None),
}
database.get_all_materialized_view_names_in_schema.return_value = set()
DatabaseDAO = mocker.patch("superset.commands.database.tables.DatabaseDAO") # noqa: N806
DatabaseDAO.find_by_id.return_value = database
@@ -89,6 +91,7 @@ def test_tables_with_catalog(
DatasourceName("table2", "schema1", "catalog1"),
},
{DatasourceName("view1", "schema1", "catalog1")},
set(), # Empty set for materialized views
],
)
@@ -127,6 +130,12 @@ def test_tables_with_catalog(
DatasourceName("view1", "schema1", "catalog1"),
],
),
mocker.call(
database=database_with_catalog,
catalog="catalog1",
schema="schema1",
datasource_names=[],
),
],
)
@@ -155,6 +164,7 @@ def test_tables_without_catalog(
DatasourceName("table2", "schema1"),
},
{DatasourceName("view1", "schema1")},
set(), # Empty set for materialized views
],
)
@@ -193,6 +203,12 @@ def test_tables_without_catalog(
DatasourceName("view1", "schema1"),
],
),
mocker.call(
database=database_without_catalog,
catalog=None,
schema="schema1",
datasource_names=[],
),
],
)

View File

@@ -19,6 +19,7 @@
from datetime import datetime
from typing import Optional
from unittest import mock
import pytest
from pytest_mock import MockerFixture
@@ -451,43 +452,80 @@ def test_adjust_engine_params_catalog_as_host() -> None:
assert str(uri) == "bigquery://other-project/"
def test_adjust_engine_params_catalog_as_database() -> None:
def test_get_materialized_view_names() -> None:
"""
Test passing a custom catalog.
In this test, the original URI has the catalog as the database.
Test get_materialized_view_names method.
"""
from superset.db_engine_specs.bigquery import BigQueryEngineSpec
url = make_url("bigquery:///project")
database = mock.Mock()
database.get_default_catalog.return_value = "my_project"
uri = BigQueryEngineSpec.adjust_engine_params(url, {})[0]
assert str(uri) == "bigquery:///project"
inspector = mock.Mock()
uri = BigQueryEngineSpec.adjust_engine_params(
url,
{},
catalog="other-project",
)[0]
assert str(uri) == "bigquery://other-project/"
# Mock the raw connection and cursor
cursor_mock = mock.Mock()
cursor_mock.fetchall.return_value = [
("materialized_view_1",),
("materialized_view_2",),
]
connection_mock = mock.Mock()
connection_mock.cursor.return_value = cursor_mock
connection_mock.__enter__ = mock.Mock(return_value=connection_mock)
connection_mock.__exit__ = mock.Mock(return_value=None)
database.get_raw_connection.return_value = connection_mock
result = BigQueryEngineSpec.get_materialized_view_names(
database=database, inspector=inspector, schema="my_dataset"
)
assert result == {"materialized_view_1", "materialized_view_2"}
# Verify the SQL query was correct
cursor_mock.execute.assert_called_once()
executed_query = cursor_mock.execute.call_args[0][0]
assert "INFORMATION_SCHEMA.TABLES" in executed_query
assert "table_type = 'MATERIALIZED VIEW'" in executed_query
def test_adjust_engine_params_no_catalog() -> None:
def test_get_view_names_excludes_materialized_views() -> None:
"""
Test passing a custom catalog.
In this test, the original URI has no catalog.
Test get_view_names excludes materialized views.
"""
from superset.db_engine_specs.bigquery import BigQueryEngineSpec
url = make_url("bigquery://")
database = mock.Mock()
database.get_default_catalog.return_value = "my_project"
uri = BigQueryEngineSpec.adjust_engine_params(url, {})[0]
assert str(uri) == "bigquery://"
inspector = mock.Mock()
uri = BigQueryEngineSpec.adjust_engine_params(
url,
{},
catalog="other-project",
)[0]
assert str(uri) == "bigquery://other-project/"
# Mock the raw connection and cursor
cursor_mock = mock.Mock()
# Return only regular views, not materialized views
cursor_mock.fetchall.return_value = [
("regular_view_1",),
("regular_view_2",),
]
connection_mock = mock.Mock()
connection_mock.cursor.return_value = cursor_mock
connection_mock.__enter__ = mock.Mock(return_value=connection_mock)
connection_mock.__exit__ = mock.Mock(return_value=None)
database.get_raw_connection.return_value = connection_mock
result = BigQueryEngineSpec.get_view_names(
database=database, inspector=inspector, schema="my_dataset"
)
assert result == {"regular_view_1", "regular_view_2"}
# Verify the SQL query only gets regular views
cursor_mock.execute.assert_called_once()
executed_query = cursor_mock.execute.call_args[0][0]
assert "INFORMATION_SCHEMA.TABLES" in executed_query
assert "table_type = 'VIEW'" in executed_query
# Ensure it's not querying for materialized views
assert "MATERIALIZED VIEW" not in executed_query