# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. """ Table model. This model was introduced in SIP-68 (https://github.com/apache/superset/issues/14909), and represents a "table" in a given database -- either a physical table or a view. In addition to a table, new models for columns, metrics, and datasets were also introduced. These models are not fully implemented, and shouldn't be used yet. """ from collections.abc import Iterable from typing import Any, Optional, TYPE_CHECKING import sqlalchemy as sa from flask_appbuilder import Model from sqlalchemy import inspect from sqlalchemy.orm import backref, relationship, Session from sqlalchemy.schema import UniqueConstraint from sqlalchemy.sql import and_, or_ from superset.columns.models import Column from superset.connectors.sqla.utils import get_physical_table_metadata from superset.models.core import Database from superset.models.helpers import ( AuditMixinNullable, ExtraJSONMixin, ImportExportMixin, ) from superset.sql_parse import Table as TableName from superset.superset_typing import ResultSetColumnType if TYPE_CHECKING: from superset.datasets.models import Dataset table_column_association_table = sa.Table( "sl_table_columns", Model.metadata, # pylint: disable=no-member sa.Column( "table_id", sa.ForeignKey("sl_tables.id", ondelete="CASCADE"), primary_key=True, ), sa.Column( "column_id", sa.ForeignKey("sl_columns.id", ondelete="CASCADE"), primary_key=True, ), ) class Table(Model, AuditMixinNullable, ExtraJSONMixin, ImportExportMixin): """ A table/view in a database. """ __tablename__ = "sl_tables" # Note this uniqueness constraint is not part of the physical schema, i.e., it does # not exist in the migrations. The reason it does not physically exist is MySQL, # PostgreSQL, etc. have a different interpretation of uniqueness when it comes to NULL # which is problematic given the catalog and schema are optional. __table_args__ = (UniqueConstraint("database_id", "catalog", "schema", "name"),) id = sa.Column(sa.Integer, primary_key=True) database_id = sa.Column(sa.Integer, sa.ForeignKey("dbs.id"), nullable=False) database: Database = relationship( "Database", # TODO (betodealmeida): rename the backref to ``tables`` once we get rid of the # old models. backref=backref("new_tables", cascade="all, delete-orphan"), foreign_keys=[database_id], ) # The relationship between datasets and columns is 1:n, but we use a # many-to-many association table to avoid adding two mutually exclusive # columns(dataset_id and table_id) to Column columns: list[Column] = relationship( "Column", secondary=table_column_association_table, cascade="all, delete-orphan", single_parent=True, # backref is needed for session to skip detaching `dataset` if only `column` # is loaded. backref="tables", ) datasets: list["Dataset"] # will be populated by Dataset.tables backref # We use ``sa.Text`` for these attributes because (1) in modern databases the # performance is the same as ``VARCHAR``[1] and (2) because some table names can be # **really** long (eg, Google Sheets URLs). # # [1] https://www.postgresql.org/docs/9.1/datatype-character.html catalog = sa.Column(sa.Text) schema = sa.Column(sa.Text) name = sa.Column(sa.Text) # Column is managed externally and should be read-only inside Superset is_managed_externally = sa.Column(sa.Boolean, nullable=False, default=False) external_url = sa.Column(sa.Text, nullable=True) @property def fullname(self) -> str: return str(TableName(table=self.name, schema=self.schema, catalog=self.catalog)) def __repr__(self) -> str: return f"" def sync_columns(self) -> None: """Sync table columns with the database. Keep metadata for existing columns""" try: column_metadata = get_physical_table_metadata( self.database, self.name, self.schema ) except Exception: # pylint: disable=broad-except column_metadata = [] existing_columns = {column.name: column for column in self.columns} quote_identifier = self.database.quote_identifier def update_or_create_column(column_meta: ResultSetColumnType) -> Column: column_name: str = column_meta["column_name"] if column_name in existing_columns: column = existing_columns[column_name] else: column = Column(name=column_name) column.type = column_meta["type"] column.is_temporal = column_meta["is_dttm"] column.expression = quote_identifier(column_name) column.is_aggregation = False column.is_physical = True column.is_spatial = False column.is_partition = False # TODO: update with accurate is_partition return column self.columns = [update_or_create_column(col) for col in column_metadata] @staticmethod def bulk_load_or_create( database: Database, table_names: Iterable[TableName], default_schema: Optional[str] = None, sync_columns: Optional[bool] = False, default_props: Optional[dict[str, Any]] = None, ) -> list["Table"]: """ Load or create multiple Table instances. """ if not table_names: return [] if not database.id: raise Exception("Database must be already saved to metastore") default_props = default_props or {} session: Session = inspect(database).session # load existing tables predicate = or_( *[ and_( Table.database_id == database.id, Table.schema == (table.schema or default_schema), Table.name == table.table, ) for table in table_names ] ) all_tables = session.query(Table).filter(predicate).order_by(Table.id).all() # add missing tables and pull its columns existing = {(table.schema, table.name) for table in all_tables} for table in table_names: schema = table.schema or default_schema name = table.table if (schema, name) not in existing: new_table = Table( database=database, database_id=database.id, name=name, schema=schema, catalog=None, **default_props, ) if sync_columns: new_table.sync_columns() all_tables.append(new_table) existing.add((schema, name)) session.add(new_table) return all_tables