fix(initialization): prevent startup failures when database tables don't exist (#34584)

This commit is contained in:
Elizabeth Thompson
2025-08-11 10:49:52 -07:00
committed by GitHub
parent 5e621ceb34
commit 99539c786e
2 changed files with 484 additions and 9 deletions

View File

@@ -36,6 +36,7 @@ from flask_babel import lazy_gettext as _, refresh
from flask_compress import Compress
from flask_session import Session
from sqlalchemy import inspect
from sqlalchemy.exc import OperationalError
from werkzeug.middleware.proxy_fix import ProxyFix
from superset.constants import CHANGE_ME_SECRET_KEY
@@ -81,12 +82,36 @@ class SupersetAppInitializer: # pylint: disable=too-many-public-methods
self.superset_app = app
self.config = app.config
self.manifest: dict[Any, Any] = {}
self._db_uri_cache: str | None = None # Cache for valid database URIs
@deprecated(details="use self.superset_app instead of self.flask_app") # type: ignore
@property
def flask_app(self) -> SupersetApp:
return self.superset_app
@property
def database_uri(self) -> str:
"""Lazy property for database URI to avoid early config access issues"""
# If we have a cached valid value, return it
if self._db_uri_cache is not None:
return self._db_uri_cache
# Try to get the URI from config
uri = self.config.get("SQLALCHEMY_DATABASE_URI", "")
# Check if this is a fallback value that indicates config isn't ready
if uri and not any(
fallback in uri.lower()
for fallback in ["nouser", "nopassword", "nohost", "nodb"]
):
# Valid URI - cache it and return
self._db_uri_cache = uri
return uri
# Return the fallback value without caching
# This allows retry on next access when config might be ready
return uri
def pre_init(self) -> None:
"""
Called before all other init tasks are complete
@@ -484,13 +509,35 @@ class SupersetAppInitializer: # pylint: disable=too-many-public-methods
This is called during app initialization but checks table existence
to handle cases where the app starts before database migration.
"""
inspector = inspect(db.engine)
# Check if database URI is a fallback value before trying to connect
db_uri = self.database_uri
if not db_uri or any(
fallback in db_uri.lower()
for fallback in ["nouser", "nopassword", "nohost", "nodb"]
):
logger.warning(
"Database URI appears to be a fallback value. "
"Skipping database-dependent initialization. "
"This may indicate the workspace context is not ready yet."
)
return
# Check if core tables exist (use 'dashboards' as proxy for Superset tables)
if not inspector.has_table("dashboards"):
try:
inspector = inspect(db.engine)
# Check if core tables exist (use 'dashboards' as proxy for Superset tables)
if not inspector.has_table("dashboards"):
logger.debug(
"Superset tables not yet created. Skipping database-dependent "
"initialization. These features will be initialized after "
"migration."
)
return
except OperationalError as e:
logger.debug(
"Superset tables not yet created. Skipping database-dependent "
"initialization. These features will be initialized after migration."
"Error inspecting database tables. Skipping database-dependent "
"initialization: %s",
e,
)
return
@@ -600,7 +647,7 @@ class SupersetAppInitializer: # pylint: disable=too-many-public-methods
# Simple connection test
db.engine.execute("SELECT 1")
except Exception:
db_uri = self.config.get("SQLALCHEMY_DATABASE_URI", "")
db_uri = self.database_uri
safe_uri = make_url_safe(db_uri) if db_uri else "Not configured"
print(
f"{Fore.RED}ERROR: Cannot connect to database {safe_uri}\n"
@@ -651,9 +698,7 @@ class SupersetAppInitializer: # pylint: disable=too-many-public-methods
set_isolation_level_to = None
if not isolation_level:
backend = make_url_safe(
self.config["SQLALCHEMY_DATABASE_URI"]
).get_backend_name()
backend = make_url_safe(self.database_uri).get_backend_name()
if backend in ("mysql", "postgresql"):
set_isolation_level_to = "READ COMMITTED"

View File

@@ -0,0 +1,430 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from unittest.mock import MagicMock, patch
from sqlalchemy.exc import OperationalError
from superset.initialization import SupersetAppInitializer
class TestSupersetAppInitializer:
@patch("superset.initialization.db")
@patch("superset.initialization.inspect")
@patch("superset.initialization.logger")
def test_init_database_dependent_features_skips_when_no_tables(
self, mock_logger, mock_inspect_func, mock_db
):
"""Test that initialization is skipped when core tables don't exist."""
# Setup
mock_app = MagicMock()
app_initializer = SupersetAppInitializer(mock_app)
mock_inspector = MagicMock()
mock_inspector.has_table.return_value = False
mock_inspect_func.return_value = mock_inspector
mock_db.engine = MagicMock()
# Execute
app_initializer._init_database_dependent_features()
# Assert
mock_inspect_func.assert_called_once_with(mock_db.engine)
mock_inspector.has_table.assert_called_once_with("dashboards")
mock_logger.debug.assert_called_once_with(
"Superset tables not yet created. Skipping database-dependent "
"initialization. These features will be initialized after "
"migration."
)
@patch("superset.initialization.db")
@patch("superset.initialization.inspect")
@patch("superset.initialization.logger")
def test_init_database_dependent_features_handles_operational_error(
self, mock_logger, mock_inspect_func, mock_db
):
"""Test that OperationalError during inspection is handled gracefully."""
# Setup
mock_app = MagicMock()
app_initializer = SupersetAppInitializer(mock_app)
error_msg = "Cannot connect to database"
mock_inspect_func.side_effect = OperationalError(error_msg, None, None)
mock_db.engine = MagicMock()
# Execute
app_initializer._init_database_dependent_features()
# Assert
mock_inspect_func.assert_called_once_with(mock_db.engine)
mock_logger.debug.assert_called_once()
call_args = mock_logger.debug.call_args
assert "Error inspecting database tables" in call_args[0][0]
# The error is passed as second argument with %s formatting
assert str(call_args[0][1]) == str(OperationalError(error_msg, None, None))
@patch("superset.initialization.db")
@patch("superset.initialization.inspect")
@patch("superset.initialization.feature_flag_manager")
@patch("superset.initialization.register_sqla_event_listeners")
@patch("superset.initialization.logger")
@patch("superset.commands.theme.seed.SeedSystemThemesCommand")
def test_init_database_dependent_features_initializes_when_tables_exist(
self,
mock_seed_themes_command,
mock_logger,
mock_register_listeners,
mock_feature_flag_manager,
mock_inspect_func,
mock_db,
):
"""Test that features are initialized when database tables exist."""
# Setup
mock_app = MagicMock()
app_initializer = SupersetAppInitializer(mock_app)
mock_inspector = MagicMock()
mock_inspector.has_table.return_value = True
mock_inspect_func.return_value = mock_inspector
mock_db.engine = MagicMock()
mock_feature_flag_manager.is_feature_enabled.return_value = True
mock_seed_themes = MagicMock()
mock_seed_themes_command.return_value = mock_seed_themes
# Execute
app_initializer._init_database_dependent_features()
# Assert
mock_inspect_func.assert_called_once_with(mock_db.engine)
# Check both tables are checked
assert mock_inspector.has_table.call_count == 2
mock_inspector.has_table.assert_any_call("dashboards")
mock_inspector.has_table.assert_any_call("themes")
mock_feature_flag_manager.is_feature_enabled.assert_called_with(
"TAGGING_SYSTEM"
)
mock_register_listeners.assert_called_once()
# Should seed themes
mock_seed_themes_command.assert_called_once()
mock_seed_themes.run.assert_called_once()
# Should not log skip message when tables exist
mock_logger.debug.assert_not_called()
@patch("superset.initialization.db")
@patch("superset.initialization.inspect")
@patch("superset.initialization.feature_flag_manager")
@patch("superset.initialization.register_sqla_event_listeners")
@patch("superset.commands.theme.seed.SeedSystemThemesCommand")
def test_init_database_dependent_features_skips_tagging_when_disabled(
self,
mock_seed_themes_command,
mock_register_listeners,
mock_feature_flag_manager,
mock_inspect_func,
mock_db,
):
"""Test that tagging system is not initialized when feature flag is disabled."""
# Setup
mock_app = MagicMock()
app_initializer = SupersetAppInitializer(mock_app)
mock_inspector = MagicMock()
mock_inspector.has_table.return_value = True
mock_inspect_func.return_value = mock_inspector
mock_db.engine = MagicMock()
mock_feature_flag_manager.is_feature_enabled.return_value = False
mock_seed_themes = MagicMock()
mock_seed_themes_command.return_value = mock_seed_themes
# Execute
app_initializer._init_database_dependent_features()
# Assert
mock_feature_flag_manager.is_feature_enabled.assert_called_with(
"TAGGING_SYSTEM"
)
mock_register_listeners.assert_not_called()
# Check both tables are checked
assert mock_inspector.has_table.call_count == 2
mock_inspector.has_table.assert_any_call("dashboards")
mock_inspector.has_table.assert_any_call("themes")
@patch("superset.initialization.db")
@patch("superset.initialization.inspect")
@patch("superset.initialization.logger")
def test_init_database_dependent_features_handles_inspector_error_in_has_table(
self, mock_logger, mock_inspect_func, mock_db
):
"""Test that OperationalError from has_table check is handled gracefully."""
# Setup
mock_app = MagicMock()
app_initializer = SupersetAppInitializer(mock_app)
mock_inspector = MagicMock()
error_msg = "Table check failed"
mock_inspector.has_table.side_effect = OperationalError(error_msg, None, None)
mock_inspect_func.return_value = mock_inspector
mock_db.engine = MagicMock()
# Execute
app_initializer._init_database_dependent_features()
# Assert
mock_inspect_func.assert_called_once_with(mock_db.engine)
mock_inspector.has_table.assert_called_once_with("dashboards")
# Should handle the error gracefully
mock_logger.debug.assert_called_once()
call_args = mock_logger.debug.call_args
assert "Error inspecting database tables" in call_args[0][0]
def test_database_uri_lazy_property(self):
"""Test database_uri property uses lazy initialization with smart caching."""
# Setup
mock_app = MagicMock()
test_uri = "postgresql://user:pass@host:5432/testdb"
mock_app.config = {"SQLALCHEMY_DATABASE_URI": test_uri}
app_initializer = SupersetAppInitializer(mock_app)
# Ensure cache is None initially
assert app_initializer._db_uri_cache is None
# First access should set the cache (valid URI)
uri = app_initializer.database_uri
assert uri == test_uri
assert app_initializer._db_uri_cache is not None
assert app_initializer._db_uri_cache == test_uri
# Second access should use cache (not call config.get again)
# Change the config to verify cache is being used
mock_app.config["SQLALCHEMY_DATABASE_URI"] = "different_uri"
uri2 = app_initializer.database_uri
assert (
uri2 == test_uri
) # Should still return cached value (not "different_uri")
def test_database_uri_lazy_property_with_missing_config(self):
"""Test that database_uri property returns empty string when config missing."""
# Setup
mock_app = MagicMock()
mock_app.config = {} # Empty config
app_initializer = SupersetAppInitializer(mock_app)
# Should return empty string when config key doesn't exist
uri = app_initializer.database_uri
assert uri == ""
# Empty string is a fallback value, so it should NOT be cached
assert app_initializer._db_uri_cache is None
def test_database_uri_prevents_nouser_fallback(self):
"""Test that lazy initialization prevents nouser fallback during deployment."""
# Setup - simulate deployment scenario where config is loaded properly
mock_app = MagicMock()
# Config is properly loaded with real database URI (not nouser)
mock_app.config = {
"SQLALCHEMY_DATABASE_URI": "postgresql://realuser:realpass@realhost:5432/realdb"
}
app_initializer = SupersetAppInitializer(mock_app)
# Access the database URI - should get the real URI, not nouser
uri = app_initializer.database_uri
assert uri == "postgresql://realuser:realpass@realhost:5432/realdb"
assert "nouser" not in uri
assert "nopassword" not in uri
assert "nohost" not in uri
assert "nodb" not in uri
# Verify cache is working
assert app_initializer._db_uri_cache is not None
assert (
app_initializer._db_uri_cache
== "postgresql://realuser:realpass@realhost:5432/realdb"
)
@patch("superset.initialization.make_url_safe")
@patch("superset.initialization.db")
def test_set_db_default_isolation_uses_lazy_property(
self, mock_db, mock_make_url_safe
):
"""Test that set_db_default_isolation uses the lazy database_uri property."""
# Setup
mock_app = MagicMock()
test_uri = "postgresql://user:pass@host:5432/testdb"
mock_app.config = {
"SQLALCHEMY_DATABASE_URI": test_uri,
"SQLALCHEMY_ENGINE_OPTIONS": {},
}
app_initializer = SupersetAppInitializer(mock_app)
# Mock make_url_safe to return a URL with postgresql backend
mock_url = MagicMock()
mock_url.get_backend_name.return_value = "postgresql"
mock_make_url_safe.return_value = mock_url
# Mock db.engine
mock_engine = MagicMock()
mock_db.engine = mock_engine
# Execute
app_initializer.set_db_default_isolation()
# Assert that make_url_safe was called with the lazy property value
mock_make_url_safe.assert_called_once_with(test_uri)
# Should set isolation level to READ COMMITTED for postgresql
mock_engine.execution_options.assert_called_once_with(
isolation_level="READ COMMITTED"
)
# Verify the cache was created
assert app_initializer._db_uri_cache is not None
assert app_initializer._db_uri_cache == test_uri
@patch("superset.initialization.make_url_safe")
@patch("superset.initialization.db")
def test_set_db_default_isolation_with_empty_uri(self, mock_db, mock_make_url_safe):
"""Test that set_db_default_isolation handles empty URI gracefully."""
# Setup
mock_app = MagicMock()
mock_app.config = {
"SQLALCHEMY_DATABASE_URI": "", # Empty URI
"SQLALCHEMY_ENGINE_OPTIONS": {},
}
app_initializer = SupersetAppInitializer(mock_app)
# Mock make_url_safe to return a URL with no backend
mock_url = MagicMock()
mock_url.get_backend_name.return_value = None
mock_make_url_safe.return_value = mock_url
# Execute
app_initializer.set_db_default_isolation()
# Should handle empty URI gracefully
mock_make_url_safe.assert_called_once_with("")
# Should not set isolation level for empty/unknown backend
mock_db.engine.execution_options.assert_not_called()
def test_database_uri_doesnt_cache_fallback_values(self):
"""Test that fallback values like 'nouser' are not cached."""
# Setup
mock_app = MagicMock()
# Initially return the fallback nouser URI
config_dict = {
"SQLALCHEMY_DATABASE_URI": "postgresql://nouser:nopassword@nohost:5432/nodb"
}
mock_app.config = config_dict
app_initializer = SupersetAppInitializer(mock_app)
# First access returns fallback but shouldn't cache it
uri1 = app_initializer.database_uri
assert uri1 == "postgresql://nouser:nopassword@nohost:5432/nodb"
assert app_initializer._db_uri_cache is None # Should NOT be cached
# Now config is properly loaded - update the same dict
config_dict["SQLALCHEMY_DATABASE_URI"] = (
"postgresql://realuser:realpass@realhost:5432/realdb"
)
# Second access should get the new value since fallback wasn't cached
uri2 = app_initializer.database_uri
assert uri2 == "postgresql://realuser:realpass@realhost:5432/realdb"
assert app_initializer._db_uri_cache is not None # Now it should be cached
assert (
app_initializer._db_uri_cache
== "postgresql://realuser:realpass@realhost:5432/realdb"
)
# Third access should use cache even if config changes again
config_dict["SQLALCHEMY_DATABASE_URI"] = (
"postgresql://different:uri@host:5432/db"
)
uri3 = app_initializer.database_uri
assert (
uri3 == "postgresql://realuser:realpass@realhost:5432/realdb"
) # Still cached value
def test_database_uri_caches_valid_uri(self):
"""Test that valid URIs are properly cached."""
# Setup
mock_app = MagicMock()
valid_uri = "postgresql://validuser:validpass@validhost:5432/validdb"
mock_app.config = {"SQLALCHEMY_DATABASE_URI": valid_uri}
app_initializer = SupersetAppInitializer(mock_app)
# First access should cache valid URI
uri1 = app_initializer.database_uri
assert uri1 == valid_uri
assert app_initializer._db_uri_cache is not None
assert app_initializer._db_uri_cache == valid_uri
# Change config
mock_app.config = {
"SQLALCHEMY_DATABASE_URI": "postgresql://changed:uri@host:5432/db"
}
# Second access should still return cached value
uri2 = app_initializer.database_uri
assert uri2 == valid_uri # Still the cached value, not the changed one
def test_database_uri_fallback_patterns(self):
"""Test that various fallback patterns are not cached."""
# Test various fallback patterns
fallback_uris = [
"postgresql://nouser:nopassword@nohost:5432/nodb",
"mysql://NOUSER:NOPASSWORD@NOHOST:3306/NODB",
"postgresql://noUser:pass@host:5432/db", # Contains 'nouser' (case insens.)
"sqlite:///nohost.db", # Contains 'nohost'
"", # Empty string
]
for fallback_uri in fallback_uris:
# Create a fresh initializer for each test
mock_app = MagicMock()
mock_app.config = {"SQLALCHEMY_DATABASE_URI": fallback_uri}
app_initializer = SupersetAppInitializer(mock_app)
uri = app_initializer.database_uri
# Should return the value but not cache it
assert uri == fallback_uri
assert app_initializer._db_uri_cache is None, (
f"Should not cache: {fallback_uri}"
)
@patch("superset.initialization.logger")
@patch("superset.initialization.inspect")
@patch("superset.initialization.db")
def test_init_database_dependent_features_skips_with_fallback_uri(
self, mock_db, mock_inspect, mock_logger
):
"""Test that database-dependent features are skipped when URI is a fallback."""
# Setup
mock_app = MagicMock()
# Set a fallback URI that would cause connection to fail
mock_app.config = {
"SQLALCHEMY_DATABASE_URI": "postgresql://nouser:nopassword@nohost:5432/nodb"
}
app_initializer = SupersetAppInitializer(mock_app)
# Execute
app_initializer._init_database_dependent_features()
# Assert - should not try to inspect database
mock_inspect.assert_not_called()
# Should log warning about fallback URI
mock_logger.warning.assert_called_once()
warning_message = mock_logger.warning.call_args[0][0]
assert "fallback value" in warning_message
assert "workspace context" in warning_message