mirror of
https://github.com/apache/superset.git
synced 2026-04-07 18:35:15 +00:00
feat(docs): auto-generate database documentation from lib.py (#36805)
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
867
docs/scripts/generate-database-docs.mjs
Normal file
867
docs/scripts/generate-database-docs.mjs
Normal file
@@ -0,0 +1,867 @@
|
||||
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/**
 * This script generates database documentation data from engine spec metadata.
 * It outputs a JSON file that can be imported by React components for rendering.
 *
 * Usage: node scripts/generate-database-docs.mjs
 *
 * The script can run in two modes:
 * 1. With Flask app (full diagnostics) - requires superset to be installed
 * 2. Fallback mode (documentation only) - parses engine spec `metadata` attributes via AST
 */
|
||||
|
||||
import { spawnSync } from 'child_process';
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';

// Resolve this script's location (ES modules have no __filename/__dirname).
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// Repository root: two levels up from docs/scripts/.
const ROOT_DIR = path.resolve(__dirname, '../..');
// The docs/ site directory (Docusaurus root).
const DOCS_DIR = path.resolve(__dirname, '..');
// Generated JSON data consumed by the React database components at build time.
const DATA_OUTPUT_DIR = path.join(DOCS_DIR, 'src/data');
const DATA_OUTPUT_FILE = path.join(DATA_OUTPUT_DIR, 'databases.json');
// Generated MDX pages: index under docs/databases/, one page per database
// under docs/databases/supported/.
const MDX_OUTPUT_DIR = path.join(DOCS_DIR, 'docs/databases');
const MDX_SUPPORTED_DIR = path.join(MDX_OUTPUT_DIR, 'supported');
|
||||
|
||||
/**
 * Try to run the full lib.py script with Flask context.
 *
 * Spawns a Python subprocess that creates the Superset Flask app and calls
 * generate_yaml_docs(), which yields the richest data (scores, time grains,
 * feature diagnostics). Requires a working Superset dev environment; in CI
 * this typically fails and the caller falls back to AST extraction.
 *
 * @returns {object|null} Parsed database docs keyed by engine name, or null
 *   when the subprocess could not run or exited non-zero.
 */
function tryRunFullScript() {
  try {
    console.log('Attempting to run lib.py with Flask context...');
    const pythonCode = `
import sys
import json
sys.path.insert(0, '.')
from superset.app import create_app
from superset.db_engine_specs.lib import generate_yaml_docs
app = create_app()
with app.app_context():
    docs = generate_yaml_docs()
    print(json.dumps(docs, default=str))
`;
    const result = spawnSync('python', ['-c', pythonCode], {
      cwd: ROOT_DIR,
      encoding: 'utf-8',
      // Flask app creation can be slow; allow up to a minute.
      timeout: 60000,
      // Output can exceed Node's 1 MB default buffer.
      maxBuffer: 10 * 1024 * 1024,
      // create_app() requires a SECRET_KEY; the value is irrelevant for docs.
      env: { ...process.env, SUPERSET_SECRET_KEY: 'docs-build-key' },
    });

    if (result.error) {
      throw result.error;
    }
    if (result.status !== 0) {
      throw new Error(result.stderr || 'Python script failed');
    }
    return JSON.parse(result.stdout);
  } catch (error) {
    // Expected in environments without a Superset install; signal fallback.
    console.log('Full script execution failed, using fallback mode...');
    console.log(' Reason:', error.message?.split('\n')[0] || 'Unknown error');
    return null;
  }
}
|
||||
|
||||
/**
 * Extract metadata from individual engine spec files using AST parsing.
 * This is the preferred approach - reads directly from spec.metadata attributes
 * without importing Superset, so it works in CI without Flask.
 * Supports metadata inheritance - child classes inherit and merge with parent metadata.
 *
 * The heavy lifting is done by an embedded Python program (run via python3)
 * that parses every file in superset/db_engine_specs with the `ast` module.
 *
 * @returns {object|null} Map of display name -> database entry (documentation
 *   metadata plus zeroed diagnostic fields), or null when extraction fails.
 */
function extractEngineSpecMetadata() {
  console.log('Extracting metadata from engine spec files...');
  console.log(` ROOT_DIR: ${ROOT_DIR}`);

  try {
    // NOTE: this template literal is Python source. Python f-strings use
    // single braces, so nothing here collides with JS ${} interpolation.
    const pythonCode = `
import sys
import json
import ast
import os

def eval_node(node):
    """Safely evaluate an AST node as a Python literal."""
    if node is None:
        return None
    if isinstance(node, ast.Constant):
        return node.value
    elif isinstance(node, ast.List):
        return [eval_node(e) for e in node.elts]
    elif isinstance(node, ast.Dict):
        result = {}
        for k, v in zip(node.keys, node.values):
            if k is not None:
                key = eval_node(k)
                if key is not None:
                    result[key] = eval_node(v)
        return result
    elif isinstance(node, ast.Name):
        # Handle True, False, None constants
        if node.id == 'True':
            return True
        elif node.id == 'False':
            return False
        elif node.id == 'None':
            return None
        return node.id
    elif isinstance(node, ast.Attribute):
        # Handle DatabaseCategory.SOMETHING - return just the attribute name
        return node.attr
    elif isinstance(node, ast.BinOp) and isinstance(node.op, ast.Add):
        left, right = eval_node(node.left), eval_node(node.right)
        if isinstance(left, str) and isinstance(right, str):
            return left + right
        return None
    elif isinstance(node, ast.Tuple):
        return tuple(eval_node(e) for e in node.elts)
    elif isinstance(node, ast.JoinedStr):
        # f-strings - just return a placeholder
        return "<f-string>"
    return None

def deep_merge(base, override):
    """Deep merge two dictionaries. Override values take precedence."""
    if base is None:
        return override
    if override is None:
        return base
    if not isinstance(base, dict) or not isinstance(override, dict):
        return override

    # Fields that should NOT be inherited from parent classes
    # - compatible_databases: Each class defines its own compatible DBs
    # - categories: Each class defines its own categories (not extended from parent)
    NON_INHERITABLE_FIELDS = {'compatible_databases', 'categories'}

    result = base.copy()
    # Remove non-inheritable fields from base (they should only come from the class that defines them)
    for field in NON_INHERITABLE_FIELDS:
        result.pop(field, None)

    for key, value in override.items():
        if key in result and isinstance(result[key], dict) and isinstance(value, dict):
            result[key] = deep_merge(result[key], value)
        elif key in result and isinstance(result[key], list) and isinstance(value, list):
            # Extend lists from parent (e.g., drivers)
            result[key] = result[key] + value
        else:
            result[key] = value
    return result

databases = {}
specs_dir = 'superset/db_engine_specs'
errors = []
debug_info = {
    "cwd": os.getcwd(),
    "specs_dir_exists": os.path.isdir(specs_dir),
    "files_checked": 0,
    "classes_found": 0,
    "classes_with_metadata": 0,
    "inherited_metadata": 0,
}

if not os.path.isdir(specs_dir):
    print(json.dumps({"error": f"Directory not found: {specs_dir}", "cwd": os.getcwd()}))
    sys.exit(1)

# First pass: collect all class info (name, bases, metadata)
class_info = {}  # class_name -> {bases: [], metadata: {}, engine_name: str, filename: str}

for filename in sorted(os.listdir(specs_dir)):
    if not filename.endswith('.py') or filename in ('__init__.py', 'lib.py', 'lint_metadata.py'):
        continue

    debug_info["files_checked"] += 1
    filepath = os.path.join(specs_dir, filename)
    try:
        with open(filepath) as f:
            source = f.read()
        tree = ast.parse(source)

        for node in ast.walk(tree):
            if not isinstance(node, ast.ClassDef):
                continue

            # Get base class names
            base_names = []
            for b in node.bases:
                if isinstance(b, ast.Name):
                    base_names.append(b.id)
                elif isinstance(b, ast.Attribute):
                    base_names.append(b.attr)

            is_engine_spec = any('EngineSpec' in name or 'Mixin' in name for name in base_names)
            if not is_engine_spec:
                continue

            # Extract class attributes
            engine_name = None
            metadata = None

            for item in node.body:
                if isinstance(item, ast.Assign):
                    for target in item.targets:
                        if isinstance(target, ast.Name):
                            if target.id == 'engine_name':
                                val = eval_node(item.value)
                                if isinstance(val, str):
                                    engine_name = val
                            elif target.id == 'metadata':
                                metadata = eval_node(item.value)

            # Check for engine attribute with non-empty value to distinguish
            # true base classes from product classes like OceanBaseEngineSpec
            has_non_empty_engine = False
            for item in node.body:
                if isinstance(item, ast.Assign):
                    for target in item.targets:
                        if isinstance(target, ast.Name) and target.id == 'engine':
                            # Check if engine value is non-empty string
                            if isinstance(item.value, ast.Constant):
                                has_non_empty_engine = bool(item.value.value)
                            break

            # True base classes: end with BaseEngineSpec AND don't define engine
            # or have empty engine (like PostgresBaseEngineSpec with engine = "")
            is_true_base = (
                node.name.endswith('BaseEngineSpec') and not has_non_empty_engine
            ) or 'Mixin' in node.name

            # Store class info for inheritance resolution
            class_info[node.name] = {
                'bases': base_names,
                'metadata': metadata,
                'engine_name': engine_name,
                'filename': filename,
                'is_base_or_mixin': is_true_base,
            }
    except Exception as e:
        errors.append(f"(unknown): {str(e)}")

# Second pass: resolve inheritance and build final metadata
def get_inherited_metadata(class_name, visited=None):
    """Recursively get metadata from parent classes."""
    if visited is None:
        visited = set()
    if class_name in visited:
        return {}  # Prevent circular inheritance
    visited.add(class_name)

    info = class_info.get(class_name)
    if not info:
        return {}

    # Start with parent metadata
    inherited = {}
    for base_name in info['bases']:
        parent_metadata = get_inherited_metadata(base_name, visited.copy())
        if parent_metadata:
            inherited = deep_merge(inherited, parent_metadata)

    # Merge with own metadata (own takes precedence)
    if info['metadata']:
        inherited = deep_merge(inherited, info['metadata'])

    return inherited

for class_name, info in class_info.items():
    # Skip base classes and mixins
    if info['is_base_or_mixin']:
        continue

    debug_info["classes_found"] += 1

    # Get final metadata with inheritance
    final_metadata = get_inherited_metadata(class_name)

    # Remove compatible_databases if not defined by this class (it's not inheritable)
    own_metadata = info['metadata'] or {}
    if 'compatible_databases' not in own_metadata and 'compatible_databases' in final_metadata:
        del final_metadata['compatible_databases']

    # Track if we inherited anything
    if final_metadata and final_metadata != own_metadata:
        debug_info["inherited_metadata"] += 1

    # Use class name as fallback for engine_name
    display_name = info['engine_name'] or class_name.replace('EngineSpec', '').replace('_', ' ')

    if final_metadata and isinstance(final_metadata, dict) and display_name:
        debug_info["classes_with_metadata"] += 1
        databases[display_name] = {
            'engine': display_name.lower().replace(' ', '_'),
            'engine_name': display_name,
            'module': info['filename'][:-3],  # Remove .py extension
            'documentation': final_metadata,
            'time_grains': {},
            'score': 0,
            'max_score': 0,
            'joins': True,
            'subqueries': True,
            'supports_dynamic_schema': False,
            'supports_catalog': False,
            'supports_dynamic_catalog': False,
            'ssh_tunneling': False,
            'query_cancelation': False,
            'supports_file_upload': False,
            'user_impersonation': False,
            'query_cost_estimation': False,
            'sql_validation': False,
        }

if errors and not databases:
    print(json.dumps({"error": "Parse errors", "details": errors, "debug": debug_info}), file=sys.stderr)

# Print debug info to stderr for troubleshooting
print(json.dumps(debug_info), file=sys.stderr)

print(json.dumps(databases, default=str))
`;
    const result = spawnSync('python3', ['-c', pythonCode], {
      cwd: ROOT_DIR,
      encoding: 'utf-8',
      timeout: 30000,
      // Output can exceed Node's 1 MB default buffer.
      maxBuffer: 10 * 1024 * 1024,
    });

    if (result.error) {
      throw result.error;
    }
    // Log debug info from stderr
    if (result.stderr) {
      console.log('Python debug info:', result.stderr.trim());
    }
    if (result.status !== 0) {
      throw new Error(result.stderr || 'Python script failed');
    }
    const databases = JSON.parse(result.stdout);
    if (Object.keys(databases).length === 0) {
      throw new Error('No metadata found in engine specs');
    }

    console.log(`Extracted metadata from ${Object.keys(databases).length} engine specs`);
    return databases;
  } catch (err) {
    // Caller treats null as "no data"; main() aborts if all sources fail.
    console.log('Engine spec metadata extraction failed:', err.message);
    return null;
  }
}
|
||||
|
||||
/**
 * Build aggregate statistics from the database data.
 *
 * @param {Record<string, object>} databases - Map of display name -> database
 *   entry (each with optional `documentation` metadata plus diagnostic flags
 *   such as `joins`, `score`, `max_score`).
 * @returns {object} Counts of documentation coverage and feature support,
 *   score summary, and `byCategory`: category display name -> database names.
 */
function buildStatistics(databases) {
  // Map category constant names (as used in engine spec metadata) to display
  // names. Hoisted out of the loops below - the previous version rebuilt this
  // literal for every category of every database.
  const categoryDisplayNames = {
    'CLOUD_AWS': 'Cloud - AWS',
    'CLOUD_GCP': 'Cloud - Google',
    'CLOUD_AZURE': 'Cloud - Azure',
    'CLOUD_DATA_WAREHOUSES': 'Cloud Data Warehouses',
    'APACHE_PROJECTS': 'Apache Projects',
    'TRADITIONAL_RDBMS': 'Traditional RDBMS',
    'ANALYTICAL_DATABASES': 'Analytical Databases',
    'SEARCH_NOSQL': 'Search & NoSQL',
    'QUERY_ENGINES': 'Query Engines',
    'TIME_SERIES': 'Time Series Databases',
    'OTHER': 'Other Databases',
    'OPEN_SOURCE': 'Open Source',
    'HOSTED_OPEN_SOURCE': 'Hosted Open Source',
    'PROPRIETARY': 'Proprietary',
  };

  const stats = {
    totalDatabases: Object.keys(databases).length,
    withDocumentation: 0,
    withConnectionString: 0,
    withDrivers: 0,
    withAuthMethods: 0,
    supportsJoins: 0,
    supportsSubqueries: 0,
    supportsDynamicSchema: 0,
    supportsCatalog: 0,
    averageScore: 0,
    maxScore: 0,
    byCategory: {},
  };

  let totalScore = 0;

  for (const [name, db] of Object.entries(databases)) {
    const docs = db.documentation || {};

    if (Object.keys(docs).length > 0) stats.withDocumentation++;
    if (docs.connection_string || docs.drivers?.length > 0)
      stats.withConnectionString++;
    if (docs.drivers?.length > 0) stats.withDrivers++;
    if (docs.authentication_methods?.length > 0) stats.withAuthMethods++;
    if (db.joins) stats.supportsJoins++;
    if (db.subqueries) stats.supportsSubqueries++;
    if (db.supports_dynamic_schema) stats.supportsDynamicSchema++;
    if (db.supports_catalog) stats.supportsCatalog++;

    totalScore += db.score || 0;
    if (db.max_score > stats.maxScore) stats.maxScore = db.max_score;

    // Use categories from documentation metadata (computed by Python).
    // Each database can belong to multiple categories.
    const categories = docs.categories || ['OTHER'];
    for (const cat of categories) {
      const displayName = categoryDisplayNames[cat] || cat;
      if (!stats.byCategory[displayName]) {
        stats.byCategory[displayName] = [];
      }
      stats.byCategory[displayName].push(name);
    }
  }

  // Guard against division by zero: the previous version produced NaN for an
  // empty database map.
  stats.averageScore =
    stats.totalDatabases > 0 ? Math.round(totalScore / stats.totalDatabases) : 0;

  return stats;
}
|
||||
|
||||
/**
 * Convert a database display name into a URL-friendly slug.
 * Lowercases, collapses every run of non-alphanumeric characters into a
 * single hyphen, and trims a leading/trailing hyphen.
 *
 * @param {string} name - Database display name (e.g. "Apache Druid").
 * @returns {string} Slug suitable for a filename or URL segment.
 */
function toSlug(name) {
  const lowered = name.toLowerCase();
  const hyphenated = lowered.replace(/[^a-z0-9]+/g, '-');
  return hyphenated.replace(/^-|-$/g, '');
}
|
||||
|
||||
/**
 * Generate MDX content for a single database page.
 *
 * The page is a thin wrapper that delegates rendering to the DatabasePage
 * React component, fed from the generated databases.json.
 *
 * @param {string} name - Database display name (also the lookup key in
 *   databaseData.databases).
 * @param {object} db - Database entry; db.documentation.description (if any)
 *   seeds the frontmatter description.
 * @returns {string} Complete MDX file content.
 */
function generateDatabaseMDX(name, db) {
  const description = db.documentation?.description || `Documentation for ${name} database connection.`;
  // Truncate to 160 chars for the meta description, escape for a double-quoted
  // YAML scalar, and collapse newlines - a raw newline inside the quoted
  // frontmatter value would previously break the YAML header.
  const shortDesc = description
    .slice(0, 160)
    .replace(/\\/g, '\\\\')
    .replace(/"/g, '\\"')
    .replace(/\r?\n/g, ' ');

  return `---
title: ${name}
sidebar_label: ${name}
description: "${shortDesc}"
hide_title: true
---

{/*
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
*/}

import { DatabasePage } from '@site/src/components/databases';
import databaseData from '@site/src/data/databases.json';

<DatabasePage name="${name}" database={databaseData.databases["${name}"]} />
`;
}
|
||||
|
||||
/**
 * Generate the index MDX for the databases overview page.
 *
 * @param {object} statistics - Output of buildStatistics(); only
 *   statistics.totalDatabases is interpolated into the page.
 * @param {boolean} [usedFlaskContext=true] - When false, a developer note is
 *   appended explaining that diagnostics came from the AST fallback and how to
 *   rebuild with full Flask diagnostics.
 * @returns {string} Complete MDX file content for docs/databases/index.mdx.
 */
function generateIndexMDX(statistics, usedFlaskContext = true) {
  const fallbackNotice = usedFlaskContext ? '' : `
:::info Developer Note
This documentation was built without Flask context, so feature diagnostics (scores, time grain support, etc.)
may not reflect actual database capabilities. For full diagnostics, build docs locally with:

\`\`\`bash
cd docs && npm run gen-db-docs
\`\`\`

This requires a working Superset development environment.
:::

`;

  return `---
title: Connecting to Databases
sidebar_label: Overview
sidebar_position: 1
---

{/*
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
*/}

import { DatabaseIndex } from '@site/src/components/databases';
import databaseData from '@site/src/data/databases.json';

# Connecting to Databases

Superset does not ship bundled with connectivity to databases. The main step in connecting
Superset to a database is to **install the proper database driver(s)** in your environment.

:::note
You'll need to install the required packages for the database you want to use as your metadata database
as well as the packages needed to connect to the databases you want to access through Superset.
For information about setting up Superset's metadata database, please refer to
installation documentations ([Docker Compose](/docs/installation/docker-compose), [Kubernetes](/docs/installation/kubernetes))
:::

## Supported Databases

Superset supports **${statistics.totalDatabases} databases** with varying levels of feature support.
Click on any database name to see detailed documentation including connection strings,
authentication methods, and configuration options.

<DatabaseIndex data={databaseData} />

## Installing Database Drivers

Superset requires a Python [DB-API database driver](https://peps.python.org/pep-0249/)
and a [SQLAlchemy dialect](https://docs.sqlalchemy.org/en/20/dialects/) to be installed for
each database engine you want to connect to.

### Installing Drivers in Docker

For Docker deployments, create a \`requirements-local.txt\` file in the \`docker\` directory:

\`\`\`bash
# Create the requirements file
touch ./docker/requirements-local.txt

# Add your driver (e.g., for PostgreSQL)
echo "psycopg2-binary" >> ./docker/requirements-local.txt
\`\`\`

Then restart your containers. The drivers will be installed automatically.

### Installing Drivers with pip

For non-Docker installations:

\`\`\`bash
pip install <driver-package>
\`\`\`

See individual database pages for the specific driver packages needed.

## Connecting Through the UI

1. Go to **Settings → Data: Database Connections**
2. Click **+ DATABASE**
3. Select your database type or enter a SQLAlchemy URI
4. Click **Test Connection** to verify
5. Click **Connect** to save

## Contributing

To add or update database documentation, add a \`metadata\` attribute to your engine spec class in
\`superset/db_engine_specs/\`. Documentation is auto-generated from these metadata attributes.

See [METADATA_STATUS.md](https://github.com/apache/superset/blob/master/superset/db_engine_specs/METADATA_STATUS.md)
for the current status of database documentation and the [README](https://github.com/apache/superset/blob/master/superset/db_engine_specs/README.md) for the metadata schema.
${fallbackNotice}`;
}
|
||||
|
||||
// Repository-root README.md whose "supported databases" logo section is kept
// in sync by updateReadme().
const README_PATH = path.join(ROOT_DIR, 'README.md');
// HTML comment markers delimiting the auto-generated logo section in README.md.
const README_START_MARKER = '<!-- SUPPORTED_DATABASES_START -->';
const README_END_MARKER = '<!-- SUPPORTED_DATABASES_END -->';
|
||||
|
||||
/**
 * Generate the database logos HTML block for README.md.
 * Only databases whose documentation metadata defines a logo are included,
 * sorted alphabetically by display name.
 *
 * @param {Record<string, object>} databases - Database map keyed by display name.
 * @returns {string} A `<p>` block of `<img>` tags, or '' when no logos exist.
 */
function generateReadmeLogos(databases) {
  // Keep only entries that declare a logo, in alphabetical order.
  const withLogos = Object.entries(databases).filter(
    ([, db]) => db.documentation?.logo,
  );
  withLogos.sort(([nameA], [nameB]) => nameA.localeCompare(nameB));

  if (withLogos.length === 0) {
    return '';
  }

  // One <img> tag per database, served from the docs site.
  const tags = [];
  for (const [name, db] of withLogos) {
    const { logo } = db.documentation;
    const alt = name.toLowerCase().replace(/\s+/g, '-');
    tags.push(
      ` <img src="https://superset.apache.org/img/databases/${logo}" alt="${alt}" border="0" width="80" height="40" class="database-logo" />`,
    );
  }

  return `<p align="center">\n${tags.join('\n')}\n</p>`;
}
|
||||
|
||||
/**
 * Update README.md with the generated database logos section.
 * No-op (returns false) when README.md is missing, the marker comments are
 * absent, or the generated section is already up to date.
 *
 * @param {Record<string, object>} databases - Database map keyed by display name.
 * @returns {boolean} true when README.md was rewritten.
 */
function updateReadme(databases) {
  if (!fs.existsSync(README_PATH)) {
    console.log('README.md not found, skipping update');
    return false;
  }

  const original = fs.readFileSync(README_PATH, 'utf-8');

  const hasMarkers =
    original.includes(README_START_MARKER) &&
    original.includes(README_END_MARKER);
  if (!hasMarkers) {
    console.log('README.md missing database markers, skipping update');
    console.log(` Add ${README_START_MARKER} and ${README_END_MARKER} to enable auto-generation`);
    return false;
  }

  const logosHtml = generateReadmeLogos(databases);

  // Swap everything between (and including) the markers for a fresh section.
  const markerPattern = new RegExp(
    `${README_START_MARKER}[\\s\\S]*?${README_END_MARKER}`,
    'g',
  );
  const updated = original.replace(
    markerPattern,
    `${README_START_MARKER}\n${logosHtml}\n${README_END_MARKER}`,
  );

  if (updated === original) {
    console.log('README.md database logos unchanged');
    return false;
  }

  fs.writeFileSync(README_PATH, updated);
  console.log('Updated README.md database logos');
  return true;
}
|
||||
|
||||
/**
 * Load previously generated database data from databases.json, if present.
 *
 * @returns {object|null} Parsed data, or null when the file is missing or
 *   cannot be parsed.
 */
function loadExistingData() {
  if (!fs.existsSync(DATA_OUTPUT_FILE)) {
    return null;
  }

  try {
    const raw = fs.readFileSync(DATA_OUTPUT_FILE, 'utf-8');
    return JSON.parse(raw);
  } catch (error) {
    // A corrupt file is not fatal; we simply regenerate from scratch.
    console.log('Could not load existing data:', error.message);
    return null;
  }
}
|
||||
|
||||
/**
 * Merge new documentation with existing diagnostics.
 * When the fresh data lacks scores (fallback extraction), copies score,
 * time_grains, and feature flags from a previous run's data. Mutates and
 * returns newDatabases.
 *
 * @param {Record<string, object>} newDatabases - Freshly extracted databases.
 * @param {object|null} existingData - Previously generated output (may be null).
 * @returns {Record<string, object>} newDatabases with diagnostics merged in.
 */
function mergeWithExistingDiagnostics(newDatabases, existingData) {
  if (!existingData?.databases) return newDatabases;

  const diagnosticFields = [
    'score', 'max_score', 'time_grains', 'joins', 'subqueries',
    'supports_dynamic_schema', 'supports_catalog', 'supports_dynamic_catalog',
    'ssh_tunneling', 'query_cancelation', 'supports_file_upload',
    'user_impersonation', 'query_cost_estimation', 'sql_validation'
  ];

  for (const [name, db] of Object.entries(newDatabases)) {
    const previous = existingData.databases[name];
    // Only trust prior entries that actually carried diagnostics.
    if (!previous || !(previous.score > 0)) continue;
    for (const field of diagnosticFields) {
      if (previous[field] !== undefined) {
        db[field] = previous[field];
      }
    }
  }

  const preserved = Object.values(newDatabases).filter((d) => d.score > 0).length;
  if (preserved > 0) {
    console.log(`Preserved diagnostics for ${preserved} databases from existing data`);
  }

  return newDatabases;
}
|
||||
|
||||
/**
 * Main entry point: gather database metadata (Flask context first, AST
 * fallback second), write src/data/databases.json, and regenerate the MDX
 * pages and category files under docs/databases/.
 */
async function main() {
  console.log('Generating database documentation...\n');

  // Ensure output directories exist
  if (!fs.existsSync(DATA_OUTPUT_DIR)) {
    fs.mkdirSync(DATA_OUTPUT_DIR, { recursive: true });
  }
  if (!fs.existsSync(MDX_OUTPUT_DIR)) {
    fs.mkdirSync(MDX_OUTPUT_DIR, { recursive: true });
  }

  // Load existing data for potential merge
  const existingData = loadExistingData();

  // Try sources in order of preference:
  // 1. Full script with Flask context (richest data with diagnostics)
  // 2. Engine spec metadata files (works in CI without Flask)
  let databases = tryRunFullScript();
  let usedFlaskContext = !!databases;

  if (!databases) {
    // Extract from engine spec metadata (preferred for CI)
    databases = extractEngineSpecMetadata();
  }

  if (!databases || Object.keys(databases).length === 0) {
    console.error('Failed to generate database documentation data.');
    console.error('Could not extract from Flask app or engine spec metadata.');
    process.exit(1);
  }

  console.log(`Processed ${Object.keys(databases).length} databases\n`);

  // Check if new data has scores; if not, preserve existing diagnostics
  // from the previous databases.json so fallback builds don't lose them.
  const hasNewScores = Object.values(databases).some((db) => db.score > 0);
  if (!hasNewScores && existingData) {
    databases = mergeWithExistingDiagnostics(databases, existingData);
  }

  // Build statistics
  const statistics = buildStatistics(databases);

  // Create the final output structure
  const output = {
    generated: new Date().toISOString(),
    statistics,
    databases,
  };

  // Write the JSON file (with trailing newline for POSIX compliance)
  fs.writeFileSync(DATA_OUTPUT_FILE, JSON.stringify(output, null, 2) + '\n');
  console.log(`Generated: ${path.relative(DOCS_DIR, DATA_OUTPUT_FILE)}`);

  // Ensure supported directory exists
  if (!fs.existsSync(MDX_SUPPORTED_DIR)) {
    fs.mkdirSync(MDX_SUPPORTED_DIR, { recursive: true });
  }

  // Clean up old MDX files that are no longer in the database list
  console.log(`\nCleaning up old MDX files in ${path.relative(DOCS_DIR, MDX_SUPPORTED_DIR)}/`);
  const existingMdxFiles = fs.readdirSync(MDX_SUPPORTED_DIR).filter(f => f.endsWith('.mdx'));
  const validSlugs = new Set(Object.keys(databases).map(name => `${toSlug(name)}.mdx`));
  let removedCount = 0;
  for (const file of existingMdxFiles) {
    if (!validSlugs.has(file)) {
      fs.unlinkSync(path.join(MDX_SUPPORTED_DIR, file));
      removedCount++;
    }
  }
  if (removedCount > 0) {
    console.log(` Removed ${removedCount} outdated MDX files`);
  }

  // Generate individual MDX files for each database in supported/ subdirectory
  console.log(`\nGenerating MDX files in ${path.relative(DOCS_DIR, MDX_SUPPORTED_DIR)}/`);

  let mdxCount = 0;
  for (const [name, db] of Object.entries(databases)) {
    const slug = toSlug(name);
    const mdxContent = generateDatabaseMDX(name, db);
    const mdxPath = path.join(MDX_SUPPORTED_DIR, `${slug}.mdx`);
    fs.writeFileSync(mdxPath, mdxContent);
    mdxCount++;
  }
  console.log(` Generated ${mdxCount} database pages`);

  // Generate index page in parent databases/ directory
  const indexContent = generateIndexMDX(statistics, usedFlaskContext);
  const indexPath = path.join(MDX_OUTPUT_DIR, 'index.mdx');
  fs.writeFileSync(indexPath, indexContent);
  console.log(` Generated index page`);

  // Generate _category_.json for databases/ directory
  const categoryJson = {
    label: 'Databases',
    position: 1,
    link: {
      type: 'doc',
      id: 'databases/index',
    },
  };
  fs.writeFileSync(
    path.join(MDX_OUTPUT_DIR, '_category_.json'),
    JSON.stringify(categoryJson, null, 2) + '\n'
  );

  // Generate _category_.json for supported/ subdirectory (collapsible)
  const supportedCategoryJson = {
    label: 'Supported Databases',
    position: 2,
    collapsed: true,
    collapsible: true,
  };
  fs.writeFileSync(
    path.join(MDX_SUPPORTED_DIR, '_category_.json'),
    JSON.stringify(supportedCategoryJson, null, 2) + '\n'
  );
  console.log(` Generated _category_.json files`);

  // Update README.md database logos (only when explicitly requested)
  if (process.env.UPDATE_README === 'true' || process.argv.includes('--update-readme')) {
    console.log('');
    updateReadme(databases);
  }

  console.log(`\nStatistics:`);
  console.log(` Total databases: ${statistics.totalDatabases}`);
  console.log(` With documentation: ${statistics.withDocumentation}`);
  console.log(` With connection strings: ${statistics.withConnectionString}`);
  console.log(` Categories: ${Object.keys(statistics.byCategory).length}`);

  console.log('\nDone!');
}
|
||||
|
||||
// Run the generator. Exit non-zero on failure so CI does not silently publish
// broken docs - the previous `.catch(console.error)` logged the error but left
// the exit code at 0.
main().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user