// Mirror of https://github.com/apache/superset.git (synced 2026-04-07 18:35:15 +00:00)
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
|
|
|
|
/**
 * This script generates database documentation data from engine spec metadata.
 * It outputs a JSON file that can be imported by React components for rendering.
 *
 * Usage: node scripts/generate-database-docs.mjs
 *
 * The script can run in two modes:
 * 1. With Flask app (full diagnostics) - requires superset to be installed
 * 2. Fallback mode (documentation only) - parses engine spec `metadata` attributes via AST
 */
|
|
|
|
import { spawnSync } from 'child_process';
|
|
import fs from 'fs';
|
|
import { createRequire } from 'module';
|
|
import path from 'path';
|
|
import { fileURLToPath } from 'url';
|
|
|
|
// createRequire lets this ES module load CommonJS packages
// (used below for the 'image-size' package in getImageDimensions).
const require = createRequire(import.meta.url);

// Recreate CommonJS-style __filename/__dirname for this ES module.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// Repository root — this script lives two levels down (docs/scripts/).
const ROOT_DIR = path.resolve(__dirname, '../..');
// Root of the docs site.
const DOCS_DIR = path.resolve(__dirname, '..');
// Output: JSON data file consumed by the React database components.
const DATA_OUTPUT_DIR = path.join(DOCS_DIR, 'src/data');
const DATA_OUTPUT_FILE = path.join(DATA_OUTPUT_DIR, 'databases.json');
// Output: generated MDX pages — index in docs/databases/, one page per
// database in the supported/ subdirectory.
const MDX_OUTPUT_DIR = path.join(DOCS_DIR, 'docs/databases');
const MDX_SUPPORTED_DIR = path.join(MDX_OUTPUT_DIR, 'supported');
// Database logo images referenced by the README logo generator.
const IMAGES_DIR = path.join(DOCS_DIR, 'static/img/databases');
|
|
|
|
/**
 * Try to run the full lib.py script with Flask context.
 *
 * This is the richest data source (includes feature diagnostics computed by
 * generate_yaml_docs), but it requires a working Superset development
 * environment. On any failure the caller falls back to AST-based extraction.
 *
 * @returns {object|null} Parsed documentation data, or null when the Flask
 *   path is unavailable.
 */
function tryRunFullScript() {
  try {
    console.log('Attempting to run lib.py with Flask context...');
    const pythonCode = `
import sys
import json
sys.path.insert(0, '.')
from superset.app import create_app
from superset.db_engine_specs.lib import generate_yaml_docs
app = create_app()
with app.app_context():
    docs = generate_yaml_docs()
    print(json.dumps(docs, default=str))
`;
    // Use 'python3' for consistency with the other spawn calls in this
    // script — plain 'python' may not exist on systems that only ship python3.
    const result = spawnSync('python3', ['-c', pythonCode], {
      cwd: ROOT_DIR,
      encoding: 'utf-8',
      timeout: 60000,
      maxBuffer: 10 * 1024 * 1024,
      // SUPERSET_SECRET_KEY: presumably required by create_app() — any
      // value works for a docs build (TODO confirm against superset.app).
      env: { ...process.env, SUPERSET_SECRET_KEY: 'docs-build-key' },
    });

    if (result.error) {
      throw result.error;
    }
    if (result.status !== 0) {
      throw new Error(result.stderr || 'Python script failed');
    }
    return JSON.parse(result.stdout);
  } catch (error) {
    // Best-effort path: log the first line of the failure and fall back.
    console.log('Full script execution failed, using fallback mode...');
    console.log(' Reason:', error.message?.split('\n')[0] || 'Unknown error');
    return null;
  }
}
|
|
|
|
/**
 * Extract metadata from individual engine spec files using AST parsing.
 *
 * This is the preferred fallback approach — it reads `metadata` class
 * attributes directly from superset/db_engine_specs/*.py via an embedded,
 * stdlib-only Python program (ast/json/os/sys), so it works in CI without a
 * Superset install or Flask app.
 * Supports metadata inheritance: child classes inherit and merge with parent
 * metadata (see deep_merge in the embedded Python).
 *
 * @returns {object|null} display-name -> database entry map, or null on failure
 */
function extractEngineSpecMetadata() {
  console.log('Extracting metadata from engine spec files...');
  console.log(` ROOT_DIR: ${ROOT_DIR}`);

  try {
    // NOTE: the Python program lives inside a JS template literal. Python's
    // single braces { } are safe here — only ${ would interpolate.
    const pythonCode = `
import sys
import json
import ast
import os

def eval_node(node):
    """Safely evaluate an AST node as a Python literal."""
    if node is None:
        return None
    if isinstance(node, ast.Constant):
        return node.value
    elif isinstance(node, ast.List):
        return [eval_node(e) for e in node.elts]
    elif isinstance(node, ast.Dict):
        result = {}
        for k, v in zip(node.keys, node.values):
            if k is not None:
                key = eval_node(k)
                if key is not None:
                    result[key] = eval_node(v)
        return result
    elif isinstance(node, ast.Name):
        # Handle True, False, None constants
        if node.id == 'True':
            return True
        elif node.id == 'False':
            return False
        elif node.id == 'None':
            return None
        return node.id
    elif isinstance(node, ast.Attribute):
        # Handle DatabaseCategory.SOMETHING - return just the attribute name
        return node.attr
    elif isinstance(node, ast.BinOp) and isinstance(node.op, ast.Add):
        left, right = eval_node(node.left), eval_node(node.right)
        if isinstance(left, str) and isinstance(right, str):
            return left + right
        return None
    elif isinstance(node, ast.Tuple):
        return tuple(eval_node(e) for e in node.elts)
    elif isinstance(node, ast.JoinedStr):
        # f-strings - just return a placeholder
        return "<f-string>"
    return None

def deep_merge(base, override):
    """Deep merge two dictionaries. Override values take precedence."""
    if base is None:
        return override
    if override is None:
        return base
    if not isinstance(base, dict) or not isinstance(override, dict):
        return override

    # Fields that should NOT be inherited from parent classes
    # - compatible_databases: Each class defines its own compatible DBs
    # - categories: Each class defines its own categories (not extended from parent)
    NON_INHERITABLE_FIELDS = {'compatible_databases', 'categories'}

    result = base.copy()
    # Remove non-inheritable fields from base (they should only come from the class that defines them)
    for field in NON_INHERITABLE_FIELDS:
        result.pop(field, None)

    for key, value in override.items():
        if key in result and isinstance(result[key], dict) and isinstance(value, dict):
            result[key] = deep_merge(result[key], value)
        elif key in result and isinstance(result[key], list) and isinstance(value, list):
            # Extend lists from parent (e.g., drivers)
            result[key] = result[key] + value
        else:
            result[key] = value
    return result

databases = {}
specs_dir = 'superset/db_engine_specs'
errors = []
debug_info = {
    "cwd": os.getcwd(),
    "specs_dir_exists": os.path.isdir(specs_dir),
    "files_checked": 0,
    "classes_found": 0,
    "classes_with_metadata": 0,
    "inherited_metadata": 0,
}

if not os.path.isdir(specs_dir):
    print(json.dumps({"error": f"Directory not found: {specs_dir}", "cwd": os.getcwd()}))
    sys.exit(1)

# First pass: collect all class info (name, bases, metadata)
class_info = {}  # class_name -> {bases: [], metadata: {}, engine_name: str, filename: str}

for filename in sorted(os.listdir(specs_dir)):
    if not filename.endswith('.py') or filename in ('__init__.py', 'lib.py', 'lint_metadata.py'):
        continue

    debug_info["files_checked"] += 1
    filepath = os.path.join(specs_dir, filename)
    try:
        with open(filepath) as f:
            source = f.read()
        tree = ast.parse(source)

        for node in ast.walk(tree):
            if not isinstance(node, ast.ClassDef):
                continue

            # Get base class names
            base_names = []
            for b in node.bases:
                if isinstance(b, ast.Name):
                    base_names.append(b.id)
                elif isinstance(b, ast.Attribute):
                    base_names.append(b.attr)

            is_engine_spec = any('EngineSpec' in name or 'Mixin' in name for name in base_names)
            if not is_engine_spec:
                continue

            # Extract class attributes
            engine_name = None
            metadata = None

            for item in node.body:
                if isinstance(item, ast.Assign):
                    for target in item.targets:
                        if isinstance(target, ast.Name):
                            if target.id == 'engine_name':
                                val = eval_node(item.value)
                                if isinstance(val, str):
                                    engine_name = val
                            elif target.id == 'metadata':
                                metadata = eval_node(item.value)

            # Check for engine attribute with non-empty value to distinguish
            # true base classes from product classes like OceanBaseEngineSpec
            has_non_empty_engine = False
            for item in node.body:
                if isinstance(item, ast.Assign):
                    for target in item.targets:
                        if isinstance(target, ast.Name) and target.id == 'engine':
                            # Check if engine value is non-empty string
                            if isinstance(item.value, ast.Constant):
                                has_non_empty_engine = bool(item.value.value)
                            break

            # True base classes: end with BaseEngineSpec AND don't define engine
            # or have empty engine (like PostgresBaseEngineSpec with engine = "")
            is_true_base = (
                node.name.endswith('BaseEngineSpec') and not has_non_empty_engine
            ) or 'Mixin' in node.name

            # Store class info for inheritance resolution
            class_info[node.name] = {
                'bases': base_names,
                'metadata': metadata,
                'engine_name': engine_name,
                'filename': filename,
                'is_base_or_mixin': is_true_base,
            }
    except Exception as e:
        errors.append(f"(unknown): {str(e)}")

# Second pass: resolve inheritance and build final metadata
def get_inherited_metadata(class_name, visited=None):
    """Recursively get metadata from parent classes."""
    if visited is None:
        visited = set()
    if class_name in visited:
        return {}  # Prevent circular inheritance
    visited.add(class_name)

    info = class_info.get(class_name)
    if not info:
        return {}

    # Start with parent metadata
    inherited = {}
    for base_name in info['bases']:
        parent_metadata = get_inherited_metadata(base_name, visited.copy())
        if parent_metadata:
            inherited = deep_merge(inherited, parent_metadata)

    # Merge with own metadata (own takes precedence)
    if info['metadata']:
        inherited = deep_merge(inherited, info['metadata'])

    return inherited

for class_name, info in class_info.items():
    # Skip base classes and mixins
    if info['is_base_or_mixin']:
        continue

    debug_info["classes_found"] += 1

    # Get final metadata with inheritance
    final_metadata = get_inherited_metadata(class_name)

    # Remove compatible_databases if not defined by this class (it's not inheritable)
    own_metadata = info['metadata'] or {}
    if 'compatible_databases' not in own_metadata and 'compatible_databases' in final_metadata:
        del final_metadata['compatible_databases']

    # Track if we inherited anything
    if final_metadata and final_metadata != own_metadata:
        debug_info["inherited_metadata"] += 1

    # Use class name as fallback for engine_name
    display_name = info['engine_name'] or class_name.replace('EngineSpec', '').replace('_', ' ')

    if final_metadata and isinstance(final_metadata, dict) and display_name:
        debug_info["classes_with_metadata"] += 1
        databases[display_name] = {
            'engine': display_name.lower().replace(' ', '_'),
            'engine_name': display_name,
            'module': info['filename'][:-3],  # Remove .py extension
            'documentation': final_metadata,
            'time_grains': {},
            'score': 0,
            'max_score': 0,
            'joins': True,
            'subqueries': True,
            'supports_dynamic_schema': False,
            'supports_catalog': False,
            'supports_dynamic_catalog': False,
            'ssh_tunneling': False,
            'query_cancelation': False,
            'supports_file_upload': False,
            'user_impersonation': False,
            'query_cost_estimation': False,
            'sql_validation': False,
        }

if errors and not databases:
    print(json.dumps({"error": "Parse errors", "details": errors, "debug": debug_info}), file=sys.stderr)

# Print debug info to stderr for troubleshooting
print(json.dumps(debug_info), file=sys.stderr)

print(json.dumps(databases, default=str))
`;
    // Run against the repo root so the relative specs_dir path resolves.
    const result = spawnSync('python3', ['-c', pythonCode], {
      cwd: ROOT_DIR,
      encoding: 'utf-8',
      timeout: 30000,
      maxBuffer: 10 * 1024 * 1024,
    });

    if (result.error) {
      throw result.error;
    }
    // Log debug info from stderr (the Python side writes its counters there
    // so stdout stays pure JSON).
    if (result.stderr) {
      console.log('Python debug info:', result.stderr.trim());
    }
    if (result.status !== 0) {
      throw new Error(result.stderr || 'Python script failed');
    }
    const databases = JSON.parse(result.stdout);
    if (Object.keys(databases).length === 0) {
      throw new Error('No metadata found in engine specs');
    }

    console.log(`Extracted metadata from ${Object.keys(databases).length} engine specs`);
    return databases;
  } catch (err) {
    // Caller treats null as "this source unavailable" and aborts the build.
    console.log('Engine spec metadata extraction failed:', err.message);
    return null;
  }
}
|
|
|
|
/**
 * Build aggregate statistics from the database data.
 *
 * @param {object} databases - display-name -> database entry map; entries
 *   carry a `documentation` object plus diagnostic feature flags/scores.
 * @returns {object} Summary counters plus a byCategory map of
 *   display-name -> array of database names.
 */
function buildStatistics(databases) {
  // Map category constant names (emitted by the Python metadata extraction)
  // to display names. Hoisted out of the loop — the original rebuilt this
  // object literal for every category of every database.
  const categoryDisplayNames = {
    'CLOUD_AWS': 'Cloud - AWS',
    'CLOUD_GCP': 'Cloud - Google',
    'CLOUD_AZURE': 'Cloud - Azure',
    'CLOUD_DATA_WAREHOUSES': 'Cloud Data Warehouses',
    'APACHE_PROJECTS': 'Apache Projects',
    'TRADITIONAL_RDBMS': 'Traditional RDBMS',
    'ANALYTICAL_DATABASES': 'Analytical Databases',
    'SEARCH_NOSQL': 'Search & NoSQL',
    'QUERY_ENGINES': 'Query Engines',
    'TIME_SERIES': 'Time Series Databases',
    'OTHER': 'Other Databases',
    'OPEN_SOURCE': 'Open Source',
    'HOSTED_OPEN_SOURCE': 'Hosted Open Source',
    'PROPRIETARY': 'Proprietary',
  };

  const stats = {
    totalDatabases: Object.keys(databases).length,
    withDocumentation: 0,
    withConnectionString: 0,
    withDrivers: 0,
    withAuthMethods: 0,
    supportsJoins: 0,
    supportsSubqueries: 0,
    supportsDynamicSchema: 0,
    supportsCatalog: 0,
    averageScore: 0,
    maxScore: 0,
    byCategory: {},
  };

  let totalScore = 0;

  for (const [name, db] of Object.entries(databases)) {
    const docs = db.documentation || {};

    if (Object.keys(docs).length > 0) stats.withDocumentation++;
    if (docs.connection_string || docs.drivers?.length > 0)
      stats.withConnectionString++;
    if (docs.drivers?.length > 0) stats.withDrivers++;
    if (docs.authentication_methods?.length > 0) stats.withAuthMethods++;
    if (db.joins) stats.supportsJoins++;
    if (db.subqueries) stats.supportsSubqueries++;
    if (db.supports_dynamic_schema) stats.supportsDynamicSchema++;
    if (db.supports_catalog) stats.supportsCatalog++;

    totalScore += db.score || 0;
    if (db.max_score > stats.maxScore) stats.maxScore = db.max_score;

    // Use categories from documentation metadata (computed by Python).
    // Each database can belong to multiple categories; unknown constants
    // fall through as their own display name.
    const categories = docs.categories || ['OTHER'];
    for (const cat of categories) {
      const displayName = categoryDisplayNames[cat] || cat;
      if (!stats.byCategory[displayName]) {
        stats.byCategory[displayName] = [];
      }
      stats.byCategory[displayName].push(name);
    }
  }

  // Guard the empty-input case — the original divided by zero and produced
  // NaN for averageScore when no databases were found.
  stats.averageScore =
    stats.totalDatabases > 0 ? Math.round(totalScore / stats.totalDatabases) : 0;

  return stats;
}
|
|
|
|
/**
 * Convert a database display name to a URL-friendly slug: lowercase,
 * alphanumeric runs joined by single hyphens, no leading/trailing hyphen.
 */
function toSlug(name) {
  const tokens = name.toLowerCase().split(/[^a-z0-9]+/).filter(Boolean);
  return tokens.join('-');
}
|
|
|
|
/**
 * Generate MDX content for a single database page.
 *
 * @param {string} name - database display name; used in the frontmatter and
 *   as the lookup key into databases.json.
 * @param {object} db - database entry; db.documentation.description (when
 *   present) feeds the page's meta description.
 * @returns {string} Complete MDX file content.
 */
function generateDatabaseMDX(name, db) {
  const description = db.documentation?.description || `Documentation for ${name} database connection.`;
  // Frontmatter-safe description: truncate, escape backslashes and quotes,
  // and collapse newlines to spaces — a raw newline inside the quoted YAML
  // scalar broke the frontmatter block in the original.
  const shortDesc = description
    .slice(0, 160)
    .replace(/\\/g, '\\\\')
    .replace(/"/g, '\\"')
    .replace(/\r?\n/g, ' ');

  return `---
title: ${name}
sidebar_label: ${name}
description: "${shortDesc}"
hide_title: true
---

{/*
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
*/}

import { DatabasePage } from '@site/src/components/databases';
import databaseData from '@site/src/data/databases.json';

<DatabasePage name="${name}" database={databaseData.databases["${name}"]} />
`;
}
|
|
|
|
/**
 * Generate the index MDX for the databases overview page.
 *
 * @param {object} statistics - output of buildStatistics(); only
 *   statistics.totalDatabases is interpolated into the page body.
 * @param {boolean} [usedFlaskContext=true] - when false, appends a developer
 *   note explaining that diagnostics were not computed (fallback build mode).
 * @returns {string} Complete MDX file content for docs/databases/index.mdx.
 */
function generateIndexMDX(statistics, usedFlaskContext = true) {
  // Admonition appended only for fallback (non-Flask) builds.
  const fallbackNotice = usedFlaskContext ? '' : `
:::info Developer Note
This documentation was built without Flask context, so feature diagnostics (scores, time grain support, etc.)
may not reflect actual database capabilities. For full diagnostics, build docs locally with:

\`\`\`bash
cd docs && npm run gen-db-docs
\`\`\`

This requires a working Superset development environment.
:::

`;

  return `---
title: Connecting to Databases
sidebar_label: Overview
sidebar_position: 1
---

{/*
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
*/}

import { DatabaseIndex } from '@site/src/components/databases';
import databaseData from '@site/src/data/databases.json';

# Connecting to Databases

Superset does not ship bundled with connectivity to databases. The main step in connecting
Superset to a database is to **install the proper database driver(s)** in your environment.

:::note
You'll need to install the required packages for the database you want to use as your metadata database
as well as the packages needed to connect to the databases you want to access through Superset.
For information about setting up Superset's metadata database, please refer to
installation documentations ([Docker Compose](/docs/installation/docker-compose), [Kubernetes](/docs/installation/kubernetes))
:::

## Supported Databases

Superset supports **${statistics.totalDatabases} databases** with varying levels of feature support.
Click on any database name to see detailed documentation including connection strings,
authentication methods, and configuration options.

<DatabaseIndex data={databaseData} />

## Installing Database Drivers

Superset requires a Python [DB-API database driver](https://peps.python.org/pep-0249/)
and a [SQLAlchemy dialect](https://docs.sqlalchemy.org/en/20/dialects/) to be installed for
each database engine you want to connect to.

### Installing Drivers in Docker

For Docker deployments, create a \`requirements-local.txt\` file in the \`docker\` directory:

\`\`\`bash
# Create the requirements file
touch ./docker/requirements-local.txt

# Add your driver (e.g., for PostgreSQL)
echo "psycopg2-binary" >> ./docker/requirements-local.txt
\`\`\`

Then restart your containers. The drivers will be installed automatically.

### Installing Drivers with pip

For non-Docker installations:

\`\`\`bash
pip install <driver-package>
\`\`\`

See individual database pages for the specific driver packages needed.

## Connecting Through the UI

1. Go to **Settings → Data: Database Connections**
2. Click **+ DATABASE**
3. Select your database type or enter a SQLAlchemy URI
4. Click **Test Connection** to verify
5. Click **Connect** to save

## Contributing

To add or update database documentation, add a \`metadata\` attribute to your engine spec class in
\`superset/db_engine_specs/\`. Documentation is auto-generated from these metadata attributes.

See [METADATA_STATUS.md](https://github.com/apache/superset/blob/master/superset/db_engine_specs/METADATA_STATUS.md)
for the current status of database documentation and the [README](https://github.com/apache/superset/blob/master/superset/db_engine_specs/README.md) for the metadata schema.
${fallbackNotice}`;
}
|
|
|
|
// README.md at the repository root carries an auto-generated logo grid;
// everything between these two HTML-comment markers is replaced by
// updateReadme() below.
const README_PATH = path.join(ROOT_DIR, 'README.md');
const README_START_MARKER = '<!-- SUPPORTED_DATABASES_START -->';
const README_END_MARKER = '<!-- SUPPORTED_DATABASES_END -->';
|
|
|
|
/**
 * Read image dimensions, with fallback SVG viewBox parsing for cases where
 * image-size can't handle SVG width/height attributes (e.g. scientific
 * notation). Returns null when no usable dimensions can be determined.
 */
function getImageDimensions(imgPath) {
  const sizeOf = require('image-size');
  try {
    const measured = sizeOf(imgPath);
    // image-size may misparse SVG attributes (e.g. width="1e3" -> 1);
    // recover real dimensions from the viewBox when the result looks bogus.
    const suspectSvg =
      measured.type === 'svg' && (measured.width < 2 || measured.height < 2);
    if (suspectSvg) {
      const svgSource = fs.readFileSync(imgPath, 'utf-8');
      const viewBoxMatch = svgSource.match(/viewBox=["']([^"']+)["']/);
      if (viewBoxMatch) {
        const nums = viewBoxMatch[1].trim().split(/[\s,]+/).map(Number);
        if (nums.length >= 4 && nums[2] > 0 && nums[3] > 0) {
          return { width: nums[2], height: nums[3] };
        }
      }
    }
    if (measured.width > 0 && measured.height > 0) {
      return { width: measured.width, height: measured.height };
    }
  } catch {
    // Unreadable/unsupported image — fall through to the null return so the
    // caller can apply its height-only fallback.
  }
  return null;
}
|
|
|
|
/**
 * Compute display dimensions that fit within a bounding box while preserving
 * the image's aspect ratio. A minimum height is enforced so very wide logos
 * remain legible (width may then exceed maxWidth).
 *
 * @param {number} imgWidth - intrinsic image width
 * @param {number} imgHeight - intrinsic image height
 * @param {number} maxWidth - bounding-box width
 * @param {number} maxHeight - bounding-box height
 * @param {number} minHeight - floor on the rendered height
 * @returns {{width: number, height: number}} rounded display dimensions
 */
function fitToBoundingBox(imgWidth, imgHeight, maxWidth, maxHeight, minHeight) {
  const aspect = imgWidth / imgHeight;

  // First candidate: fill the full box height.
  let height = maxHeight;
  let width = height * aspect;

  // Too wide for the box? Pin the width and shrink the height to match.
  if (width > maxWidth) {
    width = maxWidth;
    height = width / aspect;
  }

  // Shrunk below the legibility floor? Grow back to the minimum height,
  // letting the width overflow the box.
  if (height < minHeight) {
    height = minHeight;
    width = height * aspect;
  }

  return { width: Math.round(width), height: Math.round(height) };
}
|
|
|
|
/**
 * Generate the database logos HTML fragment for README.md.
 *
 * Only databases that declare both a logo and a homepage URL are included,
 * deduplicated by logo filename to match the docs homepage behavior
 * (index.tsx). Actual image dimensions are read so aspect ratios are
 * preserved in the rendered grid.
 *
 * @param {object} databases - display-name -> database entry map
 * @returns {string} HTML fragment, or '' when no database has a usable logo
 */
function generateReadmeLogos(databases) {
  // Alphabetical order, first occurrence wins per logo filename.
  const seenLogos = new Set();
  const dbsWithLogos = Object.entries(databases)
    .filter(([, db]) => db.documentation?.logo && db.documentation?.homepage_url)
    .sort(([a], [b]) => a.localeCompare(b))
    .filter(([, db]) => {
      const logo = db.documentation.logo;
      if (seenLogos.has(logo)) return false;
      seenLogos.add(logo);
      return true;
    });

  if (dbsWithLogos.length === 0) {
    return '';
  }

  // Bounding box for each logo tile in the README grid.
  const MAX_WIDTH = 150;
  const MAX_HEIGHT = 40;
  const MIN_HEIGHT = 24;

  const DOCS_BASE = 'https://superset.apache.org/docs/databases/supported';

  // Generate linked logo tags with aspect-ratio-preserving dimensions.
  const logoTags = dbsWithLogos.map(([name, db]) => {
    const logo = db.documentation.logo;
    // Consistency fix: reuse the shared toSlug() helper (the original
    // duplicated the slug regex inline) so README links always match the
    // generated page paths.
    const slug = toSlug(name);
    const imgPath = path.join(IMAGES_DIR, logo);

    const dims = getImageDimensions(imgPath);
    let sizeAttrs;
    if (dims) {
      const { width, height } = fitToBoundingBox(dims.width, dims.height, MAX_WIDTH, MAX_HEIGHT, MIN_HEIGHT);
      sizeAttrs = `width="${width}" height="${height}"`;
    } else {
      console.warn(` Could not read dimensions for ${logo}, using height-only fallback`);
      sizeAttrs = `height="${MAX_HEIGHT}"`;
    }

    const img = `<img src="docs/static/img/databases/${logo}" alt="${name}" ${sizeAttrs} />`;
    return ` <a href="${DOCS_BASE}/${slug}" title="${name}">${img}</a>`;
  });

  // Join with whitespace between logos for spacing — GitHub strips
  // style/class attributes, so CSS-based gaps are not an option.
  return `<p align="center">
${logoTags.join(' \n')}
</p>`;
}
|
|
|
|
/**
 * Update README.md with the generated database logo grid.
 * Replaces whatever sits between the start/end markers; no-ops (returning
 * false) when the file or markers are missing, or when nothing changed.
 *
 * @param {object} databases - display-name -> database entry map
 * @returns {boolean} true when README.md was rewritten
 */
function updateReadme(databases) {
  if (!fs.existsSync(README_PATH)) {
    console.log('README.md not found, skipping update');
    return false;
  }

  const original = fs.readFileSync(README_PATH, 'utf-8');

  // Both markers must be present for a safe in-place replacement.
  const hasMarkers =
    original.includes(README_START_MARKER) && original.includes(README_END_MARKER);
  if (!hasMarkers) {
    console.log('README.md missing database markers, skipping update');
    console.log(` Add ${README_START_MARKER} and ${README_END_MARKER} to enable auto-generation`);
    return false;
  }

  const logosHtml = generateReadmeLogos(databases);

  // Swap everything between the markers (inclusive) for the fresh section.
  const markerPattern = new RegExp(
    `${README_START_MARKER}[\\s\\S]*?${README_END_MARKER}`,
    'g'
  );
  const updated = original.replace(
    markerPattern,
    `${README_START_MARKER}\n${logosHtml}\n${README_END_MARKER}`
  );

  if (updated === original) {
    console.log('README.md database logos unchanged');
    return false;
  }

  fs.writeFileSync(README_PATH, updated);
  console.log('Updated README.md database logos');
  return true;
}
|
|
|
|
/**
 * Extract custom_errors from engine specs for troubleshooting documentation
 * by running the sibling extract_custom_errors.py script.
 *
 * @returns {object|null} module-name -> {class-name -> [errors]} map, or
 *   null when extraction fails (treated as best-effort by the caller).
 */
function extractCustomErrors() {
  console.log('Extracting custom_errors from engine specs...');

  try {
    const scriptPath = path.join(__dirname, 'extract_custom_errors.py');
    const proc = spawnSync('python3', [scriptPath], {
      cwd: ROOT_DIR,
      encoding: 'utf-8',
      timeout: 30000,
      maxBuffer: 10 * 1024 * 1024,
    });

    if (proc.error) {
      throw proc.error;
    }
    if (proc.status !== 0) {
      throw new Error(proc.stderr || 'Python script failed');
    }

    const customErrors = JSON.parse(proc.stdout);

    // Summarize what was found: total errors across all classes/modules.
    const moduleCount = Object.keys(customErrors).length;
    let errorCount = 0;
    for (const classes of Object.values(customErrors)) {
      for (const errs of Object.values(classes)) {
        errorCount += errs.length;
      }
    }
    console.log(` Found ${errorCount} custom errors across ${moduleCount} modules`);
    return customErrors;
  } catch (err) {
    console.log(' Could not extract custom_errors:', err.message);
    return null;
  }
}
|
|
|
|
/**
 * Merge custom_errors into database documentation entries (mutates
 * `databases` in place). Matching is done by module name, since both
 * datasets are keyed that way.
 *
 * @param {object} databases - display-name -> database entry map
 * @param {object|null} customErrors - module-stem -> {class -> [errors]} map;
 *   null (extraction failure) makes this a no-op.
 */
function mergeCustomErrors(databases, customErrors) {
  if (!customErrors) return;

  let mergedCount = 0;

  for (const db of Object.values(databases)) {
    if (!db.module) continue;

    // Flask mode keys modules by dotted path (superset.db_engine_specs.postgres)
    // while the extractor keys them by file stem (postgres) — normalize to
    // the last path segment.
    const moduleName = db.module.split('.').pop();
    const perClassErrors = customErrors[moduleName];
    if (!perClassErrors) continue;

    // Flatten the errors from every class in this module into one list.
    const allErrors = Object.values(perClassErrors).flat();

    if (allErrors.length > 0) {
      db.documentation = db.documentation || {};
      db.documentation.custom_errors = allErrors;
      mergedCount += 1;
    }
  }

  if (mergedCount > 0) {
    console.log(` Merged custom_errors into ${mergedCount} database docs`);
  }
}
|
|
|
|
/**
 * Load previously generated database data (databases.json) if present.
 *
 * @returns {object|null} Parsed data, or null when the file is missing
 *   or unreadable/invalid JSON.
 */
function loadExistingData() {
  if (!fs.existsSync(DATA_OUTPUT_FILE)) {
    return null;
  }

  try {
    return JSON.parse(fs.readFileSync(DATA_OUTPUT_FILE, 'utf-8'));
  } catch (error) {
    // Corrupt or partially written file — treat as absent.
    console.log('Could not load existing data:', error.message);
    return null;
  }
}
|
|
|
|
/**
 * Merge new documentation with diagnostics from a previous run.
 * For every database whose prior entry has a positive score, the score,
 * time_grains, and feature flags are copied onto the new entry (mutated in
 * place); documentation from the new extraction is kept as-is.
 *
 * @param {object} newDatabases - freshly extracted display-name -> entry map
 * @param {object|null} existingData - previously written databases.json content
 * @returns {object} newDatabases (same reference), for chaining
 */
function mergeWithExistingDiagnostics(newDatabases, existingData) {
  if (!existingData?.databases) return newDatabases;

  const DIAGNOSTIC_FIELDS = [
    'score', 'max_score', 'time_grains', 'joins', 'subqueries',
    'supports_dynamic_schema', 'supports_catalog', 'supports_dynamic_catalog',
    'ssh_tunneling', 'query_cancelation', 'supports_file_upload',
    'user_impersonation', 'query_cost_estimation', 'sql_validation'
  ];

  for (const [name, db] of Object.entries(newDatabases)) {
    const prior = existingData.databases[name];
    // Only trust prior entries that actually carried diagnostics.
    if (!prior || !(prior.score > 0)) continue;

    for (const field of DIAGNOSTIC_FIELDS) {
      if (prior[field] !== undefined) {
        db[field] = prior[field];
      }
    }
  }

  const preservedCount = Object.values(newDatabases).filter((d) => d.score > 0).length;
  if (preservedCount > 0) {
    console.log(`Preserved diagnostics for ${preservedCount} databases from existing data`);
  }

  return newDatabases;
}
|
|
|
|
/**
 * Main entry point. Orchestrates the whole pipeline:
 *   1. pick a data source (Flask-context lib.py, else AST extraction),
 *   2. optionally preserve diagnostics from a previous databases.json,
 *   3. merge custom_errors, build statistics,
 *   4. write databases.json, per-database MDX pages, the index page and
 *      sidebar _category_.json files,
 *   5. optionally regenerate the README logo grid.
 */
async function main() {
  console.log('Generating database documentation...\n');

  // Ensure output directories exist
  if (!fs.existsSync(DATA_OUTPUT_DIR)) {
    fs.mkdirSync(DATA_OUTPUT_DIR, { recursive: true });
  }
  if (!fs.existsSync(MDX_OUTPUT_DIR)) {
    fs.mkdirSync(MDX_OUTPUT_DIR, { recursive: true });
  }

  // Load existing data for potential merge
  const existingData = loadExistingData();

  // Try sources in order of preference:
  // 1. Full script with Flask context (richest data with diagnostics)
  // 2. Engine spec metadata files (works in CI without Flask)
  let databases = tryRunFullScript();
  let usedFlaskContext = !!databases;

  if (!databases) {
    // Extract from engine spec metadata (preferred for CI)
    databases = extractEngineSpecMetadata();
  }

  if (!databases || Object.keys(databases).length === 0) {
    console.error('Failed to generate database documentation data.');
    console.error('Could not extract from Flask app or engine spec metadata.');
    process.exit(1);
  }

  console.log(`Processed ${Object.keys(databases).length} databases\n`);

  // Check if new data has scores; if not, preserve existing diagnostics
  // from the previous databases.json (fallback builds produce score 0).
  const hasNewScores = Object.values(databases).some((db) => db.score > 0);
  if (!hasNewScores && existingData) {
    databases = mergeWithExistingDiagnostics(databases, existingData);
  }

  // Extract and merge custom_errors for troubleshooting documentation
  // (best-effort: extractCustomErrors may return null).
  const customErrors = extractCustomErrors();
  mergeCustomErrors(databases, customErrors);

  // Build statistics
  const statistics = buildStatistics(databases);

  // Create the final output structure
  const output = {
    generated: new Date().toISOString(),
    statistics,
    databases,
  };

  // Write the JSON file (with trailing newline for POSIX compliance)
  fs.writeFileSync(DATA_OUTPUT_FILE, JSON.stringify(output, null, 2) + '\n');
  console.log(`Generated: ${path.relative(DOCS_DIR, DATA_OUTPUT_FILE)}`);

  // Ensure supported directory exists
  if (!fs.existsSync(MDX_SUPPORTED_DIR)) {
    fs.mkdirSync(MDX_SUPPORTED_DIR, { recursive: true });
  }

  // Clean up old MDX files that are no longer in the database list
  console.log(`\nCleaning up old MDX files in ${path.relative(DOCS_DIR, MDX_SUPPORTED_DIR)}/`);
  const existingMdxFiles = fs.readdirSync(MDX_SUPPORTED_DIR).filter(f => f.endsWith('.mdx'));
  const validSlugs = new Set(Object.keys(databases).map(name => `${toSlug(name)}.mdx`));
  let removedCount = 0;
  for (const file of existingMdxFiles) {
    if (!validSlugs.has(file)) {
      fs.unlinkSync(path.join(MDX_SUPPORTED_DIR, file));
      removedCount++;
    }
  }
  if (removedCount > 0) {
    console.log(` Removed ${removedCount} outdated MDX files`);
  }

  // Generate individual MDX files for each database in supported/ subdirectory
  console.log(`\nGenerating MDX files in ${path.relative(DOCS_DIR, MDX_SUPPORTED_DIR)}/`);

  let mdxCount = 0;
  for (const [name, db] of Object.entries(databases)) {
    const slug = toSlug(name);
    const mdxContent = generateDatabaseMDX(name, db);
    const mdxPath = path.join(MDX_SUPPORTED_DIR, `${slug}.mdx`);
    fs.writeFileSync(mdxPath, mdxContent);
    mdxCount++;
  }
  console.log(` Generated ${mdxCount} database pages`);

  // Generate index page in parent databases/ directory
  const indexContent = generateIndexMDX(statistics, usedFlaskContext);
  const indexPath = path.join(MDX_OUTPUT_DIR, 'index.mdx');
  fs.writeFileSync(indexPath, indexContent);
  console.log(` Generated index page`);

  // Generate _category_.json for databases/ directory (Docusaurus sidebar)
  const categoryJson = {
    label: 'Databases',
    position: 1,
    link: {
      type: 'doc',
      id: 'databases/index',
    },
  };
  fs.writeFileSync(
    path.join(MDX_OUTPUT_DIR, '_category_.json'),
    JSON.stringify(categoryJson, null, 2) + '\n'
  );

  // Generate _category_.json for supported/ subdirectory (collapsible)
  const supportedCategoryJson = {
    label: 'Supported Databases',
    position: 2,
    collapsed: true,
    collapsible: true,
  };
  fs.writeFileSync(
    path.join(MDX_SUPPORTED_DIR, '_category_.json'),
    JSON.stringify(supportedCategoryJson, null, 2) + '\n'
  );
  console.log(` Generated _category_.json files`);

  // Update README.md database logos (only when explicitly requested)
  if (process.env.UPDATE_README === 'true' || process.argv.includes('--update-readme')) {
    console.log('');
    updateReadme(databases);
  }

  console.log(`\nStatistics:`);
  console.log(` Total databases: ${statistics.totalDatabases}`);
  console.log(` With documentation: ${statistics.withDocumentation}`);
  console.log(` With connection strings: ${statistics.withConnectionString}`);
  console.log(` Categories: ${Object.keys(statistics.byCategory).length}`);

  console.log('\nDone!');
}
|
|
|
|
// Run the generator. Exit non-zero on unhandled failure — the original
// `.catch(console.error)` swallowed errors and exited 0, letting CI report
// a broken docs build as success.
main().catch((err) => {
  console.error(err);
  process.exit(1);
});
|