docs(db_engine_specs): restructure feature table for GitHub rendering (#35809)

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
Joe Li
2025-10-23 10:47:35 -07:00
committed by GitHub
parent 5e4a80e5d0
commit 93cb60b24e
2 changed files with 870 additions and 66 deletions

View File

@@ -120,6 +120,7 @@ def diagnose(spec: type[BaseEngineSpec]) -> dict[str, Any]:
{
"module": spec.__module__,
"limit_method": spec.limit_method.value,
"limit_clause": getattr(spec, "allow_limit_clause", True),
"joins": spec.allows_joins,
"subqueries": spec.allows_subqueries,
"alias_in_select": spec.allows_alias_in_select,
@@ -209,9 +210,374 @@ def get_name(spec: type[BaseEngineSpec]) -> str:
return spec.engine_name or spec.engine
def format_markdown_table(headers: list[str], rows: list[list[Any]]) -> str:
    """
    Render ``headers`` and ``rows`` as a GitHub-flavored markdown table.

    Every cell is stringified with ``str()``; the separator row uses the
    minimal ``---`` form accepted by GitHub's table extension.
    """
    header_line = "| " + " | ".join(headers) + " |"
    separator_line = "| " + " | ".join("---" for _ in headers) + " |"
    body_lines = [
        "| " + " | ".join(str(cell) for cell in row) + " |" for row in rows
    ]
    return "\n".join([header_line, separator_line, *body_lines])
def generate_focused_table(
    info: dict[str, dict[str, Any]],
    feature_keys: list[str],
    column_labels: list[str],
    filter_fn: Any = None,
    value_extractor: Any = None,
    preserve_order: bool = False,
) -> tuple[str, list[str]]:
    """
    Generate a focused markdown table with databases as rows.

    Args:
        info: Dictionary mapping database names to their feature info
        feature_keys: List of feature keys to extract from db_info
        column_labels: List of column header labels
        filter_fn: Optional function to filter databases (receives db_info dict)
        value_extractor: Optional function to extract value (receives db_info, key)
        preserve_order: When True, keep the insertion order of ``info`` instead
            of sorting database names alphabetically

    Returns:
        Tuple of (markdown table string, list of excluded database names)
    """
    # Partition databases into included/excluded using the optional filter.
    filtered_info: dict[str, dict[str, Any]] = {}
    excluded_dbs: list[str] = []
    for db_name, db_info in info.items():
        if filter_fn is None or filter_fn(db_info):
            filtered_info[db_name] = db_info
        else:
            excluded_dbs.append(db_name)

    # Nothing survived the filter: no table to render.
    if not filtered_info:
        return "", excluded_dbs

    # Build headers: Database + feature columns.
    headers = ["Database"] + column_labels

    # Sort by database name unless the caller asked to preserve order.
    db_names = list(filtered_info) if preserve_order else sorted(filtered_info)

    rows: list[list[Any]] = []
    for db_name in db_names:
        db_info = filtered_info[db_name]
        row: list[Any] = [db_name]
        for key in feature_keys:
            if value_extractor:
                row.append(value_extractor(db_info, key))
            else:
                # Missing keys render as an empty cell.
                row.append(db_info.get(key, ""))
        rows.append(row)

    return format_markdown_table(headers, rows), excluded_dbs
def calculate_support_level(db_info: dict[str, Any], feature_keys: list[str]) -> str:
    """
    Calculate support level for a group of features.

    Keys of the form ``"time_grains.<GRAIN>"`` are looked up inside the
    nested ``db_info["time_grains"]`` mapping; all other keys are read
    directly from ``db_info``. Missing keys count as unsupported instead
    of raising, and mixed dotted/plain key lists are resolved per key.

    Returns: "Supported", "Partial", or "Not supported"
    """
    if not feature_keys:
        return "Not supported"

    def is_supported(key: str) -> bool:
        # Dotted keys address the nested time-grain mapping; use .get so an
        # absent "time_grains" entry means "not supported" rather than KeyError.
        if key.startswith("time_grains."):
            grain = key.split(".", 1)[1]
            return bool(db_info.get("time_grains", {}).get(grain, False))
        return bool(db_info.get(key, False))

    supported = sum(1 for key in feature_keys if is_supported(key))
    if supported == 0:
        return "Not supported"
    if supported == len(feature_keys):
        return "Supported"
    return "Partial"
def generate_feature_tables() -> str:
    """
    Generate multiple focused markdown tables organized by feature categories.

    Returns a complete markdown document with 7 tables optimized for readability:
    an overview with per-group support levels, database information, SQL
    capabilities, common and extended time grains, core platform/metadata
    features, and operational/advanced features.
    """
    # Collect diagnostics for every engine spec, keyed by display name.
    info = {}
    for spec in sorted(load_engine_specs(), key=get_name):
        info[get_name(spec)] = diagnose(spec)
    # remove 3rd party DB engine specs
    info = {k: v for k, v in info.items() if v["module"].startswith("superset")}
    # Sort by score descending for overview table
    sorted_info = dict(sorted(info.items(), key=lambda x: x[1]["score"], reverse=True))
    # Accumulates markdown fragments; joined with newlines at the end.
    output = []
    # Table 1: Feature Overview
    output.append("### Feature Overview\n")
    # Define feature groups for summary
    sql_basics = [
        "joins",
        "subqueries",
        "alias_in_select",
        "alias_in_orderby",
        "cte_in_subquery",
    ]
    advanced_sql = [
        "time_groupby_inline",
        "alias_to_source_column",
        "order_by_not_in_select",
        "expressions_in_orderby",
    ]
    # "time_grains.<NAME>" keys are resolved against the nested time_grains
    # mapping by calculate_support_level.
    common_grains = [
        f"time_grains.{g}"
        for g in ["SECOND", "MINUTE", "HOUR", "DAY", "WEEK", "MONTH", "QUARTER", "YEAR"]
    ]
    extended_grains = [
        f"time_grains.{g}"
        for g in [
            "FIVE_SECONDS",
            "THIRTY_SECONDS",
            "FIVE_MINUTES",
            "TEN_MINUTES",
            "FIFTEEN_MINUTES",
            "THIRTY_MINUTES",
            "HALF_HOUR",
            "SIX_HOURS",
            "WEEK_STARTING_SUNDAY",
            "WEEK_STARTING_MONDAY",
            "WEEK_ENDING_SATURDAY",
            "WEEK_ENDING_SUNDAY",
            "QUARTER_YEAR",
        ]
    ]
    integrations = [
        "ssh_tunneling",
        "query_cancelation",
        "get_metrics",
        "get_extra_table_metadata",
        "dbapi_exception_mapping",
        "custom_errors",
        "dynamic_schema",
        "where_latest_partition",
    ]
    advanced_features = [
        "user_impersonation",
        "expand_data",
        "query_cost_estimation",
        "sql_validation",
    ]
    headers = [
        "Database",
        "Score",
        "SQL Basics",
        "Advanced SQL",
        "Common Time Grains",
        "Extended Time Grains",
        "Integrations",
        "Advanced Features",
    ]
    # One overview row per database: raw score plus a support level per group.
    rows = []
    for db_name, db_info in sorted_info.items():
        row = [
            db_name,
            db_info["score"],
            calculate_support_level(db_info, sql_basics),
            calculate_support_level(db_info, advanced_sql),
            calculate_support_level(db_info, common_grains),
            calculate_support_level(db_info, extended_grains),
            calculate_support_level(db_info, integrations),
            calculate_support_level(db_info, advanced_features),
        ]
        rows.append(row)
    output.append(format_markdown_table(headers, rows))
    # Table 2: Database Information
    output.append("\n### Database Information\n")
    # Custom value extractor for database info to handle limit_method enum
    def extract_db_info(db_info: dict[str, Any], key: str) -> str:
        # NOTE(review): annotated -> str but falls through to db_info.get,
        # which may return non-str values (e.g. max_column_name); rendered
        # cells are stringified by format_markdown_table anyway.
        if key == "limit_method":
            # Convert enum value to name
            from superset.sql.parse import LimitMethod
            return LimitMethod(db_info[key]).name
        return db_info.get(key, "")
    table, _ = generate_focused_table(
        info,
        feature_keys=["module", "limit_method", "limit_clause", "max_column_name"],
        column_labels=["Module", "Limit Method", "Limit Clause", "Max Column Name"],
        value_extractor=extract_db_info,
    )
    output.append(table)
    # Table 3: SQL Capabilities (combined SQL Capabilities + Advanced SQL)
    output.append("\n### SQL Capabilities\n")
    table, _ = generate_focused_table(
        info,
        feature_keys=[
            "joins",
            "subqueries",
            "alias_in_select",
            "alias_in_orderby",
            "cte_in_subquery",
            "sql_comments",
            "escaped_colons",
            "time_groupby_inline",
            "alias_to_source_column",
            "order_by_not_in_select",
            "expressions_in_orderby",
        ],
        column_labels=[
            "JOINs",
            "Subqueries",
            "Aliases in SELECT",
            "Aliases in ORDER BY",
            "CTEs",
            "Comments",
            "Escaped Colons",
            "Inline Time Groupby",
            "Source Column When Aliased",
            "Aggregations in ORDER BY",
            "Expressions in ORDER BY",
        ],
    )
    output.append(table)
    # Helper to extract time grain values
    def extract_time_grain(db_info: dict[str, Any], grain_name: str) -> str:
        # NOTE(review): annotated -> str but returns the raw (typically bool)
        # value from the time_grains mapping; stringified when rendered.
        return db_info["time_grains"].get(grain_name, False)
    # Table 4: Time Grains Common
    output.append("\n### Time Grains Common\n")
    # NOTE(review): rebinds the earlier prefixed common_grains list with bare
    # grain names, as expected by extract_time_grain for this table.
    common_grains = [
        "SECOND",
        "MINUTE",
        "HOUR",
        "DAY",
        "WEEK",
        "MONTH",
        "QUARTER",
        "YEAR",
    ]
    table, _ = generate_focused_table(
        info,
        feature_keys=common_grains,
        column_labels=common_grains,
        value_extractor=extract_time_grain,
    )
    output.append(table)
    # Table 5: Time Grains Extended
    output.append("\n### Time Grains Extended\n")
    # NOTE(review): likewise rebinds extended_grains with bare grain names.
    extended_grains = [
        "FIVE_SECONDS",
        "THIRTY_SECONDS",
        "FIVE_MINUTES",
        "TEN_MINUTES",
        "FIFTEEN_MINUTES",
        "THIRTY_MINUTES",
        "HALF_HOUR",
        "SIX_HOURS",
        "WEEK_STARTING_SUNDAY",
        "WEEK_STARTING_MONDAY",
        "WEEK_ENDING_SATURDAY",
        "WEEK_ENDING_SUNDAY",
        "QUARTER_YEAR",
    ]
    table, _ = generate_focused_table(
        info,
        feature_keys=extended_grains,
        column_labels=extended_grains,
        value_extractor=extract_time_grain,
    )
    output.append(table)
    # Table 6: Core Platform & Metadata Features
    output.append("\n### Core Platform & Metadata Features\n")
    output.append("\nIntegration with platform features and metadata handling.\n")
    table, _ = generate_focused_table(
        info,
        feature_keys=[
            "masked_encrypted_extra",
            "column_type_mapping",
            "function_names",
            "file_upload",
            "dynamic_schema",
            "catalog",
            "dynamic_catalog",
            "ssh_tunneling",
            "where_latest_partition",
            "query_cancelation",
            "get_metrics",
            "get_extra_table_metadata",
            "dbapi_exception_mapping",
            "custom_errors",
        ],
        column_labels=[
            "Masked Encrypted Extra",
            "Column Type Mappings",
            "Function Names",
            "File Upload",
            "Dynamic Schema",
            "Catalog",
            "Dynamic Catalog",
            "SSH Tunneling",
            "Latest Partition",
            "Query Cancellation",
            "Get Metrics",
            "Extra Table Metadata",
            "Exception Mapping",
            "Custom Errors",
        ],
    )
    output.append(table)
    # Table 7: Operational & Advanced Features
    output.append("\n### Operational & Advanced Features\n")
    table, _ = generate_focused_table(
        info,
        feature_keys=[
            "user_impersonation",
            "expand_data",
            "query_cost_estimation",
            "sql_validation",
        ],
        column_labels=[
            "User Impersonation",
            "Expand Data",
            "Cost Estimation",
            "SQL Validation",
        ],
    )
    output.append(table)
    return "\n".join(output)
def generate_table() -> list[list[Any]]:
"""
Generate a table showing info for all DB engine specs.
DEPRECATED: This function is kept for backward compatibility.
Use generate_feature_tables() instead for better readability.
"""
info = {}
for spec in sorted(load_engine_specs(), key=get_name):
@@ -301,10 +667,6 @@ if __name__ == "__main__":
app = create_app()
with app.app_context():
rows = generate_table()
output = generate_feature_tables()
headers = rows.pop(0)
print("| " + " | ".join(headers) + " |")
print("| " + " ---| " * len(headers))
for row in rows:
print("| " + " | ".join(str(col) for col in row) + " |")
print(output)