docs(db_engine_specs): restructure feature table for GitHub rendering (#35809)

Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
Joe Li
2025-10-23 10:47:35 -07:00
committed by GitHub
parent 5e4a80e5d0
commit 93cb60b24e
2 changed files with 870 additions and 66 deletions

View File

@@ -120,6 +120,7 @@ def diagnose(spec: type[BaseEngineSpec]) -> dict[str, Any]:
{
"module": spec.__module__,
"limit_method": spec.limit_method.value,
"limit_clause": getattr(spec, "allow_limit_clause", True),
"joins": spec.allows_joins,
"subqueries": spec.allows_subqueries,
"alias_in_select": spec.allows_alias_in_select,
@@ -209,9 +210,374 @@ def get_name(spec: type[BaseEngineSpec]) -> str:
return spec.engine_name or spec.engine
def format_markdown_table(headers: list[str], rows: list[list[Any]]) -> str:
    """
    Render ``headers`` and ``rows`` as a GitHub-flavored markdown table.

    Every cell is stringified with ``str()``; the separator row uses the
    minimal ``---`` form accepted by GitHub's table extension.
    """
    header_line = "| " + " | ".join(headers) + " |"
    separator_line = "| " + " | ".join("---" for _ in headers) + " |"
    body_lines = [
        "| " + " | ".join(str(cell) for cell in row) + " |" for row in rows
    ]
    return "\n".join([header_line, separator_line, *body_lines])
def generate_focused_table(
    info: dict[str, dict[str, Any]],
    feature_keys: list[str],
    column_labels: list[str],
    filter_fn: Any = None,
    value_extractor: Any = None,
    preserve_order: bool = False,
) -> tuple[str, list[str]]:
    """
    Generate a focused markdown table with databases as rows.

    Args:
        info: Dictionary mapping database names to their feature info
        feature_keys: List of feature keys to extract from db_info
        column_labels: List of column header labels
        filter_fn: Optional function to filter databases (receives db_info dict)
        value_extractor: Optional function to extract value (receives db_info, key)
        preserve_order: When True, keep the insertion order of ``info`` instead
            of sorting database names alphabetically

    Returns:
        Tuple of (markdown table string, list of excluded database names)
    """
    # Partition databases into included/excluded using the optional filter.
    filtered_info: dict[str, dict[str, Any]] = {}
    excluded_dbs: list[str] = []
    for db_name, db_info in info.items():
        if filter_fn is None or filter_fn(db_info):
            filtered_info[db_name] = db_info
        else:
            excluded_dbs.append(db_name)

    # Nothing survived the filter: no table to render.
    if not filtered_info:
        return "", excluded_dbs

    # Build headers: Database + feature columns.
    headers = ["Database"] + column_labels

    # Sort by database name unless the caller asked to preserve order.
    db_names = list(filtered_info) if preserve_order else sorted(filtered_info)

    rows: list[list[Any]] = []
    for db_name in db_names:
        db_info = filtered_info[db_name]
        row: list[Any] = [db_name]
        for key in feature_keys:
            if value_extractor:
                row.append(value_extractor(db_info, key))
            else:
                # Missing keys render as an empty cell.
                row.append(db_info.get(key, ""))
        rows.append(row)

    return format_markdown_table(headers, rows), excluded_dbs
def calculate_support_level(db_info: dict[str, Any], feature_keys: list[str]) -> str:
    """
    Calculate support level for a group of features.

    Keys of the form ``"time_grains.<GRAIN>"`` are looked up inside the
    nested ``db_info["time_grains"]`` mapping; all other keys are read
    directly from ``db_info``. Missing keys count as unsupported instead
    of raising, and mixed dotted/plain key lists are resolved per key.

    Returns: "Supported", "Partial", or "Not supported"
    """
    if not feature_keys:
        return "Not supported"

    def is_supported(key: str) -> bool:
        # Dotted keys address the nested time-grain mapping; use .get so an
        # absent "time_grains" entry means "not supported" rather than KeyError.
        if key.startswith("time_grains."):
            grain = key.split(".", 1)[1]
            return bool(db_info.get("time_grains", {}).get(grain, False))
        return bool(db_info.get(key, False))

    supported = sum(1 for key in feature_keys if is_supported(key))
    if supported == 0:
        return "Not supported"
    if supported == len(feature_keys):
        return "Supported"
    return "Partial"
def generate_feature_tables() -> str:
    """
    Generate multiple focused markdown tables organized by feature categories.

    Returns a complete markdown document with 7 tables optimized for readability:
    an overview with per-group support levels, database information, SQL
    capabilities, common and extended time grains, core platform/metadata
    features, and operational/advanced features.
    """
    # Collect diagnostics for every engine spec, keyed by display name.
    info = {}
    for spec in sorted(load_engine_specs(), key=get_name):
        info[get_name(spec)] = diagnose(spec)
    # remove 3rd party DB engine specs
    info = {k: v for k, v in info.items() if v["module"].startswith("superset")}
    # Sort by score descending for overview table
    sorted_info = dict(sorted(info.items(), key=lambda x: x[1]["score"], reverse=True))
    # Accumulates markdown fragments; joined with newlines at the end.
    output = []
    # Table 1: Feature Overview
    output.append("### Feature Overview\n")
    # Define feature groups for summary
    sql_basics = [
        "joins",
        "subqueries",
        "alias_in_select",
        "alias_in_orderby",
        "cte_in_subquery",
    ]
    advanced_sql = [
        "time_groupby_inline",
        "alias_to_source_column",
        "order_by_not_in_select",
        "expressions_in_orderby",
    ]
    # "time_grains.<NAME>" keys are resolved against the nested time_grains
    # mapping by calculate_support_level.
    common_grains = [
        f"time_grains.{g}"
        for g in ["SECOND", "MINUTE", "HOUR", "DAY", "WEEK", "MONTH", "QUARTER", "YEAR"]
    ]
    extended_grains = [
        f"time_grains.{g}"
        for g in [
            "FIVE_SECONDS",
            "THIRTY_SECONDS",
            "FIVE_MINUTES",
            "TEN_MINUTES",
            "FIFTEEN_MINUTES",
            "THIRTY_MINUTES",
            "HALF_HOUR",
            "SIX_HOURS",
            "WEEK_STARTING_SUNDAY",
            "WEEK_STARTING_MONDAY",
            "WEEK_ENDING_SATURDAY",
            "WEEK_ENDING_SUNDAY",
            "QUARTER_YEAR",
        ]
    ]
    integrations = [
        "ssh_tunneling",
        "query_cancelation",
        "get_metrics",
        "get_extra_table_metadata",
        "dbapi_exception_mapping",
        "custom_errors",
        "dynamic_schema",
        "where_latest_partition",
    ]
    advanced_features = [
        "user_impersonation",
        "expand_data",
        "query_cost_estimation",
        "sql_validation",
    ]
    headers = [
        "Database",
        "Score",
        "SQL Basics",
        "Advanced SQL",
        "Common Time Grains",
        "Extended Time Grains",
        "Integrations",
        "Advanced Features",
    ]
    # One overview row per database: raw score plus a support level per group.
    rows = []
    for db_name, db_info in sorted_info.items():
        row = [
            db_name,
            db_info["score"],
            calculate_support_level(db_info, sql_basics),
            calculate_support_level(db_info, advanced_sql),
            calculate_support_level(db_info, common_grains),
            calculate_support_level(db_info, extended_grains),
            calculate_support_level(db_info, integrations),
            calculate_support_level(db_info, advanced_features),
        ]
        rows.append(row)
    output.append(format_markdown_table(headers, rows))
    # Table 2: Database Information
    output.append("\n### Database Information\n")
    # Custom value extractor for database info to handle limit_method enum
    def extract_db_info(db_info: dict[str, Any], key: str) -> str:
        # NOTE(review): annotated -> str but falls through to db_info.get,
        # which may return non-str values (e.g. max_column_name); rendered
        # cells are stringified by format_markdown_table anyway.
        if key == "limit_method":
            # Convert enum value to name
            from superset.sql.parse import LimitMethod
            return LimitMethod(db_info[key]).name
        return db_info.get(key, "")
    table, _ = generate_focused_table(
        info,
        feature_keys=["module", "limit_method", "limit_clause", "max_column_name"],
        column_labels=["Module", "Limit Method", "Limit Clause", "Max Column Name"],
        value_extractor=extract_db_info,
    )
    output.append(table)
    # Table 3: SQL Capabilities (combined SQL Capabilities + Advanced SQL)
    output.append("\n### SQL Capabilities\n")
    table, _ = generate_focused_table(
        info,
        feature_keys=[
            "joins",
            "subqueries",
            "alias_in_select",
            "alias_in_orderby",
            "cte_in_subquery",
            "sql_comments",
            "escaped_colons",
            "time_groupby_inline",
            "alias_to_source_column",
            "order_by_not_in_select",
            "expressions_in_orderby",
        ],
        column_labels=[
            "JOINs",
            "Subqueries",
            "Aliases in SELECT",
            "Aliases in ORDER BY",
            "CTEs",
            "Comments",
            "Escaped Colons",
            "Inline Time Groupby",
            "Source Column When Aliased",
            "Aggregations in ORDER BY",
            "Expressions in ORDER BY",
        ],
    )
    output.append(table)
    # Helper to extract time grain values
    def extract_time_grain(db_info: dict[str, Any], grain_name: str) -> str:
        # NOTE(review): annotated -> str but returns the raw (typically bool)
        # value from the time_grains mapping; stringified when rendered.
        return db_info["time_grains"].get(grain_name, False)
    # Table 4: Time Grains Common
    output.append("\n### Time Grains Common\n")
    # NOTE(review): rebinds the earlier prefixed common_grains list with bare
    # grain names, as expected by extract_time_grain for this table.
    common_grains = [
        "SECOND",
        "MINUTE",
        "HOUR",
        "DAY",
        "WEEK",
        "MONTH",
        "QUARTER",
        "YEAR",
    ]
    table, _ = generate_focused_table(
        info,
        feature_keys=common_grains,
        column_labels=common_grains,
        value_extractor=extract_time_grain,
    )
    output.append(table)
    # Table 5: Time Grains Extended
    output.append("\n### Time Grains Extended\n")
    # NOTE(review): likewise rebinds extended_grains with bare grain names.
    extended_grains = [
        "FIVE_SECONDS",
        "THIRTY_SECONDS",
        "FIVE_MINUTES",
        "TEN_MINUTES",
        "FIFTEEN_MINUTES",
        "THIRTY_MINUTES",
        "HALF_HOUR",
        "SIX_HOURS",
        "WEEK_STARTING_SUNDAY",
        "WEEK_STARTING_MONDAY",
        "WEEK_ENDING_SATURDAY",
        "WEEK_ENDING_SUNDAY",
        "QUARTER_YEAR",
    ]
    table, _ = generate_focused_table(
        info,
        feature_keys=extended_grains,
        column_labels=extended_grains,
        value_extractor=extract_time_grain,
    )
    output.append(table)
    # Table 6: Core Platform & Metadata Features
    output.append("\n### Core Platform & Metadata Features\n")
    output.append("\nIntegration with platform features and metadata handling.\n")
    table, _ = generate_focused_table(
        info,
        feature_keys=[
            "masked_encrypted_extra",
            "column_type_mapping",
            "function_names",
            "file_upload",
            "dynamic_schema",
            "catalog",
            "dynamic_catalog",
            "ssh_tunneling",
            "where_latest_partition",
            "query_cancelation",
            "get_metrics",
            "get_extra_table_metadata",
            "dbapi_exception_mapping",
            "custom_errors",
        ],
        column_labels=[
            "Masked Encrypted Extra",
            "Column Type Mappings",
            "Function Names",
            "File Upload",
            "Dynamic Schema",
            "Catalog",
            "Dynamic Catalog",
            "SSH Tunneling",
            "Latest Partition",
            "Query Cancellation",
            "Get Metrics",
            "Extra Table Metadata",
            "Exception Mapping",
            "Custom Errors",
        ],
    )
    output.append(table)
    # Table 7: Operational & Advanced Features
    output.append("\n### Operational & Advanced Features\n")
    table, _ = generate_focused_table(
        info,
        feature_keys=[
            "user_impersonation",
            "expand_data",
            "query_cost_estimation",
            "sql_validation",
        ],
        column_labels=[
            "User Impersonation",
            "Expand Data",
            "Cost Estimation",
            "SQL Validation",
        ],
    )
    output.append(table)
    return "\n".join(output)
def generate_table() -> list[list[Any]]:
"""
Generate a table showing info for all DB engine specs.
DEPRECATED: This function is kept for backward compatibility.
Use generate_feature_tables() instead for better readability.
"""
info = {}
for spec in sorted(load_engine_specs(), key=get_name):
@@ -301,10 +667,6 @@ if __name__ == "__main__":
app = create_app()
with app.app_context():
rows = generate_table()
output = generate_feature_tables()
headers = rows.pop(0)
print("| " + " | ".join(headers) + " |")
print("| " + " ---| " * len(headers))
for row in rows:
print("| " + " | ".join(str(col) for col in row) + " |")
print(output)