mirror of
https://github.com/apache/superset.git
synced 2026-04-20 00:24:38 +00:00
docs(db_engine_specs): restructure feature table for GitHub rendering (#35809)
Co-authored-by: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -120,6 +120,7 @@ def diagnose(spec: type[BaseEngineSpec]) -> dict[str, Any]:
|
||||
{
|
||||
"module": spec.__module__,
|
||||
"limit_method": spec.limit_method.value,
|
||||
"limit_clause": getattr(spec, "allow_limit_clause", True),
|
||||
"joins": spec.allows_joins,
|
||||
"subqueries": spec.allows_subqueries,
|
||||
"alias_in_select": spec.allows_alias_in_select,
|
||||
@@ -209,9 +210,374 @@ def get_name(spec: type[BaseEngineSpec]) -> str:
|
||||
return spec.engine_name or spec.engine
|
||||
|
||||
|
||||
def format_markdown_table(headers: list[str], rows: list[list[Any]]) -> str:
    """
    Render ``headers`` and ``rows`` as a GitHub-flavored markdown table.

    Cells are stringified with ``str``; the separator row uses ``---`` for
    every column.
    """

    def render(cells: list[Any]) -> str:
        # One markdown table row: "| a | b | ... |"
        return "| " + " | ".join(str(cell) for cell in cells) + " |"

    separator = render(["---"] * len(headers))
    body = [render(row) for row in rows]
    return "\n".join([render(headers), separator, *body])
|
||||
|
||||
|
||||
def generate_focused_table(
    info: dict[str, dict[str, Any]],
    feature_keys: list[str],
    column_labels: list[str],
    filter_fn: Any = None,
    value_extractor: Any = None,
    preserve_order: bool = False,
) -> tuple[str, list[str]]:
    """
    Generate a focused markdown table with databases as rows.

    Args:
        info: Dictionary mapping database names to their feature info
        feature_keys: List of feature keys to extract from each db_info
        column_labels: List of column header labels
        filter_fn: Optional function to filter databases (receives db_info dict)
        value_extractor: Optional function to extract value (receives db_info, key)
        preserve_order: When True, keep the insertion order of ``info`` instead
            of sorting database names alphabetically

    Returns:
        Tuple of (markdown table string, list of excluded database names).
        The table string is empty when every database was filtered out.
    """
    # Partition databases into kept vs. excluded in a single pass so that
    # filter_fn is invoked exactly once per database.
    kept: dict[str, dict[str, Any]] = {}
    excluded: list[str] = []
    for db_name, db_info in info.items():
        if filter_fn is not None and not filter_fn(db_info):
            excluded.append(db_name)
        else:
            kept[db_name] = db_info

    if not kept:
        return "", excluded

    ordered_names = list(kept) if preserve_order else sorted(kept)

    def cell(db_info: dict[str, Any], key: str) -> Any:
        # Delegate to the custom extractor when given; otherwise fall back
        # to a blank cell for missing keys.
        return value_extractor(db_info, key) if value_extractor else db_info.get(key, "")

    rows = [
        [name, *(cell(kept[name], key) for key in feature_keys)]
        for name in ordered_names
    ]
    return format_markdown_table(["Database", *column_labels], rows), excluded
|
||||
|
||||
|
||||
def calculate_support_level(db_info: dict[str, Any], feature_keys: list[str]) -> str:
    """
    Summarize how fully ``db_info`` covers a group of feature flags.

    Keys prefixed with ``time_grains.`` are looked up inside the nested
    ``time_grains`` mapping; all other keys are read from the top level.

    Returns: "Supported" when every feature is present, "Not supported"
    when none are (or the group is empty), and "Partial" otherwise.
    """
    if not feature_keys:
        return "Not supported"

    prefix = "time_grains."
    if all(key.startswith(prefix) for key in feature_keys):
        grains = db_info["time_grains"]
        flags = [grains.get(key[len(prefix):], False) for key in feature_keys]
    else:
        flags = [db_info.get(key, False) for key in feature_keys]

    hits = sum(bool(flag) for flag in flags)
    if hits == 0:
        return "Not supported"
    if hits == len(flags):
        return "Supported"
    return "Partial"
|
||||
|
||||
|
||||
def generate_feature_tables() -> str:
    """
    Generate multiple focused markdown tables organized by feature categories.

    Loads all bundled DB engine specs, diagnoses each one, and renders a
    complete markdown document with 7 tables optimized for readability:

    1. Feature Overview (summary support levels, sorted by score)
    2. Database Information
    3. SQL Capabilities
    4. Time Grains – Common
    5. Time Grains – Extended
    6. Core Platform & Metadata Features
    7. Operational & Advanced Features

    Returns:
        The assembled markdown document as a single string.
    """
    info = {}
    for spec in sorted(load_engine_specs(), key=get_name):
        info[get_name(spec)] = diagnose(spec)

    # remove 3rd party DB engine specs
    info = {k: v for k, v in info.items() if v["module"].startswith("superset")}

    # Sort by score descending for overview table
    sorted_info = dict(sorted(info.items(), key=lambda x: x[1]["score"], reverse=True))

    # Single source of truth for time grain names: the overview summary groups
    # and the per-grain detail tables (4 and 5) are both derived from these,
    # so the two views can never drift apart.
    common_grain_names = [
        "SECOND",
        "MINUTE",
        "HOUR",
        "DAY",
        "WEEK",
        "MONTH",
        "QUARTER",
        "YEAR",
    ]
    extended_grain_names = [
        "FIVE_SECONDS",
        "THIRTY_SECONDS",
        "FIVE_MINUTES",
        "TEN_MINUTES",
        "FIFTEEN_MINUTES",
        "THIRTY_MINUTES",
        "HALF_HOUR",
        "SIX_HOURS",
        "WEEK_STARTING_SUNDAY",
        "WEEK_STARTING_MONDAY",
        "WEEK_ENDING_SATURDAY",
        "WEEK_ENDING_SUNDAY",
        "QUARTER_YEAR",
    ]

    # Define feature groups for the overview summary
    sql_basics = [
        "joins",
        "subqueries",
        "alias_in_select",
        "alias_in_orderby",
        "cte_in_subquery",
    ]
    advanced_sql = [
        "time_groupby_inline",
        "alias_to_source_column",
        "order_by_not_in_select",
        "expressions_in_orderby",
    ]
    common_grains = [f"time_grains.{g}" for g in common_grain_names]
    extended_grains = [f"time_grains.{g}" for g in extended_grain_names]
    integrations = [
        "ssh_tunneling",
        "query_cancelation",
        "get_metrics",
        "get_extra_table_metadata",
        "dbapi_exception_mapping",
        "custom_errors",
        "dynamic_schema",
        "where_latest_partition",
    ]
    advanced_features = [
        "user_impersonation",
        "expand_data",
        "query_cost_estimation",
        "sql_validation",
    ]

    output = []

    # Table 1: Feature Overview
    output.append("### Feature Overview\n")
    headers = [
        "Database",
        "Score",
        "SQL Basics",
        "Advanced SQL",
        "Common Time Grains",
        "Extended Time Grains",
        "Integrations",
        "Advanced Features",
    ]
    rows = []
    for db_name, db_info in sorted_info.items():
        rows.append(
            [
                db_name,
                db_info["score"],
                calculate_support_level(db_info, sql_basics),
                calculate_support_level(db_info, advanced_sql),
                calculate_support_level(db_info, common_grains),
                calculate_support_level(db_info, extended_grains),
                calculate_support_level(db_info, integrations),
                calculate_support_level(db_info, advanced_features),
            ]
        )
    output.append(format_markdown_table(headers, rows))

    # Table 2: Database Information
    output.append("\n### Database Information\n")

    def extract_db_info(db_info: dict[str, Any], key: str) -> Any:
        """Extract one database-info cell; renders limit_method as its enum name."""
        if key == "limit_method":
            # Convert enum value to name; imported lazily to avoid a
            # module-level import cycle.
            from superset.sql.parse import LimitMethod

            return LimitMethod(db_info[key]).name
        return db_info.get(key, "")

    table, _ = generate_focused_table(
        info,
        feature_keys=["module", "limit_method", "limit_clause", "max_column_name"],
        column_labels=["Module", "Limit Method", "Limit Clause", "Max Column Name"],
        value_extractor=extract_db_info,
    )
    output.append(table)

    # Table 3: SQL Capabilities (combined SQL Capabilities + Advanced SQL)
    output.append("\n### SQL Capabilities\n")
    table, _ = generate_focused_table(
        info,
        feature_keys=[
            "joins",
            "subqueries",
            "alias_in_select",
            "alias_in_orderby",
            "cte_in_subquery",
            "sql_comments",
            "escaped_colons",
            "time_groupby_inline",
            "alias_to_source_column",
            "order_by_not_in_select",
            "expressions_in_orderby",
        ],
        column_labels=[
            "JOINs",
            "Subqueries",
            "Aliases in SELECT",
            "Aliases in ORDER BY",
            "CTEs",
            "Comments",
            "Escaped Colons",
            "Inline Time Groupby",
            "Source Column When Aliased",
            "Aggregations in ORDER BY",
            "Expressions in ORDER BY",
        ],
    )
    output.append(table)

    # Helper to extract time grain support flags from the nested mapping
    def extract_time_grain(db_info: dict[str, Any], grain_name: str) -> bool:
        return db_info["time_grains"].get(grain_name, False)

    # Table 4: Time Grains – Common
    output.append("\n### Time Grains – Common\n")
    table, _ = generate_focused_table(
        info,
        feature_keys=common_grain_names,
        column_labels=common_grain_names,
        value_extractor=extract_time_grain,
    )
    output.append(table)

    # Table 5: Time Grains – Extended
    output.append("\n### Time Grains – Extended\n")
    table, _ = generate_focused_table(
        info,
        feature_keys=extended_grain_names,
        column_labels=extended_grain_names,
        value_extractor=extract_time_grain,
    )
    output.append(table)

    # Table 6: Core Platform & Metadata Features
    output.append("\n### Core Platform & Metadata Features\n")
    output.append("\nIntegration with platform features and metadata handling.\n")
    table, _ = generate_focused_table(
        info,
        feature_keys=[
            "masked_encrypted_extra",
            "column_type_mapping",
            "function_names",
            "file_upload",
            "dynamic_schema",
            "catalog",
            "dynamic_catalog",
            "ssh_tunneling",
            "where_latest_partition",
            "query_cancelation",
            "get_metrics",
            "get_extra_table_metadata",
            "dbapi_exception_mapping",
            "custom_errors",
        ],
        column_labels=[
            "Masked Encrypted Extra",
            "Column Type Mappings",
            "Function Names",
            "File Upload",
            "Dynamic Schema",
            "Catalog",
            "Dynamic Catalog",
            "SSH Tunneling",
            "Latest Partition",
            "Query Cancellation",
            "Get Metrics",
            "Extra Table Metadata",
            "Exception Mapping",
            "Custom Errors",
        ],
    )
    output.append(table)

    # Table 7: Operational & Advanced Features
    output.append("\n### Operational & Advanced Features\n")
    table, _ = generate_focused_table(
        info,
        feature_keys=[
            "user_impersonation",
            "expand_data",
            "query_cost_estimation",
            "sql_validation",
        ],
        column_labels=[
            "User Impersonation",
            "Expand Data",
            "Cost Estimation",
            "SQL Validation",
        ],
    )
    output.append(table)

    return "\n".join(output)
|
||||
|
||||
|
||||
def generate_table() -> list[list[Any]]:
|
||||
"""
|
||||
Generate a table showing info for all DB engine specs.
|
||||
|
||||
DEPRECATED: This function is kept for backward compatibility.
|
||||
Use generate_feature_tables() instead for better readability.
|
||||
"""
|
||||
info = {}
|
||||
for spec in sorted(load_engine_specs(), key=get_name):
|
||||
@@ -301,10 +667,6 @@ if __name__ == "__main__":
|
||||
|
||||
app = create_app()
|
||||
with app.app_context():
|
||||
rows = generate_table()
|
||||
output = generate_feature_tables()
|
||||
|
||||
headers = rows.pop(0)
|
||||
print("| " + " | ".join(headers) + " |")
|
||||
print("| " + " ---| " * len(headers))
|
||||
for row in rows:
|
||||
print("| " + " | ".join(str(col) for col in row) + " |")
|
||||
print(output)
|
||||
|
||||
Reference in New Issue
Block a user