mirror of
https://github.com/apache/superset.git
synced 2026-05-08 09:25:56 +00:00
- Removed complex AST node handling for function calls and attributes - Simplified to handle only basic types: constants, lists, dicts, names - Reduced complexity from 19 to 6 branches - Maintains same functionality for actual config values while being more maintainable 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
219 lines
7.2 KiB
Python
Executable File
219 lines
7.2 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Extract configuration schema from config_defaults.py.
|
|
|
|
This script parses the existing config_defaults.py file and extracts:
|
|
- All configuration keys and their default values
|
|
- Comments above each key as descriptions
|
|
- Types inferred from the default values
|
|
|
|
The output is a comprehensive JSON schema that can be used for:
|
|
- Documentation generation
|
|
- Configuration validation
|
|
- IDE autocomplete
|
|
"""
|
|
|
|
import ast
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
from typing import Any, Dict, List
|
|
|
|
|
|
def infer_type(value: Any) -> str:
    """Map a default value to the schema type name used in the output.

    Returns one of "null", "boolean", "integer", "number", "string",
    "array" (for lists and tuples), "object" (for dicts), or "unknown"
    when the value matches none of those.
    """
    if value is None:
        return "null"

    # Order matters: bool is a subclass of int, so it must be tested first.
    type_names = (
        (bool, "boolean"),
        (int, "integer"),
        (float, "number"),
        (str, "string"),
        ((list, tuple), "array"),
        (dict, "object"),
    )
    for python_type, schema_name in type_names:
        if isinstance(value, python_type):
            return schema_name
    return "unknown"
|
|
|
|
|
|
def extract_comments_before_line(lines: List[str], line_num: int) -> List[str]:
    """Collect the ``#`` comments found just above a 1-based source line.

    Scans upward from the line preceding ``line_num`` and inspects at most
    five lines. Blank lines and comment lines with no text are skipped; the
    scan stops at the first line that is neither blank nor a comment.

    Returns the comment texts in top-to-bottom order, with the leading ``#``
    and surrounding whitespace removed.
    """
    # line_num is 1-based, so the line directly above it is at index - 2.
    start = line_num - 2
    # Stop after five lines or at the top of the file, whichever comes first.
    stop = max(start - 5, -1)

    found: List[str] = []
    for index in range(start, stop, -1):
        text = lines[index].strip()
        if not text:
            # Blank line: keep looking further up.
            continue
        if not text.startswith("#"):
            # Reached code or other content: the comment run has ended.
            break
        body = text[1:].strip()
        if body:
            found.append(body)

    # Comments were gathered bottom-up; flip them back into source order.
    found.reverse()
    return found
|
|
|
|
|
|
def safe_eval(node: ast.AST) -> Any:
|
|
"""Safely evaluate an AST node to get its value."""
|
|
try:
|
|
# Handle basic constant values
|
|
if isinstance(node, ast.Constant):
|
|
return node.value
|
|
elif isinstance(node, ast.Num): # Python < 3.8
|
|
return node.n
|
|
elif isinstance(node, ast.Str): # Python < 3.8
|
|
return node.s
|
|
elif isinstance(node, ast.List):
|
|
return [safe_eval(item) for item in node.elts]
|
|
elif isinstance(node, ast.Dict):
|
|
return {
|
|
safe_eval(k): safe_eval(v)
|
|
for k, v in zip(node.keys, node.values, strict=False)
|
|
if k is not None
|
|
}
|
|
elif isinstance(node, ast.Name):
|
|
# Handle common constants
|
|
if node.id in ("True", "False", "None"):
|
|
return {"True": True, "False": False, "None": None}[node.id]
|
|
else:
|
|
return f"<{node.id}>" # Placeholder for variables
|
|
else:
|
|
# For everything else, just return a descriptive placeholder
|
|
return f"<{type(node).__name__}>"
|
|
except Exception:
|
|
return "<unknown>"
|
|
|
|
|
|
def extract_config_schema(config_file: Path) -> Dict[str, Any]:
    """Build a schema dict from the upper-case assignments in a Python file.

    The file is parsed with ``ast`` (never imported or executed). Every
    single-target assignment to an all-upper-case name is recorded, whether
    it is a plain assignment (``FOO = 1``) or an annotated one
    (``FOO: int = 1``). Annotated declarations without a value are ignored.
    All nodes of the tree are visited, so assignments nested inside other
    statements are included; if a name is assigned more than once, the
    assignment visited last overwrites the earlier entry.

    Args:
        config_file: Path of the Python source file to analyse.

    Returns:
        A mapping of config name to a dict with the keys ``"type"``,
        ``"default"``, ``"description"`` and ``"category"``.

    Raises:
        OSError: If the file cannot be read.
        SyntaxError: If the file is not valid Python.
    """
    # Python source defaults to UTF-8; do not depend on the locale encoding.
    with open(config_file, "r", encoding="utf-8") as f:
        content = f.read()
    lines = content.splitlines()

    # Parse the Python file
    tree = ast.parse(content)

    schema: Dict[str, Any] = {}

    for node in ast.walk(tree):
        # Normalise both assignment flavours to (target, value).
        if isinstance(node, ast.Assign):
            # Skip chained assignments such as ``A = B = 1``.
            if len(node.targets) != 1:
                continue
            target = node.targets[0]
            value_node = node.value
        elif isinstance(node, ast.AnnAssign):
            # ``FOO: int`` declares a name without giving it a default.
            if node.value is None:
                continue
            target = node.target
            value_node = node.value
        else:
            continue

        # Only simple names qualify (no attributes, subscripts or tuples).
        if not isinstance(target, ast.Name):
            continue

        var_name = target.id

        # Only include uppercase variables (configuration convention)
        if not var_name.isupper():
            continue

        default_value = safe_eval(value_node)

        # Comments directly above the assignment become the description.
        comments = extract_comments_before_line(lines, node.lineno)
        description = " ".join(comments) if comments else ""

        schema[var_name] = {
            "type": infer_type(default_value),
            "default": default_value,
            "description": description,
            "category": categorize_config(var_name),
        }

    return schema
|
|
|
|
|
|
def categorize_config(var_name: str) -> str:
    """Assign a category to a config key by substring match on its name.

    The name is lower-cased and checked against each category's keywords in
    a fixed order; the first category with any keyword contained in the name
    wins. Matching is plain substring containment, not whole-word matching.
    Names that match no keyword are categorised as "general".
    """
    # Order is significant: earlier categories take precedence.
    rules = (
        ("performance", ("limit", "timeout", "cache", "pool")),
        ("features", ("feature", "flag", "enable", "disable")),
        ("ui", ("theme", "color", "style", "ui")),
        ("database", ("db", "database", "sql", "query")),
        ("security", ("auth", "security", "login", "oauth")),
        ("logging", ("log", "debug", "stats")),
        ("email", ("mail", "smtp", "email")),
        ("async", ("celery", "async", "worker")),
    )

    lowered = var_name.lower()
    for category, keywords in rules:
        for keyword in keywords:
            if keyword in lowered:
                return category
    return "general"
|
|
|
|
|
|
def main() -> None:
    """Generate ``config_schema.json`` from ``config_defaults.py``.

    Both files live in the ``superset`` directory located two levels above
    this script. If the defaults file is missing, an error is printed and the
    process exits with status 1. Otherwise the schema is extracted, written
    as indented JSON together with metadata and a per-category grouping, and
    a short summary is printed.
    """
    superset_root = Path(__file__).parent.parent
    package_dir = superset_root / "superset"
    config_file = package_dir / "config_defaults.py"

    if not config_file.exists():
        print(f"Error: {config_file} not found")
        sys.exit(1)

    print("Extracting configuration schema...")
    schema = extract_config_schema(config_file)

    # Index the same entries by category, in first-seen category order.
    grouped: Dict[str, Dict[str, Any]] = {}
    for name, entry in schema.items():
        grouped.setdefault(entry["category"], {})[name] = entry

    metadata = {
        "generated_from": str(config_file),
        "total_configs": len(schema),
        "description": (
            "Superset configuration schema extracted from config_defaults.py"
        ),
    }
    output = {
        "metadata": metadata,
        "configs": schema,
        "by_category": grouped,
    }

    # default=str stringifies any value json cannot serialise natively.
    output_file = package_dir / "config_schema.json"
    with open(output_file, "w") as f:
        json.dump(output, f, indent=2, default=str)

    print("✅ Schema extracted successfully!")
    print(f"📊 Total configurations: {len(schema)}")
    print(f"📂 Categories: {list(grouped)}")
    print(f"💾 Saved to: {output_file}")

    # Per-category counts
    print("\n📈 Category breakdown:")
    for category, configs in grouped.items():
        print(f" {category}: {len(configs)} configs")
|
|
|
|
|
|
# Run the extraction only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|