# superset2/docs/scripts/extract_custom_errors.py

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

"""
Extract custom_errors from database engine specs for documentation.

This script parses engine spec files to extract error handling information
that can be displayed on database documentation pages.

Usage: python scripts/extract_custom_errors.py
Output: JSON mapping of engine spec module names to their custom errors
"""

import ast
import json  # noqa: TID251 - standalone docs script, not part of superset
import sys
from pathlib import Path
from typing import Any

# Map SupersetErrorType values to human-readable categories and issue codes.
#
# Keys are compared against the attribute name of the error-type expression
# found in each custom_errors tuple (the ``X`` in ``SupersetErrorType.X``).
# Every entry carries three fields that are copied verbatim into the output:
#   "category":    coarse grouping label for the error
#   "description": short human-readable summary
#   "issue_codes": list of integer issue codes
# Error types missing from this table are still reported, just without
# these three fields.
# NOTE(review): presumably maintained by hand to mirror Superset's own
# error-type -> issue-code mapping; confirm when error types are added.
ERROR_TYPE_INFO = {
    # --- Authentication ---
    "CONNECTION_INVALID_USERNAME_ERROR": {
        "category": "Authentication",
        "description": "Invalid username",
        "issue_codes": [1012],
    },
    "CONNECTION_INVALID_PASSWORD_ERROR": {
        "category": "Authentication",
        "description": "Invalid password",
        "issue_codes": [1013],
    },
    "CONNECTION_ACCESS_DENIED_ERROR": {
        "category": "Authentication",
        "description": "Access denied",
        "issue_codes": [1014, 1015],
    },
    # --- Connection ---
    "CONNECTION_INVALID_HOSTNAME_ERROR": {
        "category": "Connection",
        "description": "Invalid hostname",
        "issue_codes": [1007],
    },
    "CONNECTION_PORT_CLOSED_ERROR": {
        "category": "Connection",
        "description": "Port closed or refused",
        "issue_codes": [1008],
    },
    "CONNECTION_HOST_DOWN_ERROR": {
        "category": "Connection",
        "description": "Host unreachable",
        "issue_codes": [1009],
    },
    "CONNECTION_UNKNOWN_DATABASE_ERROR": {
        "category": "Connection",
        "description": "Unknown database",
        "issue_codes": [1015],
    },
    # --- Permissions / configuration ---
    "CONNECTION_DATABASE_PERMISSIONS_ERROR": {
        "category": "Permissions",
        "description": "Insufficient permissions",
        "issue_codes": [1017],
    },
    "CONNECTION_MISSING_PARAMETERS_ERROR": {
        "category": "Configuration",
        "description": "Missing parameters",
        "issue_codes": [1018],
    },
    "CONNECTION_DATABASE_TIMEOUT": {
        "category": "Connection",
        "description": "Connection timeout",
        "issue_codes": [1001, 1009],
    },
    # --- Query ---
    "COLUMN_DOES_NOT_EXIST_ERROR": {
        "category": "Query",
        "description": "Column not found",
        "issue_codes": [1003, 1004],
    },
    "TABLE_DOES_NOT_EXIST_ERROR": {
        "category": "Query",
        "description": "Table not found",
        "issue_codes": [1003, 1005],
    },
    "SCHEMA_DOES_NOT_EXIST_ERROR": {
        "category": "Query",
        "description": "Schema not found",
        "issue_codes": [1003, 1016],
    },
    "SYNTAX_ERROR": {
        "category": "Query",
        "description": "SQL syntax error",
        "issue_codes": [1030],
    },
    "OBJECT_DOES_NOT_EXIST_ERROR": {
        "category": "Query",
        "description": "Object not found",
        "issue_codes": [1029],
    },
    # --- General ---
    "GENERIC_DB_ENGINE_ERROR": {
        "category": "General",
        "description": "Database engine error",
        "issue_codes": [1002],
    },
}


def extract_string_from_call(node: ast.Call) -> str | None:
    """Extract string from __() or _() translation calls."""
    if not node.args:
        return None
    arg = node.args[0]
    if isinstance(arg, ast.Constant) and isinstance(arg.value, str):
        return arg.value
    elif isinstance(arg, ast.JoinedStr):
        # f-string - try to reconstruct
        parts = []
        for value in arg.values:
            if isinstance(value, ast.Constant):
                parts.append(str(value.value))
            elif isinstance(value, ast.FormattedValue):
                # Just use a placeholder
                parts.append("{...}")
        return "".join(parts)
    return None


def extract_custom_errors_from_file(filepath: Path) -> dict[str, list[dict[str, Any]]]:
    """
    Collect ``custom_errors`` dict literals declared in an engine spec file.

    The file is parsed (never imported or executed) and every class found
    anywhere in the module is inspected.  Only statements directly in a
    class body are considered, in either form::

        custom_errors: SomeAnnotation = { ... }
        custom_errors = { ... }

    The assigned value must be a dict literal.  Classes whose dict yields no
    usable entries are omitted.

    Returns a dict mapping class names to their list of extracted errors.
    Read, decode and syntax problems are reported on stderr and result in
    whatever was collected so far (normally an empty dict).
    """
    found = {}

    try:
        with open(filepath, "r", encoding="utf-8") as handle:
            source = handle.read()

        class_defs = (
            candidate
            for candidate in ast.walk(ast.parse(source))
            if isinstance(candidate, ast.ClassDef)
        )

        for class_def in class_defs:
            for statement in class_def.body:
                # Resolve the single assignment target for both supported
                # statement kinds; anything else is irrelevant here.
                if isinstance(statement, ast.AnnAssign):
                    target = statement.target
                elif isinstance(statement, ast.Assign) and len(statement.targets) == 1:
                    target = statement.targets[0]
                else:
                    continue

                if not (isinstance(target, ast.Name) and target.id == "custom_errors"):
                    continue
                # Annotation-only declarations have no value; computed values
                # (calls, names, ...) cannot be analysed statically.
                if not isinstance(statement.value, ast.Dict):
                    continue

                parsed = extract_errors_from_dict(statement.value, source)
                if parsed:
                    found[class_def.name] = parsed

    except (OSError, SyntaxError, ValueError) as exc:
        print(f"Error parsing {filepath}: {exc}", file=sys.stderr)

    return found


def extract_regex_info(key: ast.expr) -> dict[str, Any]:
    """Describe the expression used as a ``custom_errors`` dict key.

    * A bare name (e.g. a module-level regex constant) yields
      ``{"regex_name": <name>}``.
    * A call to an attribute named ``compile`` (e.g. ``re.compile("...")``)
      whose first positional argument is a literal yields
      ``{"regex_pattern": <literal value>}``.
    * Anything else yields an empty dict.
    """
    if isinstance(key, ast.Name):
        return {"regex_name": key.id}

    if not isinstance(key, ast.Call):
        return {}

    callee = key.func
    is_compile_call = isinstance(callee, ast.Attribute) and callee.attr == "compile"
    if is_compile_call and key.args and isinstance(key.args[0], ast.Constant):
        return {"regex_pattern": key.args[0].value}

    return {}


def extract_invalid_fields(extra_node: ast.Dict) -> list[str]:
    """Extract invalid fields from the extra dict."""
    for k, v in zip(extra_node.keys, extra_node.values, strict=False):
        if (
            isinstance(k, ast.Constant)
            and k.value == "invalid"
            and isinstance(v, ast.List)
        ):
            return [elem.value for elem in v.elts if isinstance(elem, ast.Constant)]
    return []


def extract_error_tuple_info(value: ast.Tuple) -> dict[str, Any]:
    """Extract error info from a ``(message, error_type, extra)`` tuple node.

    Every element is treated as optional: a tuple shorter than expected
    yields fewer keys instead of raising ``IndexError``, so the function is
    safe to call on any tuple node.

    Args:
        value: AST node of the tuple stored as a ``custom_errors`` value.

    Returns:
        A dict containing whichever of these keys could be determined:

        * ``"message_template"`` - the message text, taken from a string
          literal or from the first argument of a call such as ``__("...")``.
          Non-string literals are ignored so the result stays
          JSON-serialisable and consistent with ``extract_string_from_call``.
        * ``"error_type"`` - attribute name of the second element
          (the ``X`` in ``SupersetErrorType.X``).
        * ``"category"``, ``"description"``, ``"issue_codes"`` - copied from
          ``ERROR_TYPE_INFO`` when the error type is listed there.
        * ``"invalid_fields"`` - non-empty list taken from the ``"invalid"``
          entry of the third element when that element is a dict literal.
    """
    result: dict[str, Any] = {}
    elements = value.elts

    # First element: message template
    if elements:
        msg_node = elements[0]
        if isinstance(msg_node, ast.Call):
            message = extract_string_from_call(msg_node)
            if message:
                result["message_template"] = message
        elif isinstance(msg_node, ast.Constant) and isinstance(msg_node.value, str):
            result["message_template"] = msg_node.value

    # Second element: SupersetErrorType.SOMETHING
    if len(elements) >= 2 and isinstance(elements[1], ast.Attribute):
        error_type = elements[1].attr
        result["error_type"] = error_type
        type_info = ERROR_TYPE_INFO.get(error_type)
        if type_info is not None:
            result["category"] = type_info["category"]
            result["description"] = type_info["description"]
            result["issue_codes"] = type_info["issue_codes"]

    # Third element: extra dict with invalid fields
    if len(elements) >= 3 and isinstance(elements[2], ast.Dict):
        invalid_fields = extract_invalid_fields(elements[2])
        if invalid_fields:
            result["invalid_fields"] = invalid_fields

    return result


def extract_errors_from_dict(dict_node: ast.Dict, source: str) -> list[dict[str, Any]]:
    """Extract error information from a custom_errors dict AST node."""
    errors = []

    for key, value in zip(dict_node.keys, dict_node.values, strict=False):
        if key is None or value is None:
            continue

        error_info = extract_regex_info(key)

        if isinstance(value, ast.Tuple) and len(value.elts) >= 2:
            error_info.update(extract_error_tuple_info(value))

        if error_info.get("error_type") and error_info.get("message_template"):
            errors.append(error_info)

    return errors


def main() -> None:
    """Scan every engine spec module and print the collected errors as JSON.

    The engine specs are looked up in ``superset/db_engine_specs`` under the
    directory two levels above the one containing this script.  If that
    directory is missing, an error is written to stderr and the process
    exits with status 1.

    Files whose name starts with an underscore are skipped.  The output maps
    module name -> class name -> list of errors and only includes modules
    for which something was extracted.
    """
    specs_dir = Path(__file__).parent.parent.parent / "superset" / "db_engine_specs"

    if not specs_dir.exists():
        print(f"Error: Engine specs directory not found: {specs_dir}", file=sys.stderr)
        sys.exit(1)

    # Sorted so the JSON output is stable from run to run.
    spec_files = (
        path for path in sorted(specs_dir.glob("*.py")) if not path.name.startswith("_")
    )
    extracted = (
        (path.stem, extract_custom_errors_from_file(path)) for path in spec_files
    )
    all_errors = {module: errors for module, errors in extracted if errors}

    print(json.dumps(all_errors, indent=2))


# Run the extraction only when this file is executed as a script, so the
# helper functions above can be imported without side effects.
if __name__ == "__main__":
    main()