address review: preserve existing JSON for unresolvable cap flags

Cap attrs assigned via expressions (e.g. DruidEngineSpec.allows_joins = is_feature_enabled("DRUID_JOINS")) can't be statically evaluated, so they previously silently fell back to the BaseEngineSpec default — causing generated docs to disagree with runtime behavior. Now the Python extractor tracks unresolvable assignments per class, propagates them through the MRO walk, and emits an _unresolved_cap_fields marker on the database entry. The JS layer uses that marker to prefer the value from the previously-generated databases.json (which was produced with Flask context and reflects real runtime values) and strips the marker before writing output. Verified against druid.py: extraction correctly emits "_unresolved_cap_fields": ["joins"], and the existing JSON's "joins": false is preserved rather than overwritten with the base default of true. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-04-25 11:04:48 +00:00 · 2026-04-22 19:26:07 -07:00
parent ff37592986
commit ebab0327a2
1 changed files with 110 additions and 20 deletions
--- a/docs/scripts/generate-database-docs.mjs
+++ b/docs/scripts/generate-database-docs.mjs
@@ -197,6 +197,21 @@ CAP_ATTR_DEFAULTS = {
    'allows_subqueries': True,
 }

+# Maps source capability attribute -> output field name used in databases.json.
+# When a cap attr is assigned an unevaluable expression (e.g.
+# allows_joins = is_feature_enabled("DRUID_JOINS")), the JS layer uses this
+# mapping to preserve the corresponding field from the previously-generated
+# JSON rather than silently inheriting an incorrect parent default.
+CAP_ATTR_TO_OUTPUT_FIELD = {
+    'allows_joins': 'joins',
+    'allows_subqueries': 'subqueries',
+    'supports_dynamic_schema': 'supports_dynamic_schema',
+    'supports_catalog': 'supports_catalog',
+    'supports_dynamic_catalog': 'supports_dynamic_catalog',
+    'disable_ssh_tunneling': 'ssh_tunneling',
+    'supports_file_upload': 'supports_file_upload',
+}
+
 # Methods that indicate a capability when overridden by a non-BaseEngineSpec class.
 # Mirrors the has_custom_method checks in superset/db_engine_specs/lib.py.
 # cancel_query / get_cancel_query_id / has_implicit_cancel -> query_cancelation
@@ -249,6 +264,11 @@ for filename in sorted(os.listdir(specs_dir)):
            engine_attr = None
            metadata = None
            cap_attrs = {}   # capability flag attributes defined directly in this class
+            # Cap attrs assigned via expressions we can't statically resolve
+            # (e.g. is_feature_enabled("FLAG")). Tracked so the JS layer can
+            # fall back to the previously-generated databases.json value
+            # rather than inherit a parent default that would be wrong.
+            unresolved_cap_attrs = set()
            direct_methods = set()  # capability methods defined directly in this class

            for item in node.body:
@@ -268,9 +288,11 @@ for filename in sorted(os.listdir(specs_dir)):
                            metadata = eval_node(item.value)
                        elif target.id in CAP_ATTR_DEFAULTS:
                            val = eval_node(item.value)
-                            # Only store if we got a concrete bool value (not None/unevaluable)
                            if isinstance(val, bool):
                                cap_attrs[target.id] = val
+                            else:
+                                # Unevaluable expression — defer to JS fallback.
+                                unresolved_cap_attrs.add(target.id)
                elif isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
                    if item.name in CAP_METHODS:
                        direct_methods.add(item.name)
@@ -294,6 +316,7 @@ for filename in sorted(os.listdir(specs_dir)):
                'filename': filename,
                'is_base_or_mixin': is_true_base,
                'cap_attrs': cap_attrs,
+                'unresolved_cap_attrs': unresolved_cap_attrs,
                'direct_methods': direct_methods,
            }
    except Exception as e:
@@ -330,42 +353,59 @@ def get_resolved_caps(class_name, visited=None):
    """
    Resolve capability flags and method overrides with inheritance.

-    Cap attrs: child values override parent values (same as Python MRO).
-    Methods: track which non-BaseEngineSpec classes define them directly,
-    matching the has_custom_method() logic in superset/db_engine_specs/lib.py.
+    Returns (attr_values, unresolved, methods):
+      - attr_values: {attr: bool} for attrs where the nearest MRO assignment
+        was a literal bool. Defaults are applied at the call site.
+      - unresolved: attrs where the nearest MRO assignment was an unevaluable
+        expression (e.g. is_feature_enabled("FLAG")). The JS layer falls
+        back to the previously-generated JSON value for these.
+      - methods: capability methods defined directly in some non-base ancestor,
+        matching the has_custom_method() logic in db_engine_specs/lib.py.
+
+    attr_values and unresolved are disjoint — an attr is in at most one.
    """
    if visited is None:
        visited = set()
    if class_name in visited:
-        return dict(CAP_ATTR_DEFAULTS), set()
+        return {}, set(), set()
    visited.add(class_name)

    info = class_info.get(class_name)
    if not info:
-        return dict(CAP_ATTR_DEFAULTS), set()
+        return {}, set(), set()

-    # Start from base defaults
-    resolved_attrs = dict(CAP_ATTR_DEFAULTS)
+    attr_values = {}
+    unresolved = set()
    resolved_methods = set()

    # Collect from parents, iterating right-to-left so leftmost bases win
    # (matches Python MRO: for class C(A, B), A's attributes take precedence).
-    # update() keeps the LAST value written, so reversing makes the leftmost
-    # base the final writer.
    for base_name in reversed(info['bases']):
-        parent_attrs, parent_methods = get_resolved_caps(base_name, visited.copy())
-        resolved_attrs.update(parent_attrs)
-        resolved_methods.update(parent_methods)
+        p_vals, p_unres, p_meth = get_resolved_caps(base_name, visited.copy())
+        # A parent's literal assignments overwrite whatever we inherited so far.
+        for attr, val in p_vals.items():
+            attr_values[attr] = val
+            unresolved.discard(attr)
+        # A parent's unresolved assignments likewise take precedence.
+        for attr in p_unres:
+            unresolved.add(attr)
+            attr_values.pop(attr, None)
+        resolved_methods.update(p_meth)

-    # Apply this class's own cap_attrs (override parent values)
-    resolved_attrs.update(info['cap_attrs'])
+    # Apply this class's own assignments (override parents).
+    for attr, val in info['cap_attrs'].items():
+        attr_values[attr] = val
+        unresolved.discard(attr)
+    for attr in info['unresolved_cap_attrs']:
+        unresolved.add(attr)
+        attr_values.pop(attr, None)

    # Accumulate method overrides, but skip the literal BaseEngineSpec
-    # (its implementations are stubs; only non-base overrides count)
+    # (its implementations are stubs; only non-base overrides count).
    if class_name != TRUE_BASE_CLASS:
        resolved_methods.update(info['direct_methods'])

-    return resolved_attrs, resolved_methods
+    return attr_values, unresolved, resolved_methods

 for class_name, info in class_info.items():
    # Skip base classes and mixins
@@ -393,10 +433,12 @@ for class_name, info in class_info.items():
        debug_info["classes_with_metadata"] += 1

        # Resolve capability flags from Python source
-        cap_attrs, cap_methods = get_resolved_caps(class_name)
+        attr_values, unresolved_caps, cap_methods = get_resolved_caps(class_name)
+        cap_attrs = dict(CAP_ATTR_DEFAULTS)
+        cap_attrs.update(attr_values)
        engine_attr = info.get('engine') or ''

-        databases[display_name] = {
+        entry = {
            'engine': display_name.lower().replace(' ', '_'),
            'engine_name': display_name,
            'module': info['filename'][:-3],  # Remove .py extension
@@ -422,6 +464,19 @@ for class_name, info in class_info.items():
            ),
        }

+        # Tell the JS layer which output fields were populated from the
+        # BaseEngineSpec default because the source assignment was an
+        # unevaluable expression; those get overridden from existing JSON.
+        unresolved_fields = sorted(
+            CAP_ATTR_TO_OUTPUT_FIELD[attr]
+            for attr in unresolved_caps
+            if attr in CAP_ATTR_TO_OUTPUT_FIELD
+        )
+        if unresolved_fields:
+            entry['_unresolved_cap_fields'] = unresolved_fields
+
+        databases[display_name] = entry
+
 if errors and not databases:
    print(json.dumps({"error": "Parse errors", "details": errors, "debug": debug_info}), file=sys.stderr)

@@ -942,12 +997,41 @@ function loadExistingData() {
  }
 }

+/**
+ * Fall back to the previously-generated databases.json for capability flags
+ * whose source assignment couldn't be statically resolved (e.g.
+ * `allows_joins = is_feature_enabled("DRUID_JOINS")`). The Python extractor
+ * flags these via the internal `_unresolved_cap_fields` marker; without this
+ * fallback those fields would silently inherit the BaseEngineSpec default
+ * and disagree with runtime behavior. The marker is stripped before output.
+ */
+function fallbackUnresolvedCaps(newDatabases, existingData) {
+  for (const [name, db] of Object.entries(newDatabases)) {
+    const unresolved = db._unresolved_cap_fields;
+    if (!unresolved || unresolved.length === 0) {
+      delete db._unresolved_cap_fields;
+      continue;
+    }
+    const existingDb = existingData?.databases?.[name];
+    if (existingDb) {
+      for (const field of unresolved) {
+        if (existingDb[field] !== undefined) {
+          db[field] = existingDb[field];
+        }
+      }
+    }
+    delete db._unresolved_cap_fields;
+  }
+  return newDatabases;
+}
+
 /**
 * Merge new documentation with existing diagnostics
 * Preserves score, max_score, and time_grains from existing data (these require
 * Flask context to generate and cannot be derived from static source analysis).
 * Capability flags (joins, supports_catalog, etc.) are NOT preserved here — they
- * are now read fresh from the Python engine spec source by extractEngineSpecMetadata().
+ * are read fresh from the Python engine spec source by extractEngineSpecMetadata(),
+ * with a separate fallback for expression-based assignments (see fallbackUnresolvedCaps).
 */
 function mergeWithExistingDiagnostics(newDatabases, existingData) {
  if (!existingData?.databases) return newDatabases;
@@ -1017,6 +1101,12 @@ async function main() {
    databases = mergeWithExistingDiagnostics(databases, existingData);
  }

+  // For cap flags assigned via unevaluable expressions (e.g.
+  // `is_feature_enabled(...)`), prefer the value from a previously-generated
+  // JSON. Runs regardless of scores since it addresses static-analysis gaps,
+  // not missing Flask diagnostics. Always strips the internal marker.
+  databases = fallbackUnresolvedCaps(databases, existingData);
+
  // Extract and merge custom_errors for troubleshooting documentation
  const customErrors = extractCustomErrors();
  mergeCustomErrors(databases, customErrors);