mirror of
https://github.com/apache/superset.git
synced 2026-04-25 11:04:48 +00:00
address review: preserve existing JSON for unresolvable cap flags
Cap attrs assigned via expressions (e.g. DruidEngineSpec.allows_joins =
is_feature_enabled("DRUID_JOINS")) can't be statically evaluated, so
they previously silently fell back to the BaseEngineSpec default —
causing generated docs to disagree with runtime behavior.
Now the Python extractor tracks unresolvable assignments per class,
propagates them through the MRO walk, and emits an
_unresolved_cap_fields marker on the database entry. The JS layer
uses that marker to prefer the value from the previously-generated
databases.json (which was produced with Flask context and reflects
real runtime values) and strips the marker before writing output.
Verified against druid.py: extraction correctly emits
"_unresolved_cap_fields": ["joins"], and the existing JSON's
"joins": false is preserved rather than overwritten with the base
default of true.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -197,6 +197,21 @@ CAP_ATTR_DEFAULTS = {
|
||||
'allows_subqueries': True,
|
||||
}
|
||||
|
||||
# Maps source capability attribute -> output field name used in databases.json.
|
||||
# When a cap attr is assigned an unevaluable expression (e.g.
|
||||
# allows_joins = is_feature_enabled("DRUID_JOINS")), the JS layer uses this
|
||||
# mapping to preserve the corresponding field from the previously-generated
|
||||
# JSON rather than silently inheriting an incorrect parent default.
|
||||
CAP_ATTR_TO_OUTPUT_FIELD = {
|
||||
'allows_joins': 'joins',
|
||||
'allows_subqueries': 'subqueries',
|
||||
'supports_dynamic_schema': 'supports_dynamic_schema',
|
||||
'supports_catalog': 'supports_catalog',
|
||||
'supports_dynamic_catalog': 'supports_dynamic_catalog',
|
||||
'disable_ssh_tunneling': 'ssh_tunneling',
|
||||
'supports_file_upload': 'supports_file_upload',
|
||||
}
|
||||
|
||||
# Methods that indicate a capability when overridden by a non-BaseEngineSpec class.
|
||||
# Mirrors the has_custom_method checks in superset/db_engine_specs/lib.py.
|
||||
# cancel_query / get_cancel_query_id / has_implicit_cancel -> query_cancelation
|
||||
@@ -249,6 +264,11 @@ for filename in sorted(os.listdir(specs_dir)):
|
||||
engine_attr = None
|
||||
metadata = None
|
||||
cap_attrs = {} # capability flag attributes defined directly in this class
|
||||
# Cap attrs assigned via expressions we can't statically resolve
|
||||
# (e.g. is_feature_enabled("FLAG")). Tracked so the JS layer can
|
||||
# fall back to the previously-generated databases.json value
|
||||
# rather than inherit a parent default that would be wrong.
|
||||
unresolved_cap_attrs = set()
|
||||
direct_methods = set() # capability methods defined directly in this class
|
||||
|
||||
for item in node.body:
|
||||
@@ -268,9 +288,11 @@ for filename in sorted(os.listdir(specs_dir)):
|
||||
metadata = eval_node(item.value)
|
||||
elif target.id in CAP_ATTR_DEFAULTS:
|
||||
val = eval_node(item.value)
|
||||
# Only store if we got a concrete bool value (not None/unevaluable)
|
||||
if isinstance(val, bool):
|
||||
cap_attrs[target.id] = val
|
||||
else:
|
||||
# Unevaluable expression — defer to JS fallback.
|
||||
unresolved_cap_attrs.add(target.id)
|
||||
elif isinstance(item, (ast.FunctionDef, ast.AsyncFunctionDef)):
|
||||
if item.name in CAP_METHODS:
|
||||
direct_methods.add(item.name)
|
||||
@@ -294,6 +316,7 @@ for filename in sorted(os.listdir(specs_dir)):
|
||||
'filename': filename,
|
||||
'is_base_or_mixin': is_true_base,
|
||||
'cap_attrs': cap_attrs,
|
||||
'unresolved_cap_attrs': unresolved_cap_attrs,
|
||||
'direct_methods': direct_methods,
|
||||
}
|
||||
except Exception as e:
|
||||
@@ -330,42 +353,59 @@ def get_resolved_caps(class_name, visited=None):
|
||||
"""
|
||||
Resolve capability flags and method overrides with inheritance.
|
||||
|
||||
Cap attrs: child values override parent values (same as Python MRO).
|
||||
Methods: track which non-BaseEngineSpec classes define them directly,
|
||||
matching the has_custom_method() logic in superset/db_engine_specs/lib.py.
|
||||
Returns (attr_values, unresolved, methods):
|
||||
- attr_values: {attr: bool} for attrs where the nearest MRO assignment
|
||||
was a literal bool. Defaults are applied at the call site.
|
||||
- unresolved: attrs where the nearest MRO assignment was an unevaluable
|
||||
expression (e.g. is_feature_enabled("FLAG")). The JS layer falls
|
||||
back to the previously-generated JSON value for these.
|
||||
- methods: capability methods defined directly in some non-base ancestor,
|
||||
matching the has_custom_method() logic in db_engine_specs/lib.py.
|
||||
|
||||
attr_values and unresolved are disjoint — an attr is in at most one.
|
||||
"""
|
||||
if visited is None:
|
||||
visited = set()
|
||||
if class_name in visited:
|
||||
return dict(CAP_ATTR_DEFAULTS), set()
|
||||
return {}, set(), set()
|
||||
visited.add(class_name)
|
||||
|
||||
info = class_info.get(class_name)
|
||||
if not info:
|
||||
return dict(CAP_ATTR_DEFAULTS), set()
|
||||
return {}, set(), set()
|
||||
|
||||
# Start from base defaults
|
||||
resolved_attrs = dict(CAP_ATTR_DEFAULTS)
|
||||
attr_values = {}
|
||||
unresolved = set()
|
||||
resolved_methods = set()
|
||||
|
||||
# Collect from parents, iterating right-to-left so leftmost bases win
|
||||
# (matches Python MRO: for class C(A, B), A's attributes take precedence).
|
||||
# update() keeps the LAST value written, so reversing makes the leftmost
|
||||
# base the final writer.
|
||||
for base_name in reversed(info['bases']):
|
||||
parent_attrs, parent_methods = get_resolved_caps(base_name, visited.copy())
|
||||
resolved_attrs.update(parent_attrs)
|
||||
resolved_methods.update(parent_methods)
|
||||
p_vals, p_unres, p_meth = get_resolved_caps(base_name, visited.copy())
|
||||
# A parent's literal assignments overwrite whatever we inherited so far.
|
||||
for attr, val in p_vals.items():
|
||||
attr_values[attr] = val
|
||||
unresolved.discard(attr)
|
||||
# A parent's unresolved assignments likewise take precedence.
|
||||
for attr in p_unres:
|
||||
unresolved.add(attr)
|
||||
attr_values.pop(attr, None)
|
||||
resolved_methods.update(p_meth)
|
||||
|
||||
# Apply this class's own cap_attrs (override parent values)
|
||||
resolved_attrs.update(info['cap_attrs'])
|
||||
# Apply this class's own assignments (override parents).
|
||||
for attr, val in info['cap_attrs'].items():
|
||||
attr_values[attr] = val
|
||||
unresolved.discard(attr)
|
||||
for attr in info['unresolved_cap_attrs']:
|
||||
unresolved.add(attr)
|
||||
attr_values.pop(attr, None)
|
||||
|
||||
# Accumulate method overrides, but skip the literal BaseEngineSpec
|
||||
# (its implementations are stubs; only non-base overrides count)
|
||||
# (its implementations are stubs; only non-base overrides count).
|
||||
if class_name != TRUE_BASE_CLASS:
|
||||
resolved_methods.update(info['direct_methods'])
|
||||
|
||||
return resolved_attrs, resolved_methods
|
||||
return attr_values, unresolved, resolved_methods
|
||||
|
||||
for class_name, info in class_info.items():
|
||||
# Skip base classes and mixins
|
||||
@@ -393,10 +433,12 @@ for class_name, info in class_info.items():
|
||||
debug_info["classes_with_metadata"] += 1
|
||||
|
||||
# Resolve capability flags from Python source
|
||||
cap_attrs, cap_methods = get_resolved_caps(class_name)
|
||||
attr_values, unresolved_caps, cap_methods = get_resolved_caps(class_name)
|
||||
cap_attrs = dict(CAP_ATTR_DEFAULTS)
|
||||
cap_attrs.update(attr_values)
|
||||
engine_attr = info.get('engine') or ''
|
||||
|
||||
databases[display_name] = {
|
||||
entry = {
|
||||
'engine': display_name.lower().replace(' ', '_'),
|
||||
'engine_name': display_name,
|
||||
'module': info['filename'][:-3], # Remove .py extension
|
||||
@@ -422,6 +464,19 @@ for class_name, info in class_info.items():
|
||||
),
|
||||
}
|
||||
|
||||
# Tell the JS layer which output fields were populated from the
|
||||
# BaseEngineSpec default because the source assignment was an
|
||||
# unevaluable expression; those get overridden from existing JSON.
|
||||
unresolved_fields = sorted(
|
||||
CAP_ATTR_TO_OUTPUT_FIELD[attr]
|
||||
for attr in unresolved_caps
|
||||
if attr in CAP_ATTR_TO_OUTPUT_FIELD
|
||||
)
|
||||
if unresolved_fields:
|
||||
entry['_unresolved_cap_fields'] = unresolved_fields
|
||||
|
||||
databases[display_name] = entry
|
||||
|
||||
if errors and not databases:
|
||||
print(json.dumps({"error": "Parse errors", "details": errors, "debug": debug_info}), file=sys.stderr)
|
||||
|
||||
@@ -942,12 +997,41 @@ function loadExistingData() {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Fall back to the previously-generated databases.json for capability flags
|
||||
* whose source assignment couldn't be statically resolved (e.g.
|
||||
* `allows_joins = is_feature_enabled("DRUID_JOINS")`). The Python extractor
|
||||
* flags these via the internal `_unresolved_cap_fields` marker; without this
|
||||
* fallback those fields would silently inherit the BaseEngineSpec default
|
||||
* and disagree with runtime behavior. The marker is stripped before output.
|
||||
*/
|
||||
function fallbackUnresolvedCaps(newDatabases, existingData) {
|
||||
for (const [name, db] of Object.entries(newDatabases)) {
|
||||
const unresolved = db._unresolved_cap_fields;
|
||||
if (!unresolved || unresolved.length === 0) {
|
||||
delete db._unresolved_cap_fields;
|
||||
continue;
|
||||
}
|
||||
const existingDb = existingData?.databases?.[name];
|
||||
if (existingDb) {
|
||||
for (const field of unresolved) {
|
||||
if (existingDb[field] !== undefined) {
|
||||
db[field] = existingDb[field];
|
||||
}
|
||||
}
|
||||
}
|
||||
delete db._unresolved_cap_fields;
|
||||
}
|
||||
return newDatabases;
|
||||
}
|
||||
|
||||
/**
|
||||
* Merge new documentation with existing diagnostics
|
||||
* Preserves score, max_score, and time_grains from existing data (these require
|
||||
* Flask context to generate and cannot be derived from static source analysis).
|
||||
* Capability flags (joins, supports_catalog, etc.) are NOT preserved here — they
|
||||
* are now read fresh from the Python engine spec source by extractEngineSpecMetadata().
|
||||
* are read fresh from the Python engine spec source by extractEngineSpecMetadata(),
|
||||
* with a separate fallback for expression-based assignments (see fallbackUnresolvedCaps).
|
||||
*/
|
||||
function mergeWithExistingDiagnostics(newDatabases, existingData) {
|
||||
if (!existingData?.databases) return newDatabases;
|
||||
@@ -1017,6 +1101,12 @@ async function main() {
|
||||
databases = mergeWithExistingDiagnostics(databases, existingData);
|
||||
}
|
||||
|
||||
// For cap flags assigned via unevaluable expressions (e.g.
|
||||
// `is_feature_enabled(...)`), prefer the value from a previously-generated
|
||||
// JSON. Runs regardless of scores since it addresses static-analysis gaps,
|
||||
// not missing Flask diagnostics. Always strips the internal marker.
|
||||
databases = fallbackUnresolvedCaps(databases, existingData);
|
||||
|
||||
// Extract and merge custom_errors for troubleshooting documentation
|
||||
const customErrors = extractCustomErrors();
|
||||
mergeCustomErrors(databases, customErrors);
|
||||
|
||||
Reference in New Issue
Block a user