mirror of
https://github.com/apache/superset.git
synced 2026-05-28 19:25:20 +00:00
fix(docs): tighten onBrokenLinks to throw and fix surfaced broken links (#40102)
Co-authored-by: Claude Code <noreply@anthropic.com>
This commit is contained in:
@@ -1260,7 +1260,15 @@ function generateCategoryIndex(category, components) {
|
||||
};
|
||||
const componentList = components
|
||||
.sort((a, b) => a.componentName.localeCompare(b.componentName))
|
||||
.map(c => `- [${c.componentName}](./${c.componentName.toLowerCase()})`)
|
||||
// `.mdx` suffix matches the actual component page files emitted
|
||||
// by this generator (see the MDX wrappers below). The extension
|
||||
// is required: Docusaurus only validates and rewrites *file-based*
|
||||
// references (.md/.mdx). Bare relative paths bypass the file
|
||||
// resolver and get emitted as raw HTML hrefs that the browser
|
||||
// resolves against the current URL — which gives the wrong
|
||||
// directory for trailing-slash routes and breaks SPA navigation.
|
||||
// See docs/scripts/lint-docs-links.mjs.
|
||||
.map(c => `- [${c.componentName}](./${c.componentName.toLowerCase()}.mdx)`)
|
||||
.join('\n');
|
||||
|
||||
return `---
|
||||
@@ -1366,7 +1374,7 @@ This documentation is auto-generated from Storybook stories. To add or update co
|
||||
4. Run \`yarn generate:superset-components\` in the \`docs/\` directory
|
||||
|
||||
:::info Work in Progress
|
||||
This component library is actively being documented. See the [Components TODO](./TODO) page for a list of components awaiting documentation.
|
||||
This component library is actively being documented. See the [Components TODO](./TODO.md) page for a list of components awaiting documentation.
|
||||
:::
|
||||
|
||||
---
|
||||
|
||||
230
docs/scripts/lint-docs-links.mjs
Normal file
230
docs/scripts/lint-docs-links.mjs
Normal file
@@ -0,0 +1,230 @@
|
||||
#!/usr/bin/env node
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* lint-docs-links — source-level checks for internal markdown links.
|
||||
*
|
||||
* Catches three failure modes that combine to break SPA navigation in
|
||||
* a Docusaurus build:
|
||||
*
|
||||
* 1. BARE — `[X](../foo)` with no extension. Skips
|
||||
* Docusaurus's file resolver entirely. Emitted
|
||||
* as a raw href and resolved by the browser
|
||||
* against the current page URL — usually the
|
||||
* wrong directory for trailing-slash routes.
|
||||
* `onBrokenLinks: 'throw'` cannot catch this.
|
||||
*
|
||||
* 2. MISSING_TARGET — `[X](./gone.md)` with an extension, but no
|
||||
* file at that path. The Docusaurus build
|
||||
* catches this too (via
|
||||
* `onBrokenMarkdownLinks: 'throw'`) but only
|
||||
* after a multi-minute build. This script
|
||||
* flags it in ~1s.
|
||||
*
|
||||
* 3. WRONG_EXTENSION — `[X](./foo.md)` where the file is actually
|
||||
* `foo.mdx` (or vice versa). Same end result
|
||||
* as MISSING_TARGET, but the fix is one
|
||||
* character — so we report it as its own
|
||||
* category with the actual extension on disk.
|
||||
*
|
||||
* Skips: fenced code blocks, asset-style targets (.png/.json/etc.),
|
||||
* external URLs, in-page anchors, and the `versioned_docs/`
|
||||
* snapshots (those are frozen historical content).
|
||||
*
|
||||
* Run from `docs/`:
|
||||
* node scripts/lint-docs-links.mjs
|
||||
*
|
||||
* Exits 0 on clean, 1 on any finding.
|
||||
*/
|
||||
|
||||
import fs from 'node:fs';
|
||||
import path from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
// ES modules have no built-in __filename/__dirname; derive them from the
// module URL so paths resolve relative to this script, not the caller's cwd.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Parent of the directory that contains this script.
const docsRoot = path.join(__dirname, '..');

// Content directories, relative to `docsRoot`, that are scanned for links.
const ROOTS = ['docs', 'admin_docs', 'developer_docs', 'components'];

// Lower-cased extensions treated as static assets rather than doc pages.
// Links ending in one of these are never reported.
const NON_DOC_EXTENSIONS = new Set([
  '.png', '.jpg', '.jpeg', '.gif', '.webp', '.svg', '.ico',
  '.json', '.yaml', '.yml', '.txt', '.csv',
  '.zip', '.tar', '.gz',
  '.pdf',
  '.mp4', '.webm', '.mov',
]);

// Matches inline markdown links whose destination starts with `./` or `../`
// and captures the destination as the named group `url`.
//
// An optional quoted title after the destination is accepted
// (`[x](./foo.md "Title")`). Without it, titled links would not match at all
// and would silently escape linting.
const LINK_RE =
  /\[[^\]\n]+?\]\((?<url>\.{1,2}\/[^)\s]+?)(?:\s+(?:"[^"\n]*"|'[^'\n]*'))?\)/g;
|
||||
|
||||
/**
 * Classify a single relative markdown link found in a source file.
 *
 * The fragment (`#...`) and query (`?...`) parts are dropped before the
 * extension is inspected. Classification, in order:
 *
 *   - `asset`           — extension is listed in NON_DOC_EXTENSIONS.
 *   - `bare`            — extension is anything other than .md/.mdx
 *                         (including no extension at all).
 *   - `ok`              — a file exists at the resolved path.
 *   - `wrong-extension` — nothing at the resolved path, but the same path
 *                         with the other doc extension exists; `actualExt`
 *                         holds that extension.
 *   - `missing-target`  — neither spelling exists.
 *
 * Destinations may be percent-encoded (`./my%20page.md`) while the file on
 * disk is not, so both the literal and the decoded spelling are tried.
 *
 * @param {string} sourceFile Path of the file containing the link; the link
 *   is resolved against this file's directory.
 * @param {string} url Link destination as written in the markdown.
 * @returns {{kind: string, actualExt?: string}} Classification result.
 */
function classifyLink(sourceFile, url) {
  const stripped = url.split('#', 1)[0].split('?', 1)[0];
  const ext = path.extname(stripped).toLowerCase();

  // Non-doc assets — legit non-doc extensions, leave alone.
  if (ext && NON_DOC_EXTENSIONS.has(ext)) {
    return { kind: 'asset' };
  }

  // Anything that doesn't end in .md/.mdx is a bare relative URL.
  if (ext !== '.md' && ext !== '.mdx') {
    return { kind: 'bare' };
  }

  // Candidate spellings of the path: literal first, then percent-decoded.
  const spellings = [stripped];
  try {
    const decoded = decodeURIComponent(stripped);
    if (decoded !== stripped) {
      spellings.push(decoded);
    }
  } catch {
    // Malformed escape sequence (URIError): only the literal spelling applies.
  }

  const baseDir = path.dirname(sourceFile);
  const targets = spellings.map((spelling) =>
    path.normalize(path.join(baseDir, spelling)),
  );

  // Has a .md/.mdx extension — make sure the target exists.
  if (targets.some((target) => fs.existsSync(target))) {
    return { kind: 'ok' };
  }

  // Target doesn't exist — check whether the OTHER doc extension does.
  const otherExt = ext === '.md' ? '.mdx' : '.md';
  const existsWithOtherExt = targets.some((target) =>
    fs.existsSync(target.slice(0, -ext.length) + otherExt),
  );
  if (existsWithOtherExt) {
    return { kind: 'wrong-extension', actualExt: otherExt };
  }

  return { kind: 'missing-target' };
}
|
||||
|
||||
/**
 * Recursively yield the path of every `.md` / `.mdx` file under `dir`.
 *
 * Directories whose name starts with `.`, equals `node_modules` or
 * `versioned_docs`, or ends with `_versioned_docs` are not descended into.
 * Entries that are neither directories nor regular files are ignored.
 *
 * Entries are visited in name order so the traversal (and any report built
 * from it) is the same on every filesystem; `readdirSync` itself makes no
 * ordering guarantee.
 *
 * @param {string} dir Directory to scan.
 * @yields {string} `dir` joined with the relative path of each matching file.
 */
function* walk(dir) {
  const entries = fs
    .readdirSync(dir, { withFileTypes: true })
    // Plain code-unit comparison: locale-independent and stable.
    .sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));

  for (const entry of entries) {
    const full = path.join(dir, entry.name);

    if (entry.isDirectory()) {
      const isSkipped =
        entry.name.startsWith('.') ||
        entry.name === 'node_modules' ||
        entry.name === 'versioned_docs' ||
        entry.name.endsWith('_versioned_docs');
      if (!isSkipped) {
        yield* walk(full);
      }
      continue;
    }

    if (!entry.isFile()) {
      continue;
    }

    if (entry.name.endsWith('.md') || entry.name.endsWith('.mdx')) {
      yield full;
    }
  }
}
|
||||
|
||||
/**
 * Interpret a line as a code-fence marker.
 *
 * @param {string} line One source line.
 * @returns {{char: string, length: number, rest: string} | null} The fence
 *   character (a backtick or `~`), the length of the marker run, and the text
 *   following it; `null` when the line (ignoring leading whitespace) does not
 *   start with at least three backticks or three tildes.
 */
function parseFenceMarker(line) {
  const trimmed = line.trimStart();
  const run = /^(?:`{3,}|~{3,})/.exec(trimmed);
  if (run === null) {
    return null;
  }
  const marker = run[0];
  return {
    char: marker[0],
    length: marker.length,
    rest: trimmed.slice(marker.length),
  };
}

/**
 * Scan one markdown/MDX file for problematic relative links.
 *
 * Lines inside fenced code blocks are skipped. A fence opens on a line that
 * starts (after optional indentation) with three or more backticks or
 * tildes, and closes only on a later line made of the same character, at
 * least as long as the opener, with nothing but whitespace after it. This
 * keeps a shorter fence nested inside a longer one from flipping the state,
 * and recognises `~~~` fences as well as backtick ones.
 *
 * @param {string} file Path of the file to read (as UTF-8).
 * @returns {Array<{line: number, url: string, kind: string, actualExt?: string}>}
 *   One entry per link that `classifyLink` classified as something other
 *   than `ok` or `asset`; `line` is 1-based.
 */
function lintFile(file) {
  const src = fs.readFileSync(file, 'utf8');
  const findings = [];
  // Marker of the fence currently open, or null when outside any fence.
  let openFence = null;
  const lines = src.split('\n');

  for (let i = 0; i < lines.length; i++) {
    const line = lines[i];
    const marker = parseFenceMarker(line);

    if (openFence !== null) {
      const closesFence =
        marker !== null &&
        marker.char === openFence.char &&
        marker.length >= openFence.length &&
        marker.rest.trim() === '';
      if (closesFence) {
        openFence = null;
      }
      continue;
    }

    // A backtick run followed by more backticks on the same line is inline
    // code, not a fence opener.
    const opensFence =
      marker !== null && !(marker.char === '`' && marker.rest.includes('`'));
    if (opensFence) {
      openFence = marker;
      continue;
    }

    for (const m of line.matchAll(LINK_RE)) {
      const url = m.groups.url;
      const result = classifyLink(file, url);
      if (result.kind !== 'ok' && result.kind !== 'asset') {
        findings.push({ line: i + 1, url, ...result });
      }
    }
  }

  return findings;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
// Driver: lint every markdown/MDX file under each configured root that
// exists, then print a grouped report. Exit code is 0 when nothing was
// found and 1 otherwise.
// ---------------------------------------------------------------------------

// Each finding is tagged with its file path relative to `docsRoot`.
const findings = [];
for (const root of ROOTS) {
  const rootDir = path.join(docsRoot, root);
  if (fs.existsSync(rootDir)) {
    for (const docFile of walk(rootDir)) {
      for (const issue of lintFile(docFile)) {
        findings.push({ file: path.relative(docsRoot, docFile), ...issue });
      }
    }
  }
}

if (!findings.length) {
  console.log('✓ lint-docs-links: no broken internal links found');
  process.exit(0);
}

// Bucket findings by kind so each category is reported together.
const groups = {
  bare: [],
  'wrong-extension': [],
  'missing-target': [],
};
findings.forEach((issue) => {
  groups[issue.kind].push(issue);
});

const report = (text) => console.error(text);
const describeLocation = (issue) => ` ${issue.file}:${issue.line} ${issue.url}`;

// Print one category: preamble lines, a blank line, one line per finding,
// and a trailing blank line. Empty categories print nothing.
const reportSection = (issues, preamble, describe) => {
  if (issues.length === 0) {
    return;
  }
  preamble.forEach(report);
  report('');
  for (const issue of issues) {
    report(describe(issue));
  }
  report('');
};

report(`✗ lint-docs-links: found ${findings.length} broken internal link(s)`);
report('');

reportSection(
  groups.bare,
  [
    ` ${groups.bare.length} bare relative link(s) (no .md/.mdx extension)`,
    " Docusaurus's file resolver skips these; the browser resolves them",
    ' against the current page URL — wrong directory for trailing-slash routes.',
    ' Add the extension so the file resolver picks them up.',
  ],
  describeLocation,
);

reportSection(
  groups['wrong-extension'],
  [
    ` ${groups['wrong-extension'].length} wrong-extension link(s) (.md vs .mdx mismatch)`,
    ' The target file exists with the other extension on disk.',
  ],
  (issue) => ` ${issue.file}:${issue.line} ${issue.url} → use ${issue.actualExt}`,
);

reportSection(
  groups['missing-target'],
  [
    ` ${groups['missing-target'].length} missing-target link(s) (file doesn't exist)`,
  ],
  describeLocation,
);

process.exit(1);
|
||||
Reference in New Issue
Block a user