Files
superset2/docs/scripts/lint-docs-links.mjs
2026-05-14 11:07:18 -07:00

231 lines
7.1 KiB
JavaScript

#!/usr/bin/env node
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/**
* lint-docs-links — source-level checks for internal markdown links.
*
* Catches three failure modes that combine to break SPA navigation in
* a Docusaurus build:
*
* 1. BARE — `[X](../foo)` with no extension. Skips
* Docusaurus's file resolver entirely. Emitted
* as a raw href and resolved by the browser
* against the current page URL — usually the
* wrong directory for trailing-slash routes.
* `onBrokenLinks: 'throw'` cannot catch this.
*
* 2. MISSING_TARGET — `[X](./gone.md)` with an extension, but no
* file at that path. The Docusaurus build
* catches this too (via
* `onBrokenMarkdownLinks: 'throw'`) but only
* after a multi-minute build. This script
* flags it in ~1s.
*
* 3. WRONG_EXTENSION — `[X](./foo.md)` where the file is actually
* `foo.mdx` (or vice versa). Same end result
* as MISSING_TARGET, but the fix is one
* character — so we report it as its own
* category with the actual extension on disk.
*
* Skips: fenced code blocks, asset-style targets (.png/.json/etc.),
* external URLs, in-page anchors, and the `versioned_docs/`
* snapshots (those are frozen historical content).
*
* Run from `docs/`:
* node scripts/lint-docs-links.mjs
*
* Exits 0 on clean, 1 on any finding.
*/
import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const docsRoot = path.join(__dirname, '..');
const ROOTS = ['docs', 'admin_docs', 'developer_docs', 'components'];
const NON_DOC_EXTENSIONS = new Set([
'.png', '.jpg', '.jpeg', '.gif', '.webp', '.svg', '.ico',
'.json', '.yaml', '.yml', '.txt', '.csv',
'.zip', '.tar', '.gz',
'.pdf',
'.mp4', '.webm', '.mov',
]);
const LINK_RE = /\[[^\]\n]+?\]\((?<url>\.{1,2}\/[^)\s]+?)\)/g;
/**
* Classify a single markdown link from a source file.
* Returns one of: ok / bare / asset / missing-target / wrong-extension.
*/
function classifyLink(sourceFile, url) {
const stripped = url.split('#', 1)[0].split('?', 1)[0];
const ext = path.extname(stripped).toLowerCase();
// Non-doc assets — legit bare extensions, leave alone.
if (ext && NON_DOC_EXTENSIONS.has(ext)) {
return { kind: 'asset' };
}
// Anything that doesn't end in .md/.mdx is a bare relative URL.
if (ext !== '.md' && ext !== '.mdx') {
return { kind: 'bare' };
}
// Has a .md/.mdx extension — make sure the target exists.
const target = path.normalize(path.join(path.dirname(sourceFile), stripped));
if (fs.existsSync(target)) {
return { kind: 'ok' };
}
// Target doesn't exist — check if the OTHER extension does.
const otherExt = ext === '.md' ? '.mdx' : '.md';
const otherTarget = target.slice(0, -ext.length) + otherExt;
if (fs.existsSync(otherTarget)) {
return { kind: 'wrong-extension', actualExt: otherExt };
}
return { kind: 'missing-target' };
}
function* walk(dir) {
const entries = fs.readdirSync(dir, { withFileTypes: true });
for (const entry of entries) {
const full = path.join(dir, entry.name);
if (entry.isDirectory()) {
if (
entry.name.startsWith('.') ||
entry.name === 'node_modules' ||
entry.name.endsWith('_versioned_docs') ||
entry.name === 'versioned_docs'
) {
continue;
}
yield* walk(full);
} else if (entry.isFile()) {
if (entry.name.endsWith('.md') || entry.name.endsWith('.mdx')) {
yield full;
}
}
}
}
function lintFile(file) {
const src = fs.readFileSync(file, 'utf8');
const findings = [];
let inFence = false;
const lines = src.split('\n');
for (let i = 0; i < lines.length; i++) {
const line = lines[i];
if (line.trimStart().startsWith('```')) {
inFence = !inFence;
continue;
}
if (inFence) continue;
for (const m of line.matchAll(LINK_RE)) {
const url = m.groups.url;
const result = classifyLink(file, url);
if (result.kind !== 'ok' && result.kind !== 'asset') {
findings.push({ line: i + 1, url, ...result });
}
}
}
return findings;
}
const findings = [];
for (const root of ROOTS) {
const abs = path.join(docsRoot, root);
if (!fs.existsSync(abs)) continue;
for (const file of walk(abs)) {
for (const f of lintFile(file)) {
findings.push({ file: path.relative(docsRoot, file), ...f });
}
}
}
if (findings.length === 0) {
console.log('✓ lint-docs-links: no broken internal links found');
process.exit(0);
}
// Group by kind for readable output.
const groups = {
bare: [],
'wrong-extension': [],
'missing-target': [],
};
for (const f of findings) {
groups[f.kind].push(f);
}
console.error(
`✗ lint-docs-links: found ${findings.length} broken internal link(s)`
);
console.error('');
if (groups.bare.length) {
console.error(
` ${groups.bare.length} bare relative link(s) (no .md/.mdx extension)`
);
console.error(
" Docusaurus's file resolver skips these; the browser resolves them"
);
console.error(
' against the current page URL — wrong directory for trailing-slash routes.'
);
console.error(' Add the extension so the file resolver picks them up.');
console.error('');
for (const f of groups.bare) {
console.error(` ${f.file}:${f.line} ${f.url}`);
}
console.error('');
}
if (groups['wrong-extension'].length) {
console.error(
` ${groups['wrong-extension'].length} wrong-extension link(s) (.md vs .mdx mismatch)`
);
console.error(' The target file exists with the other extension on disk.');
console.error('');
for (const f of groups['wrong-extension']) {
console.error(
` ${f.file}:${f.line} ${f.url} → use ${f.actualExt}`
);
}
console.error('');
}
if (groups['missing-target'].length) {
console.error(
` ${groups['missing-target'].length} missing-target link(s) (file doesn't exist)`
);
console.error('');
for (const f of groups['missing-target']) {
console.error(` ${f.file}:${f.line} ${f.url}`);
}
console.error('');
}
process.exit(1);