mirror of
https://github.com/apache/superset.git
synced 2026-05-14 12:25:19 +00:00
Compare commits
3 Commits
fix/dashbo
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
01224007da | ||
|
|
d1e9a5df06 | ||
|
|
48530cb888 |
@@ -43,6 +43,9 @@ _build/*
|
||||
_static/*
|
||||
.buildinfo
|
||||
searchindex.js
|
||||
# auto-generated by docs/scripts/convert-api-sidebar.mjs from openapi.json
|
||||
sidebar.js
|
||||
sidebar.ts
|
||||
# auto generated
|
||||
requirements/*
|
||||
# vendorized
|
||||
|
||||
@@ -31,8 +31,9 @@ You are currently in the `/docs` subdirectory of the Apache Superset repository.
|
||||
├── superset-frontend/ # React/TypeScript frontend
|
||||
└── docs/ # Documentation site (YOU ARE HERE)
|
||||
├── docs/ # Main documentation content
|
||||
├── developer_portal/ # Developer guides (currently disabled)
|
||||
├── components/ # Component playground (currently disabled)
|
||||
├── admin_docs/ # Admin-focused guides
|
||||
├── developer_docs/ # Developer guides
|
||||
├── components/ # Component playground
|
||||
└── docusaurus.config.ts # Site configuration
|
||||
```
|
||||
|
||||
@@ -46,12 +47,19 @@ yarn build # Build production site
|
||||
yarn serve # Serve built site locally
|
||||
|
||||
# Version Management (USE THESE, NOT docusaurus commands)
|
||||
# The add scripts auto-run `generate:smart` so auto-gen content (database
|
||||
# pages, API reference, component pages) is fresh before snapshotting.
|
||||
# For maximum-detail databases.json, drop the `database-diagnostics`
|
||||
# artifact from Python-Integration CI at src/data/databases.json before
|
||||
# cutting. See README.md "Before You Cut".
|
||||
yarn version:add:docs <version> # Add new docs version
|
||||
yarn version:add:developer_portal <version> # Add developer portal version
|
||||
yarn version:add:admin_docs <version> # Add admin docs version
|
||||
yarn version:add:developer_docs <version> # Add developer docs version
|
||||
yarn version:add:components <version> # Add components version
|
||||
yarn version:remove:docs <version> # Remove docs version
|
||||
yarn version:remove:developer_portal <version> # Remove developer portal version
|
||||
yarn version:remove:components <version> # Remove components version
|
||||
yarn version:remove:admin_docs <version> # Remove admin docs version
|
||||
yarn version:remove:developer_docs <version> # Remove developer docs version
|
||||
yarn version:remove:components <version> # Remove components version
|
||||
|
||||
# Quality Checks
|
||||
yarn typecheck # TypeScript validation
|
||||
@@ -95,15 +103,14 @@ docs/
|
||||
└── [security guides]
|
||||
```
|
||||
|
||||
### Developer Portal (`/developer_portal`) - Currently Disabled
|
||||
When enabled, contains developer-focused content:
|
||||
- API documentation
|
||||
- Architecture guides
|
||||
- CLI tools
|
||||
- Code examples
|
||||
### Admin Docs (`/admin_docs`)
|
||||
Admin-focused content: installation, configuration, security.
|
||||
|
||||
### Component Playground (`/components`) - Currently Disabled
|
||||
When enabled, provides interactive component examples for UI development.
|
||||
### Developer Docs (`/developer_docs`)
|
||||
Developer-focused content: API documentation, architecture guides, CLI tools, code examples.
|
||||
|
||||
### Component Playground (`/components`)
|
||||
Interactive component examples for UI development.
|
||||
|
||||
## 📝 Documentation Standards
|
||||
|
||||
|
||||
@@ -37,23 +37,45 @@ Each section maintains its own version history and can be versioned independentl
|
||||
|
||||
To create a new version for any section, use the Docusaurus version command with the appropriate plugin ID or use our automated scripts:
|
||||
|
||||
#### Before You Cut
|
||||
|
||||
The cut snapshots whatever's on disk into a frozen historical version, including auto-generated content (database pages from `superset/db_engine_specs/`, API reference from `static/resources/openapi.json`, component pages from Storybook stories). The cut script refreshes these via `generate:smart` before snapshotting, but the **`databases.json` diagnostics file** needs special care to capture full detail:
|
||||
|
||||
1. **Canonical release cut**: download the `database-diagnostics` artifact from a green `Python-Integration` run on master, place it at `docs/src/data/databases.json`, then run the cut script with `--skip-generate` to preserve it. This is what the production deploy uses and includes full Flask-context diagnostics (driver versions, feature support matrix, etc.).
|
||||
2. **Local dev cut**: just run the script normally. `generate:smart` will regenerate `databases.json` using your local Flask environment — accurate to whatever drivers/extras you have installed, but typically less complete than the CI artifact.
|
||||
3. **No Flask available**: also fine — the database generator falls back to AST parsing of engine spec files. The MDX pages are still correct; only the diagnostics JSON is leaner.
|
||||
|
||||
Also: confirm `master` CI is green, and that your local checkout matches the SHA you intend to cut from.
|
||||
|
||||
#### Using Automated Scripts (Required)
|
||||
|
||||
**⚠️ Important:** Always use these custom commands instead of the native Docusaurus commands. These scripts ensure that both the Docusaurus versioning system AND the `versions-config.json` file are updated correctly.
|
||||
**⚠️ Important:** Always use these custom commands instead of the native Docusaurus commands. These scripts ensure that both the Docusaurus versioning system AND the `versions-config.json` file are updated correctly, AND that auto-generated content is refreshed before snapshotting.
|
||||
|
||||
```bash
|
||||
# Main Documentation
|
||||
yarn version:add:docs 1.2.0
|
||||
|
||||
# Developer Portal
|
||||
yarn version:add:developer_portal 1.2.0
|
||||
# Admin Docs
|
||||
yarn version:add:admin_docs 1.2.0
|
||||
|
||||
# Component Playground (when enabled)
|
||||
# Developer Docs
|
||||
yarn version:add:developer_docs 1.2.0
|
||||
|
||||
# Component Playground
|
||||
yarn version:add:components 1.2.0
|
||||
```
|
||||
|
||||
What the script does:
|
||||
1. Refreshes auto-generated content via `generate:smart` (database pages, API reference, component pages).
|
||||
2. Calls `yarn docusaurus docs:version` (or the per-section equivalent) to snapshot the section.
|
||||
3. Freezes any data-file imports (`@site/static/*.json`, `../../data/*.json`) into a snapshot-local `_versioned_data/` dir so the historical version doesn't silently mutate when the source files change.
|
||||
4. Adjusts relative import paths (`../../src/...` → `../../../src/...`) for files now one directory deeper.
|
||||
5. Updates `versions-config.json` and `<section>_versions.json`.
|
||||
|
||||
**Do NOT use** the native Docusaurus commands directly (`yarn docusaurus docs:version`), as they will:
|
||||
- ❌ Create version files but NOT update `versions-config.json`
|
||||
- ❌ Skip auto-gen refresh, freezing whatever was on disk
|
||||
- ❌ Skip data-import freezing, leaving the snapshot pointed at live data
|
||||
- ❌ Cause versions to not appear in dropdown menus
|
||||
- ❌ Require manual fixes to synchronize the configuration
|
||||
|
||||
@@ -91,8 +113,11 @@ If creating versions manually, you'll need to:
|
||||
# Main Documentation
|
||||
yarn version:remove:docs 1.0.0
|
||||
|
||||
# Developer Portal
|
||||
yarn version:remove:developer_portal 1.0.0
|
||||
# Admin Docs
|
||||
yarn version:remove:admin_docs 1.0.0
|
||||
|
||||
# Developer Docs
|
||||
yarn version:remove:developer_docs 1.0.0
|
||||
|
||||
# Component Playground
|
||||
yarn version:remove:components 1.0.0
|
||||
@@ -103,17 +128,20 @@ To manually remove a version:
|
||||
|
||||
1. **Delete the version folder** from the appropriate location:
|
||||
- Main docs: `versioned_docs/version-X.X.X/` (no prefix for main)
|
||||
- Developer Portal: `developer_portal_versioned_docs/version-X.X.X/`
|
||||
- Admin Docs: `admin_docs_versioned_docs/version-X.X.X/`
|
||||
- Developer Docs: `developer_docs_versioned_docs/version-X.X.X/`
|
||||
- Components: `components_versioned_docs/version-X.X.X/`
|
||||
|
||||
2. **Delete the version metadata file**:
|
||||
- Main docs: `versioned_sidebars/version-X.X.X-sidebars.json` (no prefix)
|
||||
- Developer Portal: `developer_portal_versioned_sidebars/version-X.X.X-sidebars.json`
|
||||
- Admin Docs: `admin_docs_versioned_sidebars/version-X.X.X-sidebars.json`
|
||||
- Developer Docs: `developer_docs_versioned_sidebars/version-X.X.X-sidebars.json`
|
||||
- Components: `components_versioned_sidebars/version-X.X.X-sidebars.json`
|
||||
|
||||
3. **Update the versions list file**:
|
||||
- Main docs: `versions.json`
|
||||
- Developer Portal: `developer_portal_versions.json`
|
||||
- Admin Docs: `admin_docs_versions.json`
|
||||
- Developer Docs: `developer_docs_versions.json`
|
||||
- Components: `components_versions.json`
|
||||
|
||||
4. **Update configuration**:
|
||||
@@ -145,12 +173,12 @@ docs: {
|
||||
}
|
||||
```
|
||||
|
||||
#### Developer Portal & Components (custom plugins)
|
||||
#### Developer Docs & Components (custom plugins)
|
||||
```typescript
|
||||
{
|
||||
id: 'developer_portal',
|
||||
path: 'developer_portal',
|
||||
routeBasePath: 'developer_portal',
|
||||
id: 'developer_docs',
|
||||
path: 'developer_docs',
|
||||
routeBasePath: 'developer-docs',
|
||||
includeCurrentVersion: true,
|
||||
lastVersion: '1.1.0', // Default version
|
||||
onlyIncludeVersions: ['current', '1.1.0', '1.0.0'],
|
||||
@@ -194,7 +222,7 @@ For other issues:
|
||||
|
||||
#### Broken Links in Versioned Documentation
|
||||
When creating a new version, links in the documentation are preserved as-is. Common issues:
|
||||
- **Cross-section links**: Links between sections (e.g., from developer_portal to docs) need to be version-aware
|
||||
- **Cross-section links**: Links between sections (e.g., from developer_docs to docs) need to be version-aware
|
||||
- **Absolute vs relative paths**: Use relative paths within the same section
|
||||
- **Version-specific URLs**: Update hardcoded URLs to use version variables
|
||||
|
||||
|
||||
@@ -1 +0,0 @@
|
||||
[]
|
||||
@@ -1 +0,0 @@
|
||||
[]
|
||||
@@ -1 +0,0 @@
|
||||
[]
|
||||
@@ -33,10 +33,12 @@
|
||||
"version:add": "node scripts/manage-versions.mjs add",
|
||||
"version:remove": "node scripts/manage-versions.mjs remove",
|
||||
"version:add:docs": "node scripts/manage-versions.mjs add docs",
|
||||
"version:add:developer_portal": "node scripts/manage-versions.mjs add developer_portal",
|
||||
"version:add:admin_docs": "node scripts/manage-versions.mjs add admin_docs",
|
||||
"version:add:developer_docs": "node scripts/manage-versions.mjs add developer_docs",
|
||||
"version:add:components": "node scripts/manage-versions.mjs add components",
|
||||
"version:remove:docs": "node scripts/manage-versions.mjs remove docs",
|
||||
"version:remove:developer_portal": "node scripts/manage-versions.mjs remove developer_portal",
|
||||
"version:remove:admin_docs": "node scripts/manage-versions.mjs remove admin_docs",
|
||||
"version:remove:developer_docs": "node scripts/manage-versions.mjs remove developer_docs",
|
||||
"version:remove:components": "node scripts/manage-versions.mjs remove components"
|
||||
},
|
||||
"dependencies": {
|
||||
|
||||
@@ -30,9 +30,11 @@ const __dirname = path.dirname(__filename);
|
||||
const CONFIG_FILE = path.join(__dirname, '..', 'versions-config.json');
|
||||
|
||||
// Parse command line arguments
|
||||
const args = process.argv.slice(2);
|
||||
const rawArgs = process.argv.slice(2);
|
||||
const skipGenerate = rawArgs.includes('--skip-generate');
|
||||
const args = rawArgs.filter((a) => a !== '--skip-generate');
|
||||
const command = args[0]; // 'add' or 'remove'
|
||||
const section = args[1]; // 'docs', 'developer_portal', or 'components'
|
||||
const section = args[1]; // 'docs', 'admin_docs', 'developer_docs', or 'components'
|
||||
const version = args[2]; // version string like '1.2.0'
|
||||
|
||||
function loadConfig() {
|
||||
@@ -43,36 +45,158 @@ function saveConfig(config) {
|
||||
fs.writeFileSync(CONFIG_FILE, JSON.stringify(config, null, 2) + '\n');
|
||||
}
|
||||
|
||||
function fixVersionedImports(version) {
|
||||
const versionedDocsPath = path.join(__dirname, '..', 'versioned_docs', `version-${version}`);
|
||||
function freezeDataImports(section, version) {
|
||||
// MDX files can `import` JSON/YAML data from outside the section, either
|
||||
// via escaping relative paths (e.g. country-map-tools.mdx imports
|
||||
// `../../data/countries.json`) or via the `@site/` alias (e.g.
|
||||
// feature-flags.mdx imports `@site/static/feature-flags.json`). Without
|
||||
// intervention the snapshot keeps reading the live file, so the
|
||||
// historical version's content silently changes whenever the data file
|
||||
// is updated. Copy each escaping data import into a snapshot-local
|
||||
// `_versioned_data/` dir and rewrite the import to point there.
|
||||
const sectionRoot = section === 'docs'
|
||||
? path.join(__dirname, '..', 'docs')
|
||||
: path.join(__dirname, '..', section);
|
||||
const docsRoot = path.join(__dirname, '..');
|
||||
const versionedDocsDir = section === 'docs'
|
||||
? `versioned_docs/version-${version}`
|
||||
: `${section}_versioned_docs/version-${version}`;
|
||||
const versionedDocsPath = path.join(__dirname, '..', versionedDocsDir);
|
||||
const frozenDataDir = path.join(versionedDocsPath, '_versioned_data');
|
||||
|
||||
// Files that need import path fixes
|
||||
const filesToFix = [
|
||||
'contributing/resources.mdx',
|
||||
'configuration/country-map-tools.mdx'
|
||||
];
|
||||
if (!fs.existsSync(versionedDocsPath)) {
|
||||
return;
|
||||
}
|
||||
|
||||
console.log(` Fixing relative imports in versioned docs...`);
|
||||
console.log(` Freezing data imports in ${versionedDocsDir}...`);
|
||||
|
||||
filesToFix.forEach(filePath => {
|
||||
const fullPath = path.join(versionedDocsPath, filePath);
|
||||
if (fs.existsSync(fullPath)) {
|
||||
let content = fs.readFileSync(fullPath, 'utf8');
|
||||
// Matches data file imports in two flavors:
|
||||
// `from '../../foo/bar.json'` (relative, must escape one or more dirs)
|
||||
// `from '@site/static/foo.json'` (Docusaurus site-root alias)
|
||||
const dataImportRe = /(from\s+['"])((?:\.\.\/)+|@site\/)([^'"\s]+\.(?:json|ya?ml))(['"])/g;
|
||||
|
||||
// Fix imports that go up two directories to go up three instead
|
||||
content = content.replace(
|
||||
/from ['"]\.\.\/\.\.\/src\//g,
|
||||
"from '../../../src/"
|
||||
);
|
||||
content = content.replace(
|
||||
/from ['"]\.\.\/\.\.\/data\//g,
|
||||
"from '../../../data/"
|
||||
);
|
||||
|
||||
fs.writeFileSync(fullPath, content);
|
||||
console.log(` Fixed imports in ${filePath}`);
|
||||
function freezeOne(fullPath, depth, prefix, pathSpec, importPath, suffix) {
|
||||
let resolvedSource;
|
||||
if (pathSpec === '@site/') {
|
||||
// `@site/...` always resolves relative to the docs root.
|
||||
resolvedSource = path.join(docsRoot, importPath);
|
||||
} else {
|
||||
// Relative path — must escape the file's depth within the section
|
||||
// to point at content outside the section. Imports that stay inside
|
||||
// are copied wholesale by Docusaurus, so we leave them alone.
|
||||
const upCount = pathSpec.match(/\.\.\//g).length;
|
||||
if (upCount <= depth) return null;
|
||||
const relativeFromVersioned = path.relative(versionedDocsPath, fullPath);
|
||||
const originalDir = path.dirname(path.join(sectionRoot, relativeFromVersioned));
|
||||
resolvedSource = path.resolve(originalDir, pathSpec + importPath);
|
||||
}
|
||||
});
|
||||
// Skip imports that land inside the section root — those get copied
|
||||
// with the section snapshot already.
|
||||
const relFromSection = path.relative(sectionRoot, resolvedSource);
|
||||
if (!relFromSection.startsWith('..')) return null;
|
||||
const relFromDocsRoot = path.relative(docsRoot, resolvedSource);
|
||||
if (relFromDocsRoot.startsWith('..') || !fs.existsSync(resolvedSource)) {
|
||||
return null;
|
||||
}
|
||||
const destPath = path.join(frozenDataDir, relFromDocsRoot);
|
||||
fs.mkdirSync(path.dirname(destPath), { recursive: true });
|
||||
fs.copyFileSync(resolvedSource, destPath);
|
||||
const rewritten = path
|
||||
.relative(path.dirname(fullPath), destPath)
|
||||
.split(path.sep)
|
||||
.join('/');
|
||||
const finalImport = rewritten.startsWith('.') ? rewritten : `./${rewritten}`;
|
||||
return `${prefix}${finalImport}${suffix}`;
|
||||
}
|
||||
|
||||
function walk(dir, depth) {
|
||||
for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
|
||||
const fullPath = path.join(dir, entry.name);
|
||||
if (entry.isDirectory()) {
|
||||
if (entry.name.startsWith('_')) continue;
|
||||
walk(fullPath, depth + 1);
|
||||
} else if (entry.isFile() && /\.(md|mdx)$/.test(entry.name)) {
|
||||
const original = fs.readFileSync(fullPath, 'utf8');
|
||||
let inFence = false;
|
||||
let mutated = false;
|
||||
const updated = original.split('\n').map(line => {
|
||||
if (/^\s*(```|~~~)/.test(line)) {
|
||||
inFence = !inFence;
|
||||
return line;
|
||||
}
|
||||
if (inFence) return line;
|
||||
return line.replace(dataImportRe, (match, prefix, pathSpec, importPath, suffix) => {
|
||||
const rewritten = freezeOne(fullPath, depth, prefix, pathSpec, importPath, suffix);
|
||||
if (rewritten === null) return match;
|
||||
mutated = true;
|
||||
return rewritten;
|
||||
});
|
||||
}).join('\n');
|
||||
if (mutated) {
|
||||
fs.writeFileSync(fullPath, updated);
|
||||
const rel = path.relative(versionedDocsPath, fullPath);
|
||||
console.log(` Froze data imports in ${rel}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
walk(versionedDocsPath, 0);
|
||||
}
|
||||
|
||||
function fixVersionedImports(section, version) {
|
||||
// Versioned content lands one directory deeper than the source content,
|
||||
// so any `../../src/` or `../../data/` imports in .md/.mdx files need
|
||||
// an extra `../` to keep reaching docs/src and docs/data.
|
||||
const versionedDocsDir = section === 'docs'
|
||||
? `versioned_docs/version-${version}`
|
||||
: `${section}_versioned_docs/version-${version}`;
|
||||
const versionedDocsPath = path.join(__dirname, '..', versionedDocsDir);
|
||||
|
||||
if (!fs.existsSync(versionedDocsPath)) {
|
||||
return;
|
||||
}
|
||||
|
||||
console.log(` Fixing relative imports in ${versionedDocsDir}...`);
|
||||
|
||||
// Imports whose `../` count exceeds the file's depth within the section
|
||||
// escape the section root, so they need one extra `../` once the file
|
||||
// lives one level deeper inside the snapshot dir. Imports that stay
|
||||
// inside the section are unaffected (the section copies wholesale).
|
||||
function walk(dir, depth) {
|
||||
for (const entry of fs.readdirSync(dir, { withFileTypes: true })) {
|
||||
const fullPath = path.join(dir, entry.name);
|
||||
if (entry.isDirectory()) {
|
||||
walk(fullPath, depth + 1);
|
||||
} else if (entry.isFile() && /\.(md|mdx)$/.test(entry.name)) {
|
||||
const original = fs.readFileSync(fullPath, 'utf8');
|
||||
// Track fenced code blocks so we don't rewrite import samples inside
|
||||
// ```ts / ```js (etc.) blocks that are documentation, not real imports.
|
||||
let inFence = false;
|
||||
const updated = original.split('\n').map(line => {
|
||||
if (/^\s*(```|~~~)/.test(line)) {
|
||||
inFence = !inFence;
|
||||
return line;
|
||||
}
|
||||
if (inFence) return line;
|
||||
return line.replace(
|
||||
/(from\s+['"])((?:\.\.\/)+)/g,
|
||||
(match, prefix, dots) => {
|
||||
const upCount = dots.match(/\.\.\//g).length;
|
||||
return upCount > depth ? `${prefix}../${dots}` : match;
|
||||
},
|
||||
);
|
||||
}).join('\n');
|
||||
if (updated !== original) {
|
||||
fs.writeFileSync(fullPath, updated);
|
||||
const rel = path.relative(versionedDocsPath, fullPath);
|
||||
console.log(` Fixed imports in ${rel}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
walk(versionedDocsPath, 0);
|
||||
}
|
||||
|
||||
function addVersion(section, version) {
|
||||
@@ -91,6 +215,28 @@ function addVersion(section, version) {
|
||||
|
||||
console.log(`Creating version ${version} for ${section}...`);
|
||||
|
||||
// Refresh auto-generated content (database pages, API reference,
|
||||
// component playground) so the snapshot captures the current state of
|
||||
// master rather than whatever happened to be on disk. `generate:smart`
|
||||
// hashes its inputs and skips unchanged generators, so this is cheap
|
||||
// when the dev already has fresh output.
|
||||
//
|
||||
// Use --skip-generate if you've placed a CI-artifact databases.json
|
||||
// (the `database-diagnostics` artifact from Python-Integration) and
|
||||
// want to preserve it instead of letting the local env regenerate it.
|
||||
// See docs/README.md "Before You Cut" for the canonical release flow.
|
||||
if (skipGenerate) {
|
||||
console.log(` Skipping auto-gen refresh (--skip-generate set)`);
|
||||
} else {
|
||||
console.log(` Refreshing auto-generated docs...`);
|
||||
try {
|
||||
execSync('yarn run generate:smart', { stdio: 'inherit' });
|
||||
} catch (error) {
|
||||
console.error(`Failed to refresh auto-generated docs: ${error.message}`);
|
||||
process.exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
// Run Docusaurus version command
|
||||
const docusaurusCommand = section === 'docs'
|
||||
? `yarn docusaurus docs:version ${version}`
|
||||
@@ -103,10 +249,12 @@ function addVersion(section, version) {
|
||||
process.exit(1);
|
||||
}
|
||||
|
||||
// Fix relative imports in versioned docs (for main docs section only)
|
||||
if (section === 'docs') {
|
||||
fixVersionedImports(version);
|
||||
}
|
||||
// Freeze data imports BEFORE adjusting paths, so the depth-aware rewriter
|
||||
// doesn't process the now-local imports we just rewrote.
|
||||
freezeDataImports(section, version);
|
||||
|
||||
// Fix relative imports in versioned content
|
||||
fixVersionedImports(section, version);
|
||||
|
||||
// Update config
|
||||
// Add to onlyIncludeVersions array (after 'current')
|
||||
@@ -121,10 +269,15 @@ function addVersion(section, version) {
|
||||
banner: 'none'
|
||||
};
|
||||
|
||||
// Optionally update lastVersion if this is the first non-current version
|
||||
if (config[section].onlyIncludeVersions.length === 2) {
|
||||
config[section].lastVersion = version;
|
||||
}
|
||||
// Note: we deliberately do NOT auto-bump `lastVersion` to the new
|
||||
// version. Superset's docs site keeps `lastVersion: 'current'` so
|
||||
// the canonical URLs (`/user-docs/...`, `/admin-docs/...`,
|
||||
// `/developer-docs/...`, `/components/...`) always render master
|
||||
// content; cut versions are accessed only via their explicit version
|
||||
// segment. (`/docs/...` paths are legacy and handled via per-page
|
||||
// redirects in docusaurus.config.ts — not a current canonical
|
||||
// form.) If you want a different policy, edit versions-config.json
|
||||
// after cutting.
|
||||
|
||||
saveConfig(config);
|
||||
console.log(`✅ Version ${version} added successfully to ${section}`);
|
||||
@@ -185,8 +338,17 @@ function removeVersion(section, version) {
|
||||
const versionIndex = versions.indexOf(version);
|
||||
if (versionIndex > -1) {
|
||||
versions.splice(versionIndex, 1);
|
||||
fs.writeFileSync(versionsJsonPath, JSON.stringify(versions, null, 2) + '\n');
|
||||
console.log(` Updated ${versionsJsonFile}`);
|
||||
if (versions.length === 0) {
|
||||
// Sections with no versions shouldn't carry an empty versions file
|
||||
// on disk — Docusaurus doesn't require it, and an empty `[]` file
|
||||
// gets picked up by `docusaurus version` and snapshotted into the
|
||||
// next cut.
|
||||
fs.unlinkSync(versionsJsonPath);
|
||||
console.log(` Removed empty ${versionsJsonFile}`);
|
||||
} else {
|
||||
fs.writeFileSync(versionsJsonPath, JSON.stringify(versions, null, 2) + '\n');
|
||||
console.log(` Updated ${versionsJsonFile}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -211,17 +373,20 @@ function removeVersion(section, version) {
|
||||
function printUsage() {
|
||||
console.log(`
|
||||
Usage:
|
||||
node scripts/manage-versions.js add <section> <version>
|
||||
node scripts/manage-versions.js remove <section> <version>
|
||||
node scripts/manage-versions.mjs add <section> <version> [--skip-generate]
|
||||
node scripts/manage-versions.mjs remove <section> <version>
|
||||
|
||||
Where:
|
||||
- section: 'docs', 'developer_portal', or 'components'
|
||||
- section: 'docs', 'developer_docs', 'admin_docs', or 'components'
|
||||
- version: version string (e.g., '1.2.0', '2.0.0')
|
||||
- --skip-generate: skip refreshing auto-generated docs before snapshotting
|
||||
(use when you've already placed a fresh databases.json
|
||||
from CI and want to preserve it)
|
||||
|
||||
Examples:
|
||||
node scripts/manage-versions.js add docs 2.0.0
|
||||
node scripts/manage-versions.js add developer_portal 1.3.0
|
||||
node scripts/manage-versions.js remove components 1.0.0
|
||||
node scripts/manage-versions.mjs add docs 2.0.0
|
||||
node scripts/manage-versions.mjs add developer_docs 1.3.0
|
||||
node scripts/manage-versions.mjs remove components 1.0.0
|
||||
`);
|
||||
}
|
||||
|
||||
|
||||
@@ -30,19 +30,30 @@ import { DownOutlined } from '@ant-design/icons';
|
||||
|
||||
import styles from './styles.module.css';
|
||||
|
||||
// Map each versioned plugin id to the URL prefix it actually serves
|
||||
// content from. Three of the four routeBasePath values differ from
|
||||
// their pluginId — the default preset-classic docs plugin lives at
|
||||
// `/user-docs`, and admin_docs / developer_docs use hyphens in their
|
||||
// URLs even though the plugin ids use underscores. Without this map
|
||||
// the basePath derivation below would mis-split the pathname for
|
||||
// those sections and the version dropdown would jump to the section
|
||||
// root instead of preserving the current page.
|
||||
//
|
||||
// Keep in sync with the `routeBasePath` values in docusaurus.config.ts.
|
||||
const PLUGIN_ID_TO_BASE_PATH = {
|
||||
default: '/user-docs',
|
||||
components: '/components',
|
||||
admin_docs: '/admin-docs',
|
||||
developer_docs: '/developer-docs',
|
||||
};
|
||||
|
||||
export default function DocVersionBadge() {
|
||||
const activePlugin = useActivePlugin();
|
||||
const { pathname } = useLocation();
|
||||
const pluginId = activePlugin?.pluginId;
|
||||
const [versionedPath, setVersionedPath] = React.useState('');
|
||||
|
||||
// Show version selector for all versioned sections
|
||||
const isVersioned = [
|
||||
'default', // main docs
|
||||
'components',
|
||||
'tutorials',
|
||||
'developer_portal',
|
||||
].includes(pluginId);
|
||||
const isVersioned = pluginId && pluginId in PLUGIN_ID_TO_BASE_PATH;
|
||||
|
||||
const { preferredVersion } = useDocsPreferredVersion(pluginId);
|
||||
const versions = useVersions(pluginId);
|
||||
@@ -53,7 +64,8 @@ export default function DocVersionBadge() {
|
||||
if (!pathname || !version || !pluginId) return;
|
||||
|
||||
let relativePath = '';
|
||||
const basePath = pluginId === 'default' ? '/docs' : `/${pluginId}`;
|
||||
const basePath = PLUGIN_ID_TO_BASE_PATH[pluginId];
|
||||
if (!basePath) return;
|
||||
|
||||
// Handle different version path patterns
|
||||
if (pathname.includes(basePath)) {
|
||||
|
||||
@@ -1,121 +0,0 @@
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
import React, { useState, useEffect } from 'react';
|
||||
import DocVersionBanner from '@theme-original/DocVersionBanner';
|
||||
import {
|
||||
useActivePlugin,
|
||||
useDocsVersion,
|
||||
useVersions,
|
||||
} from '@docusaurus/plugin-content-docs/client';
|
||||
import { useLocation } from '@docusaurus/router';
|
||||
import { useDocsPreferredVersion } from '@docusaurus/theme-common';
|
||||
import { Dropdown } from 'antd';
|
||||
import { DownOutlined } from '@ant-design/icons';
|
||||
|
||||
import styles from './styles.module.css';
|
||||
|
||||
export default function DocVersionBannerWrapper(props) {
|
||||
const activePlugin = useActivePlugin();
|
||||
const { pathname } = useLocation();
|
||||
const pluginId = activePlugin?.pluginId;
|
||||
const [versionedPath, setVersionedPath] = useState('');
|
||||
|
||||
// Only show version selector for tutorials
|
||||
// Main docs, components, and developer_portal use the DocVersionBadge component instead
|
||||
const isVersioned = pluginId && ['tutorials'].includes(pluginId);
|
||||
|
||||
const { preferredVersion } = useDocsPreferredVersion(pluginId);
|
||||
const versions = useVersions(pluginId);
|
||||
const version = useDocsVersion();
|
||||
|
||||
// Early return if required data is not available
|
||||
if (!isVersioned || !versions || !version) {
|
||||
return <DocVersionBanner {...props} />;
|
||||
}
|
||||
|
||||
// Extract the current page path relative to the version
|
||||
useEffect(() => {
|
||||
if (!pathname || !version || !pluginId) return;
|
||||
|
||||
let relativePath = '';
|
||||
|
||||
// Handle different version path patterns
|
||||
if (pathname.includes(`/${pluginId}/`)) {
|
||||
// Extract the part after the version
|
||||
// Example: /components/1.1.0/ui-components/button -> /ui-components/button
|
||||
const parts = pathname.split(`/${pluginId}/`);
|
||||
if (parts.length > 1) {
|
||||
const afterPluginId = parts[1];
|
||||
// Find where the version part ends
|
||||
const versionParts = afterPluginId.split('/');
|
||||
if (versionParts.length > 1) {
|
||||
// Remove the version part and join the rest
|
||||
relativePath = '/' + versionParts.slice(1).join('/');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
setVersionedPath(relativePath);
|
||||
}, [pathname, version, pluginId]);
|
||||
|
||||
// Create dropdown items for version selection
|
||||
const items = versions.map(v => {
|
||||
// Construct the URL for this version, preserving the current page
|
||||
// v.path is the version-specific path like "1.0.0" or "next"
|
||||
let versionUrl = v.path;
|
||||
|
||||
if (versionedPath) {
|
||||
// Construct the full URL with the version and the current page path
|
||||
versionUrl = v.path + versionedPath;
|
||||
}
|
||||
|
||||
return {
|
||||
key: v.name,
|
||||
label: (
|
||||
<a href={versionUrl}>
|
||||
{v.label}
|
||||
{v.name === version.name && ' (current)'}
|
||||
{v.name === preferredVersion?.name && ' (preferred)'}
|
||||
</a>
|
||||
),
|
||||
};
|
||||
});
|
||||
|
||||
return (
|
||||
<>
|
||||
<DocVersionBanner {...props} />
|
||||
{isVersioned && (
|
||||
<div className={styles.versionBanner}>
|
||||
<div className={styles.versionContainer}>
|
||||
<span className={styles.versionLabel}>Version:</span>
|
||||
<Dropdown menu={{ items }} trigger={['click']}>
|
||||
<a
|
||||
onClick={e => e.preventDefault()}
|
||||
className={styles.versionSelector}
|
||||
>
|
||||
{version.label} <DownOutlined />
|
||||
</a>
|
||||
</Dropdown>
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
</>
|
||||
);
|
||||
}
|
||||
@@ -1,49 +0,0 @@
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
.versionBanner {
|
||||
background-color: var(--ifm-color-emphasis-100);
|
||||
padding: 0.5rem 1rem;
|
||||
margin-bottom: 1rem;
|
||||
border-bottom: 1px solid var(--ifm-color-emphasis-200);
|
||||
}
|
||||
|
||||
.versionContainer {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
max-width: var(--ifm-container-width);
|
||||
margin: 0 auto;
|
||||
padding: 0 var(--ifm-spacing-horizontal);
|
||||
}
|
||||
|
||||
.versionLabel {
|
||||
font-weight: bold;
|
||||
margin-right: 0.5rem;
|
||||
}
|
||||
|
||||
.versionSelector {
|
||||
cursor: pointer;
|
||||
color: var(--ifm-color-primary);
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.versionSelector:hover {
|
||||
text-decoration: none;
|
||||
color: var(--ifm-color-primary-darker);
|
||||
}
|
||||
@@ -1,3 +0,0 @@
|
||||
[
|
||||
"1.0.0"
|
||||
]
|
||||
10
superset-frontend/package-lock.json
generated
10
superset-frontend/package-lock.json
generated
@@ -172,7 +172,7 @@
|
||||
"@babel/preset-env": "^7.29.5",
|
||||
"@babel/preset-react": "^7.28.5",
|
||||
"@babel/preset-typescript": "^7.28.5",
|
||||
"@babel/register": "^7.23.7",
|
||||
"@babel/register": "^7.29.3",
|
||||
"@babel/runtime": "^7.29.2",
|
||||
"@babel/runtime-corejs3": "^7.29.2",
|
||||
"@babel/types": "^7.28.6",
|
||||
@@ -2575,9 +2575,9 @@
|
||||
}
|
||||
},
|
||||
"node_modules/@babel/register": {
|
||||
"version": "7.28.6",
|
||||
"resolved": "https://registry.npmjs.org/@babel/register/-/register-7.28.6.tgz",
|
||||
"integrity": "sha512-pgcbbEl/dWQYb6L6Yew6F94rdwygfuv+vJ/tXfwIOYAfPB6TNWpXUMEtEq3YuTeHRdvMIhvz13bkT9CNaS+wqA==",
|
||||
"version": "7.29.3",
|
||||
"resolved": "https://registry.npmjs.org/@babel/register/-/register-7.29.3.tgz",
|
||||
"integrity": "sha512-F6C1KpIdoImKQfsD6HSxZ+mS4YY/2Q+JsqrmTC5ApVkTR2rG+nnbpjhWwzA5bDNu8mJjB3AryqDaWFLd4gCbJQ==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"dependencies": {
|
||||
@@ -49733,7 +49733,7 @@
|
||||
"dependencies": {
|
||||
"chalk": "^5.6.2",
|
||||
"lodash-es": "^4.18.1",
|
||||
"yeoman-generator": "^8.1.2",
|
||||
"yeoman-generator": "^8.2.2",
|
||||
"yosay": "^3.0.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
||||
@@ -253,7 +253,7 @@
|
||||
"@babel/preset-env": "^7.29.5",
|
||||
"@babel/preset-react": "^7.28.5",
|
||||
"@babel/preset-typescript": "^7.28.5",
|
||||
"@babel/register": "^7.23.7",
|
||||
"@babel/register": "^7.29.3",
|
||||
"@babel/runtime": "^7.29.2",
|
||||
"@babel/runtime-corejs3": "^7.29.2",
|
||||
"@babel/types": "^7.28.6",
|
||||
|
||||
@@ -118,7 +118,7 @@ const NewChartButtonContainer = styled.div`
|
||||
${({ theme }) => css`
|
||||
display: flex;
|
||||
justify-content: flex-end;
|
||||
padding: ${theme.sizeUnit * 3}px ${theme.sizeUnit * 2}px 0;
|
||||
padding-right: ${theme.sizeUnit * 2}px;
|
||||
`}
|
||||
`;
|
||||
|
||||
|
||||
@@ -27,6 +27,55 @@ from superset.utils.pandas_postprocessing.utils import (
|
||||
)
|
||||
|
||||
|
||||
def _restore_dropped_metric_columns(
|
||||
df: DataFrame,
|
||||
expected_metrics: list[str],
|
||||
orig_columns: Optional[DataFrame],
|
||||
) -> DataFrame:
|
||||
"""Re-add metric columns that pivot_table dropped due to all-NaN values.
|
||||
|
||||
When drop_missing_columns=True, pandas pivot_table silently removes columns
|
||||
whose entries are all NaN. This breaks downstream post-processing steps
|
||||
(rename, rolling) that use validate_column_args to assert the columns exist.
|
||||
Restoring the columns as all-NaN preserves the expected schema while still
|
||||
allowing sparse category combinations to be dropped — only metric-level
|
||||
absences are restored.
|
||||
|
||||
Note: this intentionally changes the visible output of drop_missing_columns=True
|
||||
for all-NaN metrics: they are kept as empty series rather than dropped. This is
|
||||
necessary for chart-rendering post-processing to maintain schema stability.
|
||||
|
||||
:param df: Post-pivot DataFrame (may have MultiIndex or flat columns).
|
||||
:param expected_metrics: Metric column names that should exist at level 0.
|
||||
:param orig_columns: Pre-pivot slice of the groupby column(s), used to
|
||||
lazily compute (metric, *col_vals) restoration keys for only the
|
||||
metrics that were entirely absent after pivoting. None for flat pivots.
|
||||
"""
|
||||
if orig_columns is not None:
|
||||
# MultiIndex case. Only compute keys for metrics that were entirely
|
||||
# dropped — skips metrics still present, avoiding O(n_rows × n_metrics)
|
||||
# upfront work when no all-NaN drop occurred.
|
||||
existing_metrics = (
|
||||
set(df.columns.get_level_values(0)) if len(df.columns) > 0 else set()
|
||||
)
|
||||
missing = {m for m in expected_metrics if m not in existing_metrics}
|
||||
if missing:
|
||||
# Dict preserves data-insertion order and deduplicates, so restored
|
||||
# columns appear in deterministic order.
|
||||
keys_dict: dict[tuple[Any, ...], None] = {}
|
||||
for row in orig_columns.itertuples():
|
||||
for metric in missing:
|
||||
keys_dict[(metric, *row[1:])] = None
|
||||
for key in keys_dict:
|
||||
df[key] = float("nan")
|
||||
else:
|
||||
# Flat case (no groupby columns): restore simple metric columns.
|
||||
for metric in expected_metrics:
|
||||
if metric not in df.columns:
|
||||
df[metric] = float("nan")
|
||||
return df
|
||||
|
||||
|
||||
@validate_column_args("index", "columns")
|
||||
def pivot( # pylint: disable=too-many-arguments
|
||||
df: DataFrame,
|
||||
@@ -50,7 +99,11 @@ def pivot( # pylint: disable=too-many-arguments
|
||||
:param column_fill_value: Value to replace missing pivot columns with. By default
|
||||
replaces missing values with "<NULL>". Set to `None` to remove columns
|
||||
with missing values.
|
||||
:param drop_missing_columns: Do not include columns whose entries are all missing
|
||||
:param drop_missing_columns: Do not include columns whose entries are all missing.
|
||||
Note: metric columns entirely absent after pivoting (the whole metric is
|
||||
all-NaN) are restored as empty series so that downstream post-processing
|
||||
(rename, rolling) can reference them. Sparse category combinations where
|
||||
only some (metric, category) pairs are all-NaN may still be dropped.
|
||||
:param combine_value_with_metric: Display metrics side by side within each column,
|
||||
as opposed to each column being displayed side by side for each metric.
|
||||
:param aggregates: A mapping from aggregate column name to the aggregate
|
||||
@@ -79,15 +132,20 @@ def pivot( # pylint: disable=too-many-arguments
|
||||
# Remove once/if support is added.
|
||||
aggfunc = {na.column: na.aggfunc for na in aggregate_funcs.values()}
|
||||
|
||||
# When dropna = False, the pivot_table function will calculate cartesian-product
|
||||
# for MultiIndex.
|
||||
# For drop_missing_columns=False: pre-compute all (metric, *col_vals) tuples
|
||||
# to filter Cartesian-product columns after pivoting.
|
||||
# For drop_missing_columns=True: save a slice of the groupby column data so
|
||||
# that _restore_dropped_metric_columns can build keys lazily — only for metrics
|
||||
# that were actually dropped, avoiding O(n_rows × n_metrics) upfront work in
|
||||
# the common case where no metric is entirely all-NaN.
|
||||
# https://github.com/apache/superset/issues/15956
|
||||
# https://github.com/pandas-dev/pandas/issues/18030
|
||||
series_set = set()
|
||||
pivot_key_set: set[tuple[Any, ...]] = set()
|
||||
if not drop_missing_columns and columns:
|
||||
for row in df[columns].itertuples():
|
||||
for metric in aggfunc.keys():
|
||||
series_set.add(tuple([metric]) + tuple(row[1:])) # noqa: C409
|
||||
pivot_key_set.add((metric, *row[1:]))
|
||||
orig_columns_df = df[columns] if columns else None
|
||||
|
||||
df = df.pivot_table(
|
||||
values=aggfunc.keys(),
|
||||
@@ -100,10 +158,14 @@ def pivot( # pylint: disable=too-many-arguments
|
||||
margins_name=marginal_distribution_name,
|
||||
)
|
||||
|
||||
if not drop_missing_columns and len(series_set) > 0 and not df.empty:
|
||||
df = df.drop(df.columns.difference(series_set), axis=PandasAxis.COLUMN)
|
||||
if drop_missing_columns:
|
||||
df = _restore_dropped_metric_columns(df, list(aggfunc.keys()), orig_columns_df)
|
||||
elif pivot_key_set and not df.empty:
|
||||
df = df.drop(df.columns.difference(pivot_key_set), axis=PandasAxis.COLUMN)
|
||||
|
||||
if combine_value_with_metric:
|
||||
df = df.stack(0).unstack()
|
||||
# dropna=False preserves restored all-NaN metric rows that would otherwise
|
||||
# be silently dropped by stack's default dropna=True behavior.
|
||||
df = df.stack(level=0, dropna=False).unstack()
|
||||
|
||||
return df
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
# under the License.
|
||||
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
import pytest
|
||||
from pandas import DataFrame, to_datetime
|
||||
|
||||
@@ -203,3 +204,245 @@ def test_pivot_eliminate_cartesian_product_columns():
|
||||
"metric2, 1, 1",
|
||||
]
|
||||
assert np.isnan(df["metric, 1, 1"][0])
|
||||
|
||||
|
||||
def test_pivot_preserves_all_nan_metric_flat():
    """
    Pivot with drop_missing_columns=True must not drop metric columns whose entries
    are all NaN. This prevents downstream post-processing (e.g. rename) from failing
    with "Referenced columns not available in DataFrame" when a Jinja metric
    expression evaluates to NULL for every row (SC-100398).
    """
    # No groupby columns here, so the pivot output has flat (non-MultiIndex)
    # columns — this exercises the flat-restoration branch.
    mock_df = DataFrame(
        {
            "dttm": to_datetime(["2019-01-01", "2019-01-02", "2019-01-03"]),
            "metric": [np.nan, np.nan, np.nan],
        }
    )

    df = pivot(
        df=mock_df,
        index=["dttm"],
        aggregates={"metric": {"operator": "mean"}},
        drop_missing_columns=True,
    )

    # The all-NaN metric survives as an empty series rather than vanishing.
    assert "metric" in df.columns
    assert df["metric"].isna().all()
|
||||
|
||||
|
||||
def test_pivot_preserves_all_nan_metric_with_columns():
    """
    Pivot with groupby columns and drop_missing_columns=True must restore the
    exact (metric, category_val) MultiIndex keys when all values for that metric
    are NaN. The restored keys must use the actual category values from the input
    data so that downstream rename/rolling validation and flatten produce the
    correct column names.
    """
    # Single groupby column with two distinct category values; the metric is
    # all-NaN so pandas pivot_table would otherwise drop it entirely.
    mock_df = DataFrame(
        {
            "dttm": to_datetime(["2019-01-01", "2019-01-01"]),
            "category": ["A", "B"],
            "metric": [np.nan, np.nan],
        }
    )

    df = pivot(
        df=mock_df,
        index=["dttm"],
        columns=["category"],
        aggregates={"metric": {"operator": "mean"}},
        drop_missing_columns=True,
    )

    assert isinstance(df.columns, pd.MultiIndex)
    assert "metric" in df.columns.get_level_values(0)
    # Exact keys must reflect the real category values, not placeholders.
    assert ("metric", "A") in df.columns
    assert ("metric", "B") in df.columns

    # Flattening must yield the conventional "metric, <category>" names.
    df = flatten(df)
    assert "metric, A" in df.columns
    assert "metric, B" in df.columns
    assert df["metric, A"].isna().all()
    assert df["metric, B"].isna().all()
|
||||
|
||||
|
||||
def test_pivot_preserves_all_nan_metric_multi_column():
    """
    Pivot with multiple groupby columns and an all-NaN metric restores the full
    multi-level (metric, col_val_1, col_val_2) key, not a truncated or placeholder
    version. Exercises the case where columns=["country", "category"].
    """
    # 2x2 grid of (country, category) combinations, all sharing one timestamp,
    # with the metric NaN everywhere.
    mock_df = DataFrame(
        {
            "dttm": to_datetime(
                ["2019-01-01", "2019-01-01", "2019-01-01", "2019-01-01"]
            ),
            "country": ["US", "US", "EU", "EU"],
            "category": ["A", "B", "A", "B"],
            "metric": [np.nan, np.nan, np.nan, np.nan],
        }
    )

    df = pivot(
        df=mock_df,
        index=["dttm"],
        columns=["country", "category"],
        aggregates={"metric": {"operator": "mean"}},
        drop_missing_columns=True,
    )

    assert isinstance(df.columns, pd.MultiIndex)
    assert "metric" in df.columns.get_level_values(0)
    # All four combinations must be restored with correct full tuple keys.
    assert ("metric", "US", "A") in df.columns
    assert ("metric", "US", "B") in df.columns
    assert ("metric", "EU", "A") in df.columns
    assert ("metric", "EU", "B") in df.columns

    # Flattened names join all levels: "metric, <country>, <category>".
    df = flatten(df)
    assert "metric, US, A" in df.columns
    assert "metric, EU, B" in df.columns
    assert df["metric, US, A"].isna().all()
|
||||
|
||||
|
||||
def test_pivot_restored_nan_metric_column_order_is_deterministic():
    """
    Restored all-NaN metric columns must appear in data-insertion order, not
    in nondeterministic hash-set iteration order. This prevents column ordering
    from varying across Python processes (which randomize hash seeds by default).
    """
    # Categories deliberately appear out of alphabetical order (C, A, B) so an
    # accidental sort or hash-order iteration would be detected below.
    mock_df = DataFrame(
        {
            "dttm": to_datetime(["2019-01-01", "2019-01-01", "2019-01-01"]),
            "category": ["C", "A", "B"],
            "metric": [np.nan, np.nan, np.nan],
        }
    )

    df = pivot(
        df=mock_df,
        index=["dttm"],
        columns=["category"],
        aggregates={"metric": {"operator": "mean"}},
        drop_missing_columns=True,
    )

    # Columns restored in data-insertion order: C, A, B (not alphabetical or random).
    assert list(df.columns.get_level_values(1)) == ["C", "A", "B"]
|
||||
|
||||
|
||||
def test_pivot_preserves_all_nan_metric_combine_value_with_metric():
    """
    When combine_value_with_metric=True, a stack()/unstack() is applied after
    column restoration. stack() drops all-NaN rows by default, which would remove
    the restored metric before downstream post-processing can reference it.
    Using dropna=False on stack() ensures restored all-NaN metrics survive.
    """
    # One all-NaN metric (restored by the pivot) plus one populated metric, so
    # the stack/unstack swap has both kinds of column to carry through.
    mock_df = DataFrame(
        {
            "dttm": to_datetime(["2019-01-01", "2019-01-01"]),
            "category": ["A", "B"],
            "metric": [np.nan, np.nan],
            "metric2": [1.0, 2.0],
        }
    )

    df = pivot(
        df=mock_df,
        index=["dttm"],
        columns=["category"],
        aggregates={
            "metric": {"operator": "mean"},
            "metric2": {"operator": "mean"},
        },
        drop_missing_columns=True,
        combine_value_with_metric=True,
    )

    # After stack()/unstack(), columns are (category_val, metric_name) tuples.
    # The all-NaN metric must appear in level 1 alongside metric2.
    assert isinstance(df.columns, pd.MultiIndex)
    metric_names = df.columns.get_level_values(1).tolist()
    assert "metric" in metric_names
    assert "metric2" in metric_names
|
||||
|
||||
|
||||
def test_pivot_combine_sparse_metrics_no_spurious_extra_columns():
    """
    With drop_missing_columns=True and combine_value_with_metric=True, using
    stack(dropna=False) to preserve restored all-NaN metrics must not alter output
    shape for sparse-but-not-all-NaN metric/category pairs. stack(dropna=False) only
    changes behaviour for rows that are entirely NaN (a restored metric); sparse rows
    with at least one non-NaN value are unaffected — same result as dropna=True.
    """
    # Both metrics are sparse (data for exactly one category each) but neither
    # is entirely all-NaN, so no restoration should occur.
    mock_df = DataFrame(
        {
            "dttm": to_datetime(["2019-01-01", "2019-01-01"]),
            "category": ["A", "B"],
            "metric1": [1.0, np.nan],  # data only for category A
            "metric2": [np.nan, 2.0],  # data only for category B
        }
    )

    df = pivot(
        df=mock_df,
        index=["dttm"],
        columns=["category"],
        aggregates={
            "metric1": {"operator": "mean"},
            "metric2": {"operator": "mean"},
        },
        drop_missing_columns=True,
        combine_value_with_metric=True,
    )

    # After combine, columns are (category_val, metric_name) tuples.
    # Neither metric is entirely absent after pivoting, so _restore adds nothing.
    # stack(dropna=False) does not change results for sparse rows with mixed NaN/data.
    assert isinstance(df.columns, pd.MultiIndex)
    assert sorted(df.columns.get_level_values(0).unique()) == ["A", "B"]
    assert sorted(df.columns.get_level_values(1).unique()) == ["metric1", "metric2"]
    # Sparse NaN cells are present but the data cells must retain their values.
    assert df[("A", "metric1")].iloc[0] == 1.0
    assert df[("B", "metric2")].iloc[0] == 2.0
|
||||
|
||||
|
||||
def test_pivot_only_entirely_absent_metrics_are_restored():
    """
    Only metrics with zero surviving columns after pivoting are restored.
    A metric with partial NaN — data for some categories but not all — must not
    be touched: its present columns are unchanged and its absent sparse combinations
    remain dropped. This makes the restoration invariant explicit.
    """
    # One fully-NaN metric (should be restored) next to a partially-populated
    # metric (should be left exactly as the pivot produced it).
    mock_df = DataFrame(
        {
            "dttm": to_datetime(["2019-01-01", "2019-01-01"]),
            "category": ["A", "B"],
            "metric_all_nan": [np.nan, np.nan],  # entirely absent → restored
            "metric_partial": [1.0, np.nan],  # partially present → not restored
        }
    )

    df = pivot(
        df=mock_df,
        index=["dttm"],
        columns=["category"],
        aggregates={
            "metric_all_nan": {"operator": "mean"},
            "metric_partial": {"operator": "mean"},
        },
        drop_missing_columns=True,
    )

    # metric_all_nan was entirely absent: both category columns are restored as NaN.
    assert ("metric_all_nan", "A") in df.columns
    assert ("metric_all_nan", "B") in df.columns
    assert df[("metric_all_nan", "A")].isna().all()
    assert df[("metric_all_nan", "B")].isna().all()

    # metric_partial has data for A: present column is unchanged, sparse B dropped.
    assert ("metric_partial", "A") in df.columns
    assert ("metric_partial", "B") not in df.columns
    assert df[("metric_partial", "A")].iloc[0] == 1.0
|
||||
|
||||
Reference in New Issue
Block a user