Compare commits

...

7 Commits

Author SHA1 Message Date
Vitor Avila
f7e88e992e Merge branch 'master' into fix/word-cloud-druid-topn-sort 2026-04-20 12:50:38 -03:00
bdonovan1
864a07d97c Merge branch 'master' into fix/word-cloud-druid-topn-sort 2026-04-16 10:14:07 -07:00
Brian Donovan
933dc8866a fix(chart): add defensive JSON parsing to migration
Guard against malformed or non-dict params in word_cloud slices
during upgrade/downgrade. Skips invalid rows instead of aborting
the entire migration.
2026-04-15 11:32:21 -07:00
Brian Donovan
3c5e45e39b fix(chart): improve sort_by_series description wording
Remove "secondary" from the description since sort_by_series and
sort_by_metric are independent controls — either can be used alone.
Clarify that the tiebreaker behavior only applies when both are enabled.

Fixes prettier formatting (+ at end of line, not beginning).
2026-04-15 11:28:52 -07:00
Brian Donovan
334e59f381 chore: retrigger CI 2026-04-14 14:26:24 -07:00
Brian Donovan
427d94b4bd fix(chart): add independent sort_by_series control for word cloud
Make sort_by_metric and sort_by_series independent controls instead of
the previous behavior where series sort was unconditionally appended
alongside the metric sort. This allows users to choose metric-only
sorting (enabling Druid TopN optimization) while optionally adding a
secondary series sort for tie-breaking.

Changes:
- buildQuery.ts: sort_by_metric and sort_by_series are now independent
- controlPanel.tsx: add sort_by_series checkbox (default: false)
- buildQuery.test.ts: cover all four sort combinations
- DB migration: set sort_by_series=true for existing word_cloud charts
  to preserve backward compatibility

Fixes #39072
2026-04-13 12:47:13 -07:00
Brian Donovan
d56d31b252 fix(chart): word cloud secondary sort prevents Druid TopN optimization when sort_by_metric enabled
When sort_by_metric is true, buildQuery unconditionally appended a
secondary ORDER BY series ASC alongside the metric sort. On Druid, any
multi-column ORDER BY prevents the native TopN query optimization,
forcing a full GroupBy scan that can cause dramatic slowdowns and
timeouts on high-cardinality dimensions.

Make the series sort mutually exclusive with sort_by_metric, and add
test coverage for both sort behaviors.

Fixes #39072
2026-04-13 12:24:35 -07:00
4 changed files with 172 additions and 13 deletions

View File

@@ -21,7 +21,8 @@ import { buildQueryContext, QueryFormOrderBy } from '@superset-ui/core';
import { WordCloudFormData } from '../types';
export default function buildQuery(formData: WordCloudFormData) {
const { metric, sort_by_metric, series, row_limit } = formData;
const { metric, sort_by_metric, sort_by_series, series, row_limit } =
formData;
const orderby: QueryFormOrderBy[] = [];
const shouldApplyOrderBy =
row_limit !== undefined && row_limit !== null && row_limit !== 0;
@@ -29,7 +30,7 @@ export default function buildQuery(formData: WordCloudFormData) {
if (sort_by_metric && metric) {
orderby.push([metric, false]);
}
if (series) {
if (sort_by_series && series) {
orderby.push([series, true]);
}

View File

@@ -35,6 +35,22 @@ const config: ControlPanelConfig = {
['adhoc_filters'],
['row_limit'],
['sort_by_metric'],
[
{
name: 'sort_by_series',
config: {
type: 'CheckboxControl',
label: t('Sort by series'),
default: false,
description: t(
'Sort results by series name in ascending order. ' +
'When combined with "Sort by metric", this acts as a tiebreaker ' +
'for equal metric values. Adding this sort may reduce query ' +
'performance on some databases.',
),
},
},
],
],
},
{

View File

@@ -21,17 +21,70 @@ import { VizType } from '@superset-ui/core';
import { WordCloudFormData } from '../src';
import buildQuery from '../src/plugin/buildQuery';
describe('WordCloud buildQuery', () => {
const formData: WordCloudFormData = {
datasource: '5__table',
granularity_sqla: 'ds',
series: 'foo',
viz_type: VizType.WordCloud,
};
const basicFormData: WordCloudFormData = {
datasource: '5__table',
granularity_sqla: 'ds',
series: 'foo',
viz_type: VizType.WordCloud,
};
test('should build columns from series in form data', () => {
const queryContext = buildQuery(formData);
const [query] = queryContext.queries;
expect(query.columns).toEqual(['foo']);
describe('plugin-chart-word-cloud', () => {
describe('buildQuery', () => {
test('should build columns from series in form data', () => {
const queryContext = buildQuery(basicFormData);
const [query] = queryContext.queries;
expect(query.columns).toEqual(['foo']);
});
test('should not include orderby when neither sort option is enabled', () => {
const queryContext = buildQuery({
...basicFormData,
metric: 'count',
sort_by_metric: false,
sort_by_series: false,
row_limit: 100,
});
const [query] = queryContext.queries;
expect(query.orderby).toBeUndefined();
});
test('should order by metric DESC only when sort_by_metric is true', () => {
const queryContext = buildQuery({
...basicFormData,
metric: 'count',
sort_by_metric: true,
sort_by_series: false,
row_limit: 100,
});
const [query] = queryContext.queries;
expect(query.orderby).toEqual([['count', false]]);
});
test('should order by series ASC only when sort_by_series is true', () => {
const queryContext = buildQuery({
...basicFormData,
metric: 'count',
sort_by_metric: false,
sort_by_series: true,
row_limit: 100,
});
const [query] = queryContext.queries;
expect(query.orderby).toEqual([['foo', true]]);
});
test('should order by metric DESC then series ASC when both are true', () => {
const queryContext = buildQuery({
...basicFormData,
metric: 'count',
sort_by_metric: true,
sort_by_series: true,
row_limit: 100,
});
const [query] = queryContext.queries;
expect(query.orderby).toEqual([
['count', false],
['foo', true],
]);
});
});
});

View File

@@ -0,0 +1,89 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""add sort_by_series to word_cloud charts
Revision ID: fd0c8583b46d
Revises: ce6bd21901ab
Create Date: 2026-04-13 19:28:19.021839
"""
from alembic import op
from sqlalchemy import Column, Integer, String, Text
from sqlalchemy.ext.declarative import declarative_base
from superset import db
from superset.migrations.shared.utils import paginated_update
from superset.utils import json
# revision identifiers, used by Alembic.
revision = "fd0c8583b46d"
down_revision = "ce6bd21901ab"
Base = declarative_base()
class Slice(Base):
__tablename__ = "slices"
id = Column(Integer, primary_key=True)
viz_type = Column(String(250))
params = Column(Text)
def upgrade_params(params: dict) -> dict:
if "sort_by_series" not in params:
params["sort_by_series"] = True
return params
def downgrade_params(params: dict) -> dict:
params.pop("sort_by_series", None)
return params
def upgrade():
bind = op.get_bind()
session = db.Session(bind=bind)
for slc in paginated_update(
session.query(Slice).filter(Slice.viz_type == "word_cloud")
):
try:
params = json.loads(slc.params or "{}")
if not isinstance(params, dict):
continue
slc.params = json.dumps(upgrade_params(params))
except (json.JSONDecodeError, TypeError):
continue
session.close()
def downgrade():
bind = op.get_bind()
session = db.Session(bind=bind)
for slc in paginated_update(
session.query(Slice).filter(Slice.viz_type == "word_cloud")
):
try:
params = json.loads(slc.params or "{}")
if not isinstance(params, dict):
continue
slc.params = json.dumps(downgrade_params(params))
except (json.JSONDecodeError, TypeError):
continue
session.close()