mirror of
https://github.com/apache/superset.git
synced 2026-05-13 03:45:12 +00:00
Compare commits
9 Commits
fix/mcp-ex
...
sc-103393-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
89f9ca7364 | ||
|
|
b89364531f | ||
|
|
95ad8b81c7 | ||
|
|
6846244d8c | ||
|
|
c08706e460 | ||
|
|
5f40f5f1bb | ||
|
|
194831d685 | ||
|
|
aaad620cb3 | ||
|
|
6c00ee2eec |
@@ -67,7 +67,7 @@ export const renameOperator: PostProcessingFactory<PostProcessingRename> = (
|
||||
[...metricOffsetMap.entries()].forEach(
|
||||
([metricWithOffset, metricOnly]) => {
|
||||
const offsetLabel = timeOffsets.find(offset =>
|
||||
metricWithOffset.includes(offset),
|
||||
metricWithOffset.endsWith(`${TIME_COMPARISON_SEPARATOR}${offset}`),
|
||||
);
|
||||
renamePairs.push([
|
||||
formData.comparison_type === ComparisonType.Values
|
||||
|
||||
@@ -26,7 +26,7 @@ export const getTimeOffset = (
|
||||
timeCompare.find(
|
||||
timeOffset =>
|
||||
// offset is represented as <offset>, group by list
|
||||
series.name.includes(`${timeOffset},`) ||
|
||||
series.name.startsWith(`${timeOffset},`) ||
|
||||
// offset is represented as <metric>__<offset>
|
||||
series.name.includes(`__${timeOffset}`) ||
|
||||
// offset is represented as <metric>, <offset>
|
||||
@@ -50,7 +50,9 @@ export const getOriginalSeries = (
|
||||
// offset in the middle: <metric>, <offset>, <dimension>
|
||||
result = result.replace(`, ${compare},`, ',');
|
||||
// offset at start: <offset>, <dimension>
|
||||
result = result.replace(`${compare},`, '');
|
||||
if (result.startsWith(`${compare},`)) {
|
||||
result = result.slice(`${compare},`.length);
|
||||
}
|
||||
// offset with double underscore: <metric>__<offset>
|
||||
result = result.replace(`__${compare}`, '');
|
||||
// offset at end: <metric>, <offset>
|
||||
|
||||
@@ -303,6 +303,30 @@ test('should add renameOperator if multiple metrics exist', () => {
|
||||
});
|
||||
});
|
||||
|
||||
test('should correctly match offsets that share a numeric prefix', () => {
|
||||
expect(
|
||||
renameOperator(
|
||||
{
|
||||
...formData,
|
||||
|
||||
comparison_type: ComparisonType.Values,
|
||||
time_compare: ['1 year ago', '11 year ago'],
|
||||
},
|
||||
queryObject,
|
||||
),
|
||||
).toEqual({
|
||||
operation: 'rename',
|
||||
options: {
|
||||
columns: {
|
||||
'count(*)__1 year ago': '1 year ago',
|
||||
'count(*)__11 year ago': '11 year ago',
|
||||
},
|
||||
inplace: true,
|
||||
level: 0,
|
||||
},
|
||||
});
|
||||
});
|
||||
|
||||
test('should remove renameOperator', () => {
|
||||
expect(
|
||||
renameOperator(
|
||||
|
||||
@@ -114,3 +114,26 @@ test('hasTimeOffset returns false when series name is not a string', () => {
|
||||
const timeCompare = ['1 year ago'];
|
||||
expect(hasTimeOffset(series, timeCompare)).toBe(false);
|
||||
});
|
||||
|
||||
test('getTimeOffset correctly matches offsets that share a numeric prefix', () => {
|
||||
const timeCompare = ['1 year ago', '11 year ago'];
|
||||
expect(
|
||||
getTimeOffset({ name: '11 year ago, Alexander' }, timeCompare),
|
||||
).toEqual('11 year ago');
|
||||
expect(getTimeOffset({ name: '1 year ago, Alexander' }, timeCompare)).toEqual(
|
||||
'1 year ago',
|
||||
);
|
||||
expect(getTimeOffset({ name: 'Births__11 year ago' }, timeCompare)).toEqual(
|
||||
'11 year ago',
|
||||
);
|
||||
});
|
||||
|
||||
test('getOriginalSeries correctly strips offsets that share a numeric prefix', () => {
|
||||
const timeCompare = ['1 year ago', '11 year ago'];
|
||||
expect(getOriginalSeries('11 year ago, Alexander', timeCompare)).toEqual(
|
||||
'Alexander',
|
||||
);
|
||||
expect(getOriginalSeries('1 year ago, Alexander', timeCompare)).toEqual(
|
||||
'Alexander',
|
||||
);
|
||||
});
|
||||
|
||||
@@ -1887,10 +1887,26 @@ class ExploreMixin: # pylint: disable=too-many-public-methods
|
||||
time_grain
|
||||
)
|
||||
|
||||
if join_column_producer and not time_grain:
|
||||
raise QueryObjectValidationError(
|
||||
_("Time Grain must be specified when using Time Shift.")
|
||||
if not time_grain:
|
||||
has_temporal_join_key = any(
|
||||
pd.api.types.is_datetime64_any_dtype(df[key])
|
||||
for key in join_keys
|
||||
if key in df.columns
|
||||
)
|
||||
if has_temporal_join_key:
|
||||
has_relative_offset = any(
|
||||
not (
|
||||
self.is_valid_date_range(offset)
|
||||
and feature_flag_manager.is_feature_enabled(
|
||||
"DATE_RANGE_TIMESHIFTS_ENABLED"
|
||||
)
|
||||
)
|
||||
for offset in offset_dfs
|
||||
)
|
||||
if has_relative_offset:
|
||||
raise QueryObjectValidationError(
|
||||
_("Time Grain must be specified when using Time Comparison.")
|
||||
)
|
||||
|
||||
for offset, offset_df in offset_dfs.items():
|
||||
is_date_range_offset = self.is_valid_date_range(
|
||||
|
||||
@@ -1782,9 +1782,9 @@ def extract_dataframe_dtypes(
|
||||
columns_by_name[column.column_name] = column
|
||||
|
||||
generic_types: list[GenericDataType] = []
|
||||
for column in df.columns:
|
||||
for i, column in enumerate(df.columns):
|
||||
column_object = columns_by_name.get(str(column))
|
||||
series = df[column]
|
||||
series = df.iloc[:, i]
|
||||
inferred_type: str = ""
|
||||
if series.isna().all():
|
||||
sql_type: Optional[str] = ""
|
||||
|
||||
@@ -24,7 +24,7 @@ from flask_babel import gettext as _
|
||||
from pandas import DataFrame, MultiIndex
|
||||
|
||||
from superset.exceptions import InvalidPostProcessingError
|
||||
from superset.utils.core import PostProcessingContributionOrientation
|
||||
from superset.utils.core import PostProcessingContributionOrientation, TIME_COMPARISON
|
||||
from superset.utils.pandas_postprocessing.utils import validate_column_args
|
||||
|
||||
|
||||
@@ -130,7 +130,7 @@ def get_column_groups(
|
||||
time_shift = None
|
||||
if time_shifts and isinstance(col_0, str):
|
||||
for ts in time_shifts:
|
||||
if col_0.endswith(ts):
|
||||
if col_0.endswith(TIME_COMPARISON + ts):
|
||||
time_shift = ts
|
||||
break
|
||||
if time_shift is not None:
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
import pytest
|
||||
from pandas import DataFrame, Series, Timestamp
|
||||
from pandas.testing import assert_frame_equal
|
||||
from pytest import fixture, mark # noqa: PT013
|
||||
@@ -23,6 +24,7 @@ from superset.common.query_context import QueryContext
|
||||
from superset.common.query_context_processor import QueryContextProcessor
|
||||
from superset.connectors.sqla.models import BaseDatasource
|
||||
from superset.constants import TimeGrain
|
||||
from superset.exceptions import QueryObjectValidationError
|
||||
from superset.models.helpers import ExploreMixin
|
||||
|
||||
# Create processor and bind ExploreMixin methods to datasource
|
||||
@@ -242,3 +244,55 @@ def test_join_offset_dfs_totals_query_no_dimensions():
|
||||
)
|
||||
|
||||
assert_frame_equal(expected, result)
|
||||
|
||||
|
||||
def test_join_offset_dfs_raises_without_time_grain():
|
||||
"""Time comparison with relative offsets requires a time grain."""
|
||||
df = DataFrame({"ds": [Timestamp("2021-01-01")], "D": [1]})
|
||||
offset_df = DataFrame({"ds": [Timestamp("2021-02-01")], "B": [5]})
|
||||
offset_dfs = {"1 year ago": offset_df}
|
||||
|
||||
with pytest.raises(
|
||||
QueryObjectValidationError, match="Time Grain must be specified"
|
||||
):
|
||||
query_context_processor.join_offset_dfs(
|
||||
df, offset_dfs, time_grain=None, join_keys=["ds"]
|
||||
)
|
||||
|
||||
|
||||
def test_join_offset_dfs_allows_non_temporal_join_without_time_grain():
|
||||
"""Time comparison without time grain is valid when join keys are non-temporal."""
|
||||
df = DataFrame({"country": ["US", "UK"], "metric": [10, 20]})
|
||||
offset_df = DataFrame({"country": ["US", "UK"], "metric__1 year ago": [8, 15]})
|
||||
offset_dfs = {"1 year ago": offset_df}
|
||||
|
||||
result = query_context_processor.join_offset_dfs(
|
||||
df, offset_dfs, time_grain=None, join_keys=["country"]
|
||||
)
|
||||
assert "metric__1 year ago" in result.columns
|
||||
|
||||
|
||||
def test_join_offset_dfs_raises_when_temporal_key_not_first():
|
||||
"""Temporal join key detection works even when it's not the first key."""
|
||||
df = DataFrame(
|
||||
{
|
||||
"country": ["US", "UK"],
|
||||
"ds": [Timestamp("2021-01-01"), Timestamp("2021-02-01")],
|
||||
"D": [1, 2],
|
||||
}
|
||||
)
|
||||
offset_df = DataFrame(
|
||||
{
|
||||
"country": ["US", "UK"],
|
||||
"ds": [Timestamp("2021-03-01"), Timestamp("2021-04-01")],
|
||||
"B": [5, 6],
|
||||
}
|
||||
)
|
||||
offset_dfs = {"1 year ago": offset_df}
|
||||
|
||||
with pytest.raises(
|
||||
QueryObjectValidationError, match="Time Grain must be specified"
|
||||
):
|
||||
query_context_processor.join_offset_dfs(
|
||||
df, offset_dfs, time_grain=None, join_keys=["country", "ds"]
|
||||
)
|
||||
|
||||
@@ -124,3 +124,36 @@ def test_contribution_with_time_shift_columns():
|
||||
assert_array_equal(processed_df["a__1 week ago"].tolist(), [0.5, 0.5])
|
||||
assert_array_equal(processed_df["b__1 week ago"].tolist(), [0.25, 0.25])
|
||||
assert_array_equal(processed_df["c__1 week ago"].tolist(), [0.25, 0.25])
|
||||
|
||||
|
||||
def test_contribution_with_numeric_prefix_time_shifts():
|
||||
"""Time shifts like '2 weeks ago' and '22 weeks ago' share a numeric suffix;
|
||||
columns must be grouped by their exact offset, not by suffix matching."""
|
||||
df = DataFrame(
|
||||
{
|
||||
DTTM_ALIAS: [
|
||||
datetime(2020, 7, 16, 14, 49),
|
||||
datetime(2020, 7, 16, 14, 50),
|
||||
],
|
||||
"a": [3, 6],
|
||||
"b": [6, 3],
|
||||
"a__2 weeks ago": [1, 1],
|
||||
"b__2 weeks ago": [1, 1],
|
||||
"a__22 weeks ago": [2, 4],
|
||||
"b__22 weeks ago": [4, 2],
|
||||
}
|
||||
)
|
||||
processed_df = contribution(
|
||||
df,
|
||||
orientation=PostProcessingContributionOrientation.ROW,
|
||||
time_shifts=["2 weeks ago", "22 weeks ago"],
|
||||
)
|
||||
# Non-time-shift columns: a=3,b=6 -> a=1/3, b=2/3; a=6,b=3 -> a=2/3, b=1/3
|
||||
assert_array_equal(processed_df["a"].tolist(), [1 / 3, 2 / 3])
|
||||
assert_array_equal(processed_df["b"].tolist(), [2 / 3, 1 / 3])
|
||||
# "2 weeks ago" group: a=1,b=1 -> 0.5,0.5 each row
|
||||
assert_array_equal(processed_df["a__2 weeks ago"].tolist(), [0.5, 0.5])
|
||||
assert_array_equal(processed_df["b__2 weeks ago"].tolist(), [0.5, 0.5])
|
||||
# "22 weeks ago" group: a=2,b=4 -> 1/3,2/3; a=4,b=2 -> 2/3,1/3
|
||||
assert_array_equal(processed_df["a__22 weeks ago"].tolist(), [1 / 3, 2 / 3])
|
||||
assert_array_equal(processed_df["b__22 weeks ago"].tolist(), [2 / 3, 1 / 3])
|
||||
|
||||
@@ -31,6 +31,7 @@ from superset.utils.core import (
|
||||
cast_to_boolean,
|
||||
check_is_safe_zip,
|
||||
DateColumn,
|
||||
extract_dataframe_dtypes,
|
||||
FilterOperator,
|
||||
generic_find_constraint_name,
|
||||
generic_find_fk_constraint_name,
|
||||
@@ -647,8 +648,9 @@ def test_get_user_agent(mocker: MockerFixture, app_context: None) -> None:
|
||||
|
||||
@with_config(
|
||||
{
|
||||
"USER_AGENT_FUNC": lambda database,
|
||||
source: f"{database.database_name} {source.name}"
|
||||
"USER_AGENT_FUNC": lambda database, source: (
|
||||
f"{database.database_name} {source.name}"
|
||||
)
|
||||
}
|
||||
)
|
||||
def test_get_user_agent_custom(mocker: MockerFixture, app_context: None) -> None:
|
||||
@@ -1730,3 +1732,10 @@ def test_markdown_with_markup_wrap() -> None:
|
||||
|
||||
assert isinstance(result, Markup)
|
||||
assert "<strong>bold</strong>" in str(result)
|
||||
|
||||
|
||||
def test_extract_dataframe_dtypes_with_duplicate_columns() -> None:
|
||||
"""extract_dataframe_dtypes should not crash on duplicate column names."""
|
||||
df = pd.DataFrame([[1, 2, 3]], columns=["a", "b", "a"])
|
||||
result = extract_dataframe_dtypes(df)
|
||||
assert len(result) == 3
|
||||
|
||||
Reference in New Issue
Block a user