mirror of
https://github.com/apache/superset.git
synced 2026-04-19 08:04:53 +00:00
[New Viz] Partition Diagram (#3642)
* Added Partition Visualization * added unit tests
This commit is contained in:
committed by
Maxime Beauchemin
parent
48e28eff9b
commit
bad6938d1a
BIN
superset/assets/images/viz_thumbnails/partition.png
Normal file
BIN
superset/assets/images/viz_thumbnails/partition.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 194 KiB |
28
superset/assets/javascripts/components/OptionDescription.jsx
Normal file
28
superset/assets/javascripts/components/OptionDescription.jsx
Normal file
@@ -0,0 +1,28 @@
|
||||
import React from 'react';
|
||||
import PropTypes from 'prop-types';
|
||||
|
||||
import InfoTooltipWithTrigger from './InfoTooltipWithTrigger';
|
||||
|
||||
const propTypes = {
|
||||
option: PropTypes.object.isRequired,
|
||||
};
|
||||
|
||||
// This component provides a general tooltip for options
|
||||
// in a SelectControl
|
||||
export default function OptionDescription({ option }) {
|
||||
return (
|
||||
<span>
|
||||
<span className="m-r-5 option-label">
|
||||
{option.label}
|
||||
</span>
|
||||
{option.description &&
|
||||
<InfoTooltipWithTrigger
|
||||
className="m-r-5 text-muted"
|
||||
icon="question-circle-o"
|
||||
tooltip={option.description}
|
||||
label={`descr-${option.label}`}
|
||||
/>
|
||||
}
|
||||
</span>);
|
||||
}
|
||||
OptionDescription.propTypes = propTypes;
|
||||
@@ -4,6 +4,7 @@ import * as v from '../validators';
|
||||
import { ALL_COLOR_SCHEMES, spectrums } from '../../modules/colors';
|
||||
import MetricOption from '../../components/MetricOption';
|
||||
import ColumnOption from '../../components/ColumnOption';
|
||||
import OptionDescription from '../../components/OptionDescription';
|
||||
import { t } from '../../locales';
|
||||
|
||||
const D3_FORMAT_DOCS = 'D3 format syntax: https://github.com/d3/d3-format';
|
||||
@@ -98,6 +99,7 @@ export const controls = {
|
||||
}),
|
||||
description: t('One or many metrics to display'),
|
||||
},
|
||||
|
||||
y_axis_bounds: {
|
||||
type: 'BoundsControl',
|
||||
label: t('Y Axis Bounds'),
|
||||
@@ -108,6 +110,7 @@ export const controls = {
|
||||
"this feature will only expand the axis range. It won't " +
|
||||
"narrow the data's extent."),
|
||||
},
|
||||
|
||||
order_by_cols: {
|
||||
type: 'SelectControl',
|
||||
multi: true,
|
||||
@@ -909,6 +912,16 @@ export const controls = {
|
||||
description: D3_FORMAT_DOCS,
|
||||
},
|
||||
|
||||
date_time_format: {
|
||||
type: 'SelectControl',
|
||||
freeForm: true,
|
||||
label: t('Date Time Format'),
|
||||
renderTrigger: true,
|
||||
default: 'smart_date',
|
||||
choices: D3_TIME_FORMAT_OPTIONS,
|
||||
description: D3_FORMAT_DOCS,
|
||||
},
|
||||
|
||||
markup_type: {
|
||||
type: 'SelectControl',
|
||||
label: t('Markup Type'),
|
||||
@@ -1136,6 +1149,14 @@ export const controls = {
|
||||
description: t('Use a log scale for the X axis'),
|
||||
},
|
||||
|
||||
log_scale: {
|
||||
type: 'CheckboxControl',
|
||||
label: t('Log Scale'),
|
||||
default: false,
|
||||
renderTrigger: true,
|
||||
description: t('Use a log scale'),
|
||||
},
|
||||
|
||||
donut: {
|
||||
type: 'CheckboxControl',
|
||||
label: t('Donut'),
|
||||
@@ -1456,5 +1477,85 @@ export const controls = {
|
||||
controlName: 'TimeSeriesColumnControl',
|
||||
},
|
||||
|
||||
time_series_option: {
|
||||
type: 'SelectControl',
|
||||
label: t('Options'),
|
||||
validators: [v.nonEmpty],
|
||||
default: 'not_time',
|
||||
valueKey: 'value',
|
||||
options: [
|
||||
{
|
||||
label: t('Not Time Series'),
|
||||
value: 'not_time',
|
||||
description: t('Ignore time'),
|
||||
},
|
||||
{
|
||||
label: t('Time Series'),
|
||||
value: 'time_series',
|
||||
description: t('Standard time series'),
|
||||
},
|
||||
{
|
||||
label: t('Aggregate Mean'),
|
||||
value: 'agg_mean',
|
||||
description: t('Mean of values over specified period'),
|
||||
},
|
||||
{
|
||||
label: t('Aggregate Sum'),
|
||||
value: 'agg_sum',
|
||||
description: t('Sum of values over specified period'),
|
||||
},
|
||||
{
|
||||
label: t('Difference'),
|
||||
value: 'point_diff',
|
||||
description: t('Metric change in value from `since` to `until`'),
|
||||
},
|
||||
{
|
||||
label: t('Percent Change'),
|
||||
value: 'point_percent',
|
||||
description: t('Metric percent change in value from `since` to `until`'),
|
||||
},
|
||||
{
|
||||
label: t('Factor'),
|
||||
value: 'point_factor',
|
||||
description: t('Metric factor change from `since` to `until`'),
|
||||
},
|
||||
{
|
||||
label: t('Advanced Analytics'),
|
||||
value: 'adv_anal',
|
||||
description: t('Use the Advanced Analytics options below'),
|
||||
},
|
||||
],
|
||||
optionRenderer: op => <OptionDescription option={op} />,
|
||||
valueRenderer: op => <OptionDescription option={op} />,
|
||||
description: t('Settings for time series'),
|
||||
},
|
||||
|
||||
equal_date_size: {
|
||||
type: 'CheckboxControl',
|
||||
label: t('Equal Date Sizes'),
|
||||
default: true,
|
||||
renderTrigger: true,
|
||||
description: t('Check to force date partitions to have the same height'),
|
||||
},
|
||||
|
||||
partition_limit: {
|
||||
type: 'TextControl',
|
||||
label: t('Partition Limit'),
|
||||
isInt: true,
|
||||
default: '5',
|
||||
description:
|
||||
t('The maximum number of subdivisions of each group; ' +
|
||||
'lower values are pruned first'),
|
||||
},
|
||||
|
||||
partition_threshold: {
|
||||
type: 'TextControl',
|
||||
label: t('Partition Threshold'),
|
||||
isFloat: true,
|
||||
default: '0.05',
|
||||
description:
|
||||
t('Partitions whose height to parent height proportions are ' +
|
||||
'below this value are pruned'),
|
||||
},
|
||||
};
|
||||
export default controls;
|
||||
|
||||
@@ -1155,6 +1155,33 @@ export const visTypes = {
|
||||
},
|
||||
],
|
||||
},
|
||||
|
||||
partition: {
|
||||
label: 'Partition Diagram',
|
||||
showOnExplore: true,
|
||||
controlPanelSections: [
|
||||
sections.NVD3TimeSeries[0],
|
||||
{
|
||||
label: t('Time Series Options'),
|
||||
expanded: true,
|
||||
controlSetRows: [
|
||||
['time_series_option'],
|
||||
],
|
||||
},
|
||||
{
|
||||
label: t('Chart Options'),
|
||||
expanded: true,
|
||||
controlSetRows: [
|
||||
['color_scheme'],
|
||||
['number_format', 'date_time_format'],
|
||||
['partition_limit', 'partition_threshold'],
|
||||
['log_scale', 'equal_date_size'],
|
||||
['rich_tooltip'],
|
||||
],
|
||||
},
|
||||
sections.NVD3TimeSeries[1],
|
||||
],
|
||||
},
|
||||
};
|
||||
|
||||
export default visTypes;
|
||||
|
||||
@@ -52,6 +52,7 @@
|
||||
"d3-sankey": "^0.4.2",
|
||||
"d3-svg-legend": "^1.x",
|
||||
"d3-tip": "^0.6.7",
|
||||
"d3-hierarchy": "^1.1.5",
|
||||
"datamaps": "^0.5.8",
|
||||
"datatables.net-bs": "^1.10.15",
|
||||
"distributions": "^1.0.0",
|
||||
|
||||
@@ -35,5 +35,6 @@ const vizMap = {
|
||||
dual_line: require('./nvd3_vis.js'),
|
||||
event_flow: require('./EventFlow.jsx'),
|
||||
paired_ttest: require('./paired_ttest.jsx'),
|
||||
partition: require('./partition.js'),
|
||||
};
|
||||
export default vizMap;
|
||||
|
||||
27
superset/assets/visualizations/partition.css
Normal file
27
superset/assets/visualizations/partition.css
Normal file
@@ -0,0 +1,27 @@
|
||||
/* One block-level chart container per rendered tree, centred horizontally */
.partition .chart {
  margin: auto;
  display: block;
  font-size: 11px;
}

/* Partition cells: default grey fill, slightly translucent until hovered */
.partition rect {
  cursor: pointer;
  fill: #aaa;
  fill-opacity: 0.8;
  stroke: #eee;
  transition: fill-opacity 180ms linear;
}

.partition rect:hover {
  fill-opacity: 1;
}

/* Cell labels never intercept the mouse, so the cell group receives the events */
.partition g text {
  fill: rgba(0, 0, 0, 0.8);
  font-weight: bold;
  pointer-events: none;
}

/* Darken the label of the hovered cell group */
.partition g:hover text {
  fill: rgba(0, 0, 0, 1);
}
|
||||
333
superset/assets/visualizations/partition.js
Normal file
333
superset/assets/visualizations/partition.js
Normal file
@@ -0,0 +1,333 @@
|
||||
/* eslint no-param-reassign: [2, {"props": false}] */
|
||||
/* eslint no-use-before-define: ["error", { "functions": false }] */
|
||||
import d3 from 'd3';
|
||||
import {
|
||||
d3TimeFormatPreset,
|
||||
} from '../javascripts/modules/utils';
|
||||
import { getColorFromScheme } from '../javascripts/modules/colors';
|
||||
|
||||
import './partition.css';
|
||||
|
||||
d3.hierarchy = require('d3-hierarchy').hierarchy;
|
||||
d3.partition = require('d3-hierarchy').partition;
|
||||
|
||||
/**
 * Computes the normalized layout (x, dx, y, dy, all in the [0, 1] range)
 * of every node of the hierarchy and returns the nodes as a flat array.
 *
 * Each node is expected to carry `weight` and each parent `sum` (the sum of
 * its children's weights) before this is called. `y`/`dy` are derived from
 * the node depth; `x`/`dx` subdivide the parent's extent proportionally to
 * `weight / parent.sum`.
 *
 * @param {Object} root - hierarchy root exposing `height` and `each(callback)`;
 *   `each` must visit nodes breadth-first with siblings visited consecutively
 *   (this is what the sibling check below relies on).
 * @returns {Array} the visited nodes, in visiting order.
 */
function init(root) {
  const flat = [];
  const dy = 1.0 / (root.height + 1);
  let prev = null;
  root.each((n) => {
    n.y = dy * n.depth;
    n.dy = dy;
    if (!n.parent) {
      n.x = 0;
      n.dx = 1;
    } else {
      // A node is stacked right after its previous sibling. The first child
      // of a parent is anchored to the parent's own offset, so children stay
      // aligned with their parent even when an earlier parent on the same
      // level has no children or only zero-width children.
      const followsSibling = prev !== null && prev.parent === n.parent;
      n.x = followsSibling ? prev.x + prev.dx : n.parent.x;
      n.dx = n.weight / n.parent.sum * n.parent.dx;
    }
    prev = n;
    flat.push(n);
  });
  return flat;
}
|
||||
|
||||
// This vis is based on
|
||||
// http://mbostock.github.io/d3/talk/20111018/partition.html
|
||||
/**
 * Renders one zoomable partition chart per tree in `payload.data` inside the
 * slice container, with a shared hover tooltip appended to <body>.
 *
 * @param {Object} slice - reads `selector`, `formData`, `width()`, `height()`
 *   and `datasource.verbose_map`.
 * @param {Object} payload - `payload.data` is an array of nested nodes; each
 *   node's `name` and `val` are read here and the tree is handed to
 *   `d3.hierarchy` (which, by default, follows the `children` property).
 */
function partitionVis(slice, payload) {
  const data = payload.data;
  const fd = slice.formData;
  const div = d3.select(slice.selector);
  const metrics = fd.metrics || [];

  // Chart options
  const logScale = fd.log_scale || false;
  const chartType = fd.time_series_option || 'not_time';
  const hasTime = ['adv_anal', 'time_series'].indexOf(chartType) >= 0;
  const format = d3.format(fd.number_format);
  const timeFormat = d3TimeFormatPreset(fd.date_time_format);
  // These form values may be strings; convert them once instead of relying
  // on implicit coercion in every comparison below.
  const partitionThreshold = Number(fd.partition_threshold);
  const partitionLimit = Number(fd.partition_limit);

  // Node names and column labels come from the data, and the tooltip is
  // injected with .html(), so everything interpolated into it is escaped.
  const HTML_ESCAPES = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;',
  };
  function escapeHtml(value) {
    return String(value).replace(/[&<>"']/g, ch => HTML_ESCAPES[ch]);
  }

  // Start from a clean container and a single, initially invisible tooltip
  div.selectAll('*').remove();
  d3.selectAll('.nvtooltip').remove();
  const tooltip = d3
    .select('body')
    .append('div')
    .attr('class', 'nvtooltip')
    .style('opacity', 0)
    .style('top', 0)
    .style('left', 0)
    .style('position', 'fixed');

  // Draws the i-th tree of `dat` in its own horizontal band of the slice
  function drawVis(i, dat) {
    const datum = dat[i];
    const w = slice.width();
    const h = slice.height() / data.length;
    const x = d3.scale.linear().range([0, w]);
    const y = d3.scale.linear().range([0, h]);

    const viz = div
      .append('div')
      .attr('class', 'chart')
      .style('width', w + 'px')
      .style('height', h + 'px')
      .append('svg:svg')
      .attr('width', w)
      .attr('height', h);

    // Add padding between multiple visualizations
    if (i !== data.length - 1 && data.length > 1) {
      viz.style('padding-bottom', '3px');
    }
    if (i !== 0 && data.length > 1) {
      viz.style('padding-top', '3px');
    }

    const root = d3.hierarchy(datum);

    // True for a metric node whose children are time grains
    function hasDateNode(n) {
      return metrics.indexOf(n.data.name) >= 0 && hasTime;
    }

    // Sum of the children's weights; falls back to 1 so that it can always
    // be used as a divisor.
    function childWeightSum(n) {
      return n.children ? n.children.reduce((a, v) => a + v.weight, 0) || 1 : 1;
    }

    // node.name is the metric/group name
    // node.disp is the display value
    // node.value determines sorting order
    // node.weight determines partition height
    // node.sum is the sum of children weights
    root.eachAfter((n) => {
      n.disp = n.data.val;
      n.value = n.disp < 0 ? -n.disp : n.disp;
      n.weight = n.value;
      n.name = n.data.name;
      // If the parent is a metric and we still have
      // the time column, perform a date-time format
      if (n.parent && hasDateNode(n.parent)) {
        // Format timestamp values
        n.weight = fd.equal_date_size ? 1 : n.value;
        n.value = n.name;
        n.name = timeFormat(n.name);
      }
      if (logScale) n.weight = Math.log(n.weight + 1);
      // isFinite() is false for NaN, so it covers the NaN check as well;
      // falsy values (including 0 and missing values) get an empty label.
      n.disp = n.disp && isFinite(n.disp) ? format(n.disp) : '';
    });

    // Order siblings by descending value, breaking ties on the name
    root.sort((a, b) => {
      const v = b.value - a.value;
      if (v === 0) {
        return b.name > a.name ? 1 : -1;
      }
      return v;
    });

    // Prune data based on partition limit and threshold
    // both are applied at the same time
    if (fd.partition_threshold && partitionThreshold >= 0) {
      // Compute weight sums as we go
      root.each((n) => {
        n.sum = childWeightSum(n);
        if (!n.children) {
          return;
        }
        const belowThreshold = c => c.weight / n.sum < partitionThreshold;
        if (hasDateNode(n)) {
          // Dates are not ordered by weight, so every child is tested
          if (fd.equal_date_size) {
            return;
          }
          // Keep at least one child (the first one is never removed)
          n.children = n.children.filter((c, j) => j === 0 || !belowThreshold(c));
        } else {
          // Children are sorted: cut at the first child (after the first)
          // that falls below the threshold
          let j;
          for (j = 1; j < n.children.length; j++) {
            if (belowThreshold(n.children[j])) {
              break;
            }
          }
          n.children = n.children.slice(0, j);
        }
      });
    }
    if (fd.partition_limit && partitionLimit >= 0) {
      root.each((n) => {
        if (n.children && n.children.length > partitionLimit && !hasDateNode(n)) {
          n.children = n.children.slice(0, partitionLimit);
        }
      });
    }
    // Compute final weight sums
    root.eachAfter((n) => {
      n.sum = childWeightSum(n);
    });

    const verboseMap = slice.datasource.verbose_map || {};

    // Human readable label of the hierarchy level at the given depth
    function getCategory(depth) {
      if (!depth) {
        return 'Metric';
      }
      if (hasTime && depth === 1) {
        return 'Date';
      }
      const col = fd.groupby[depth - (hasTime ? 2 : 1)];
      return verboseMap[col] || col;
    }

    // The node itself followed by its ancestors up to the root
    function getAncestors(d) {
      const ancestors = [d];
      let node = d;
      while (node.parent) {
        ancestors.push(node.parent);
        node = node.parent;
      }
      return ancestors;
    }

    // Fills the tooltip for node `d` and moves it next to the mouse pointer
    function positionAndPopulate(tip, d) {
      let t = '<table>';
      if (!fd.rich_tooltip) {
        t += (
          '<thead><tr><td colspan="3">' +
            `<strong class='x-value'>${escapeHtml(getCategory(d.depth))}</strong>` +
          '</td></tr></thead><tbody>'
        );
        t += (
          '<tr class="emph">' +
            '<td class="legend-color-guide" style="opacity: 0.75">' +
              `<div style='border: thin solid grey; background-color: ${d.color};'` +
              '></div>' +
            '</td>' +
            `<td>${escapeHtml(d.name)}</td>` +
            `<td>${escapeHtml(d.disp)}</td>` +
          '</tr>' +
          '</tbody>'
        );
      } else {
        // One row per level, from the hovered node up to the root; each row
        // lives in its own, properly closed <tbody>
        getAncestors(d).forEach((n) => {
          const atNode = n.depth === d.depth;
          t += (
            '<tbody>' +
            `<tr class='${atNode ? 'emph' : ''}'>` +
              `<td class='legend-color-guide' style='opacity: ${atNode ? '1' : '0.75'}'>` +
                '<div ' +
                  `style='border: 2px solid ${atNode ? 'black' : 'transparent'};` +
                    `background-color: ${n.color};'` +
                '></div>' +
              '</td>' +
              `<td>${escapeHtml(n.name)}</td>` +
              `<td>${escapeHtml(n.disp)}</td>` +
              `<td>${escapeHtml(getCategory(n.depth))}</td>` +
            '</tr>' +
            '</tbody>'
          );
        });
      }
      t += '</table>';
      // The tooltip is position: fixed, i.e. placed relative to the viewport,
      // so viewport coordinates (clientX/Y) are needed; page coordinates
      // would be off by the scroll offset.
      tip.html(t)
        .style('left', (d3.event.clientX + 13) + 'px')
        .style('top', (d3.event.clientY - 10) + 'px');
    }

    const g = viz
      .selectAll('g')
      .data(init(root))
      .enter()
      .append('svg:g')
      .attr('transform', d => `translate(${x(d.y)},${y(d.x)})`)
      .on('click', click)
      .on('mouseover', (d) => {
        tooltip
          .interrupt()
          .transition()
          .duration(100)
          .style('opacity', 0.9);
        positionAndPopulate(tooltip, d);
      })
      .on('mousemove', (d) => {
        positionAndPopulate(tooltip, d);
      })
      .on('mouseout', () => {
        tooltip
          .interrupt()
          .transition()
          .duration(250)
          .style('opacity', 0);
      });

    // Pixel scale factors of the current zoom level (updated by click())
    let kx = w / root.dx;
    let ky = h;

    g.append('svg:rect')
      .attr('width', root.dy * kx)
      .attr('height', d => d.dx * ky);

    g.append('svg:text')
      .attr('transform', transform)
      .attr('dy', '0.35em')
      // Labels are hidden in cells that are too thin to hold them
      .style('opacity', d => d.dx * ky > 12 ? 1 : 0)
      .text((d) => {
        if (!d.disp) {
          return d.name;
        }
        return `${d.name}: ${d.disp}`;
      });

    // Apply color scheme; the color is kept on the node for the tooltip
    g.selectAll('rect')
      .style('fill', (d) => {
        d.color = getColorFromScheme(d.name, fd.color_scheme);
        return d.color;
      });

    // Keep text centered in its division
    function transform(d) {
      return `translate(8,${d.dx * ky / 2})`;
    }

    // When clicking a subdivision, the vis will zoom in to it
    function click(d) {
      if (!d.children) {
        if (d.parent) {
          // Clicking on the rightmost level should zoom in
          return click(d.parent);
        }
        return false;
      }
      // When zoomed below the root, 40px are reserved on the left for the
      // ancestors of the zoomed node
      kx = (d.y ? w - 40 : w) / (1 - d.y);
      ky = h / d.dx;
      x.domain([d.y, 1]).range([d.y ? 40 : 0, w]);
      y.domain([d.x, d.x + d.dx]);

      const t = g
        .transition()
        .duration(d3.event.altKey ? 7500 : 750)
        .attr('transform', nd => `translate(${x(nd.y)},${y(nd.x)})`);

      t.select('rect')
        .attr('width', d.dy * kx)
        .attr('height', nd => nd.dx * ky);

      t.select('text')
        .attr('transform', transform)
        .style('opacity', nd => nd.dx * ky > 12 ? 1 : 0);

      d3.event.stopPropagation();
      return true;
    }
  }

  for (let i = 0; i < data.length; i++) {
    drawVis(i, data);
  }
}
|
||||
|
||||
module.exports = partitionVis;
|
||||
157
superset/viz.py
157
superset/viz.py
@@ -27,6 +27,7 @@ from flask_babel import lazy_gettext as _
|
||||
from markdown import markdown
|
||||
import simplejson as json
|
||||
from six import string_types, PY3
|
||||
from six.moves import reduce
|
||||
from dateutil import relativedelta as rdelta
|
||||
|
||||
from superset import app, utils, cache, get_manifest_file
|
||||
@@ -915,7 +916,7 @@ class NVD3TimeSeriesViz(NVD3Viz):
|
||||
if isinstance(series_title, string_types):
|
||||
series_title += title_suffix
|
||||
elif title_suffix and isinstance(series_title, (list, tuple)):
|
||||
series_title.append(title_suffix)
|
||||
series_title = series_title + (title_suffix,)
|
||||
|
||||
d = {
|
||||
"key": series_title,
|
||||
@@ -928,16 +929,24 @@ class NVD3TimeSeriesViz(NVD3Viz):
|
||||
chart_data.append(d)
|
||||
return chart_data
|
||||
|
||||
def process_data(self, df):
|
||||
def process_data(self, df, aggregate=False):
|
||||
fd = self.form_data
|
||||
df = df.fillna(0)
|
||||
if fd.get("granularity") == "all":
|
||||
raise Exception(_("Pick a time granularity for your time series"))
|
||||
|
||||
df = df.pivot_table(
|
||||
index=DTTM_ALIAS,
|
||||
columns=fd.get('groupby'),
|
||||
values=fd.get('metrics'))
|
||||
if not aggregate:
|
||||
df = df.pivot_table(
|
||||
index=DTTM_ALIAS,
|
||||
columns=fd.get('groupby'),
|
||||
values=fd.get('metrics'))
|
||||
else:
|
||||
df = df.pivot_table(
|
||||
index=DTTM_ALIAS,
|
||||
columns=fd.get('groupby'),
|
||||
values=fd.get('metrics'),
|
||||
fill_value=0,
|
||||
aggfunc=sum)
|
||||
|
||||
fm = fd.get("resample_fillmethod")
|
||||
if not fm:
|
||||
@@ -1782,6 +1791,142 @@ class PairedTTestViz(BaseViz):
|
||||
return data
|
||||
|
||||
|
||||
class PartitionViz(NVD3TimeSeriesViz):

    """
    A hierarchical data visualization with support for time series.

    The `time_series_option` form value selects how the time dimension is
    handled; `get_data` dispatches on it and returns a list of nested
    `{'name', 'val', 'children'}` dicts, one tree per metric.
    """

    viz_type = 'partition'
    verbose_name = _("Partition Diagram")

    def query_obj(self):
        """Build the query, asking for a time series unless 'not_time' is set."""
        query_obj = super(PartitionViz, self).query_obj()
        time_op = self.form_data.get('time_series_option', 'not_time')
        # Return time series data if the user specifies so
        query_obj['is_timeseries'] = time_op != 'not_time'
        return query_obj

    def levels_for(self, time_op, groups, df):
        """
        Compute the partition at each `level` from the dataframe.

        Level `i` aggregates `df` over the first `i` group columns (level 0
        aggregates the whole frame). The aggregate is the mean when `time_op`
        is 'agg_mean' and the sum otherwise. Only numeric columns are
        aggregated, so the remaining (non-numeric) group columns never end
        up in the result.

        Returns a dict mapping level number to the aggregated data.
        """
        levels = {}
        for i in range(0, len(groups) + 1):
            agg_df = df.groupby(groups[:i]) if i else df
            levels[i] = (
                agg_df.mean(numeric_only=True) if time_op == 'agg_mean'
                else agg_df.sum(numeric_only=True))
        return levels

    def levels_for_diff(self, time_op, groups, df):
        """
        Compute, at each level, the change of every metric between the
        earliest and the latest time grain found in `df`.

        `time_op` is one of 'point_diff', 'point_factor' or 'point_percent'.
        Returns a dict mapping level number to a Series (level 0) or a
        DataFrame (deeper levels) holding one entry/column per metric.
        """
        # Obtain a unique, ordered list of the time grains
        times = sorted(set(df[DTTM_ALIAS]))
        since = times[0]
        until = times[-1]
        # Function describing how to calculate the difference:
        # (operation on pandas objects, operation on scalars)
        series_op, scalar_op = {
            'point_diff': (
                pd.Series.sub,
                lambda a, b, fill_value: a - b,
            ),
            'point_factor': (
                pd.Series.div,
                lambda a, b, fill_value: a / float(b),
            ),
            'point_percent': (
                lambda a, b, fill_value=0: a.div(b, fill_value=fill_value) - 1,
                lambda a, b, fill_value: a / float(b) - 1,
            ),
        }[time_op]
        # numeric_only keeps the non-numeric group columns out of the
        # aggregates, so that only metrics are diffed
        agg_df = df.groupby(DTTM_ALIAS).sum(numeric_only=True)
        levels = {0: pd.Series({
            m: scalar_op(agg_df[m][until], agg_df[m][since], 0)
            for m in agg_df.columns})}
        for i in range(1, len(groups) + 1):
            agg_df = df.groupby(
                [DTTM_ALIAS] + list(groups[:i])).sum(numeric_only=True)
            levels[i] = pd.DataFrame({
                m: series_op(agg_df[m][until], agg_df[m][since], fill_value=0)
                for m in agg_df.columns})
        return levels

    def levels_for_time(self, groups, df):
        """
        Run `process_data` once per level, each time with the group columns
        deeper than that level dropped from `df`.

        `process_data` reads the group columns from `self.form_data`, so
        'groupby' is temporarily narrowed for every call and set back to
        `groups` afterwards, even when a call raises.
        """
        procs = {}
        try:
            for i in range(0, len(groups) + 1):
                self.form_data['groupby'] = groups[:i]
                # axis passed by keyword: the positional form is rejected
                # by recent pandas versions
                df_drop = df.drop(groups[i:], axis=1)
                procs[i] = self.process_data(
                    df_drop, aggregate=True).fillna(0)
        finally:
            self.form_data['groupby'] = groups
        return procs

    def nest_values(self, levels, level=0, metric=None, dims=()):
        """
        Nest values at each level on the back-end with
        access and setting, instead of summing from the bottom.

        `levels` is the dict produced by `levels_for`/`levels_for_diff`;
        `metric` and `dims` identify the branch being expanded and are only
        set by the recursive calls.
        """
        if not level:
            # Top level: one tree per metric
            return [{
                'name': m,
                'val': levels[0][m],
                'children': self.nest_values(levels, 1, m),
            } for m in levels[0].index]
        if level == 1:
            return [{
                'name': i,
                'val': levels[1][metric][i],
                'children': self.nest_values(levels, 2, metric, (i,)),
            } for i in levels[1][metric].index]
        if level >= len(levels):
            # Past the deepest level: leaves have no children
            return []
        return [{
            'name': i,
            'val': levels[level][metric][dims][i],
            'children': self.nest_values(
                levels, level + 1, metric, dims + (i,),
            ),
        } for i in levels[level][metric][dims].index]

    def nest_procs(self, procs, level=-1, dims=(), time=None):
        """
        Nest the per-level frames produced by `levels_for_time`.

        The resulting trees go metric (no 'val') -> time grain -> groups.
        `level`, `dims` and `time` are only set by the recursive calls.
        """
        if level == -1:
            return [{
                'name': m,
                'children': self.nest_procs(procs, 0, (m,)),
            } for m in procs[0].columns]
        if not level:
            return [{
                'name': t,
                'val': procs[0][dims[0]][t],
                'children': self.nest_procs(procs, 1, dims, t),
            } for t in procs[0].index]
        if level >= len(procs):
            return []
        return [{
            'name': i,
            'val': procs[level][dims][i][time],
            'children': self.nest_procs(procs, level + 1, dims + (i,), time),
        } for i in procs[level][dims].columns]

    def get_data(self, df):
        """
        Build the nested payload for the `time_series_option` in effect.

        Raises ValueError when no groupby column is selected.
        """
        fd = self.form_data
        groups = fd.get('groupby', [])
        time_op = fd.get('time_series_option', 'not_time')
        # Truthiness also covers an explicit None value
        if not groups:
            raise ValueError('Please choose at least one groupby')
        if time_op == 'adv_anal':
            procs = self.levels_for_time(groups, df)
            return self.nest_procs(procs)
        if time_op == 'not_time':
            levels = self.levels_for('agg_sum', groups, df)
        elif time_op in ('agg_sum', 'agg_mean'):
            levels = self.levels_for(time_op, groups, df)
        elif time_op in ('point_diff', 'point_factor', 'point_percent'):
            levels = self.levels_for_diff(time_op, groups, df)
        else:
            # Plain time series: the time column becomes the first level
            levels = self.levels_for(
                'agg_sum', [DTTM_ALIAS] + list(groups), df)
        return self.nest_values(levels)
|
||||
|
||||
|
||||
viz_types = {
|
||||
o.viz_type: o for o in globals().values()
|
||||
if (
|
||||
|
||||
@@ -3,6 +3,7 @@ import pandas as pd
|
||||
import superset.viz as viz
|
||||
|
||||
from superset.utils import DTTM_ALIAS
|
||||
from mock import Mock, patch
|
||||
|
||||
|
||||
class PairedTTestTestCase(unittest.TestCase):
|
||||
@@ -135,3 +136,227 @@ class PairedTTestTestCase(unittest.TestCase):
|
||||
],
|
||||
}
|
||||
self.assertEquals(data, expected)
|
||||
|
||||
|
||||
class PartitionVizTestCase(unittest.TestCase):

    """Unit tests for viz.PartitionViz."""

    GROUPS = ['groupA', 'groupB', 'groupC']
    METRICS = ['metric1', 'metric2', 'metric3']

    @staticmethod
    def _build_df(with_time=True):
        """
        Fixture shared by the tests: 9 rows, 3 fully correlated group
        columns (3 distinct values each), 3 metrics and, unless
        `with_time` is False, 3 time grains (100, 200, 300) per group.
        """
        raw = {}
        if with_time:
            raw[DTTM_ALIAS] = [100, 200, 300, 100, 200, 300, 100, 200, 300]
        raw['groupA'] = ['a1', 'a1', 'a1', 'b1', 'b1', 'b1', 'c1', 'c1', 'c1']
        raw['groupB'] = ['a2', 'a2', 'a2', 'b2', 'b2', 'b2', 'c2', 'c2', 'c2']
        raw['groupC'] = ['a3', 'a3', 'a3', 'b3', 'b3', 'b3', 'c3', 'c3', 'c3']
        raw['metric1'] = [1, 2, 3, 4, 5, 6, 7, 8, 9]
        raw['metric2'] = [10, 20, 30, 40, 50, 60, 70, 80, 90]
        raw['metric3'] = [100, 200, 300, 400, 500, 600, 700, 800, 900]
        return pd.DataFrame(raw)

    @patch('superset.viz.BaseViz.query_obj')
    def test_query_obj_time_series_option(self, super_query_obj):
        datasource = Mock()
        form_data = {}
        test_viz = viz.PartitionViz(datasource, form_data)
        super_query_obj.return_value = {}
        query_obj = test_viz.query_obj()
        self.assertFalse(query_obj['is_timeseries'])
        test_viz.form_data['time_series_option'] = 'agg_sum'
        query_obj = test_viz.query_obj()
        self.assertTrue(query_obj['is_timeseries'])

    def test_levels_for_computes_levels(self):
        df = self._build_df()
        groups = list(self.GROUPS)
        time_op = 'agg_sum'
        test_viz = viz.PartitionViz(Mock(), {})
        levels = test_viz.levels_for(time_op, groups, df)
        self.assertEqual(4, len(levels))
        expected = {
            DTTM_ALIAS: 1800,
            'metric1': 45,
            'metric2': 450,
            'metric3': 4500,
        }
        self.assertEqual(expected, levels[0].to_dict())
        expected = {
            DTTM_ALIAS: {'a1': 600, 'b1': 600, 'c1': 600},
            'metric1': {'a1': 6, 'b1': 15, 'c1': 24},
            'metric2': {'a1': 60, 'b1': 150, 'c1': 240},
            'metric3': {'a1': 600, 'b1': 1500, 'c1': 2400},
        }
        self.assertEqual(expected, levels[1].to_dict())
        self.assertEqual(['groupA', 'groupB'], levels[2].index.names)
        self.assertEqual(
            ['groupA', 'groupB', 'groupC'],
            levels[3].index.names,
        )
        time_op = 'agg_mean'
        levels = test_viz.levels_for(time_op, groups, df)
        self.assertEqual(4, len(levels))
        expected = {
            DTTM_ALIAS: 200.0,
            'metric1': 5.0,
            'metric2': 50.0,
            'metric3': 500.0,
        }
        self.assertEqual(expected, levels[0].to_dict())
        expected = {
            DTTM_ALIAS: {'a1': 200, 'c1': 200, 'b1': 200},
            'metric1': {'a1': 2, 'b1': 5, 'c1': 8},
            'metric2': {'a1': 20, 'b1': 50, 'c1': 80},
            'metric3': {'a1': 200, 'b1': 500, 'c1': 800},
        }
        self.assertEqual(expected, levels[1].to_dict())
        self.assertEqual(['groupA', 'groupB'], levels[2].index.names)
        self.assertEqual(
            ['groupA', 'groupB', 'groupC'],
            levels[3].index.names,
        )

    def test_levels_for_diff_computes_difference(self):
        df = self._build_df()
        groups = list(self.GROUPS)
        test_viz = viz.PartitionViz(Mock(), {})
        time_op = 'point_diff'
        levels = test_viz.levels_for_diff(time_op, groups, df)
        expected = {
            'metric1': 6,
            'metric2': 60,
            'metric3': 600,
        }
        self.assertEqual(expected, levels[0].to_dict())
        expected = {
            'metric1': {'a1': 2, 'b1': 2, 'c1': 2},
            'metric2': {'a1': 20, 'b1': 20, 'c1': 20},
            'metric3': {'a1': 200, 'b1': 200, 'c1': 200},
        }
        self.assertEqual(expected, levels[1].to_dict())
        self.assertEqual(4, len(levels))
        self.assertEqual(['groupA', 'groupB', 'groupC'], levels[3].index.names)

    def test_levels_for_time_calls_process_data_and_drops_cols(self):
        df = self._build_df()
        groups = list(self.GROUPS)
        test_viz = viz.PartitionViz(Mock(), {'groupby': groups})

        # Stand-in for process_data that hands back the frame it was given,
        # so the dropped columns can be inspected
        def return_args(df_drop, aggregate):
            return df_drop
        test_viz.process_data = Mock(side_effect=return_args)
        levels = test_viz.levels_for_time(groups, df)
        self.assertEqual(4, len(levels))
        cols = [DTTM_ALIAS, 'metric1', 'metric2', 'metric3']
        self.assertEqual(sorted(cols), sorted(levels[0].columns.tolist()))
        cols += ['groupA']
        self.assertEqual(sorted(cols), sorted(levels[1].columns.tolist()))
        cols += ['groupB']
        self.assertEqual(sorted(cols), sorted(levels[2].columns.tolist()))
        cols += ['groupC']
        self.assertEqual(sorted(cols), sorted(levels[3].columns.tolist()))
        self.assertEqual(4, len(test_viz.process_data.mock_calls))

    def test_nest_values_returns_hierarchy(self):
        df = self._build_df(with_time=False)
        test_viz = viz.PartitionViz(Mock(), {})
        groups = list(self.GROUPS)
        levels = test_viz.levels_for('agg_sum', groups, df)
        nest = test_viz.nest_values(levels)
        self.assertEqual(3, len(nest))
        for i in range(0, 3):
            self.assertEqual('metric' + str(i + 1), nest[i]['name'])
        self.assertEqual(3, len(nest[0]['children']))
        self.assertEqual(1, len(nest[0]['children'][0]['children']))
        self.assertEqual(
            1, len(nest[0]['children'][0]['children'][0]['children']))

    def test_nest_procs_returns_hierarchy(self):
        df = self._build_df()
        test_viz = viz.PartitionViz(Mock(), {})
        groups = list(self.GROUPS)
        metrics = list(self.METRICS)
        procs = {}
        for i in range(0, 4):
            # axis passed by keyword: the positional form is rejected by
            # recent pandas versions
            df_drop = df.drop(groups[i:], axis=1)
            pivot = df_drop.pivot_table(
                index=DTTM_ALIAS,
                columns=groups[:i],
                values=metrics,
            )
            procs[i] = pivot
        nest = test_viz.nest_procs(procs)
        self.assertEqual(3, len(nest))
        for i in range(0, 3):
            self.assertEqual('metric' + str(i + 1), nest[i]['name'])
            self.assertEqual(None, nest[i].get('val'))
        self.assertEqual(3, len(nest[0]['children']))
        self.assertEqual(3, len(nest[0]['children'][0]['children']))
        self.assertEqual(
            1, len(nest[0]['children'][0]['children'][0]['children']))
        self.assertEqual(
            1,
            len(nest[0]['children']
                [0]['children']
                [0]['children']
                [0]['children']),
        )

    def test_get_data_calls_correct_method(self):
        test_viz = viz.PartitionViz(Mock(), {})
        df = Mock()
        # No groupby selected
        with self.assertRaises(ValueError):
            test_viz.get_data(df)
        test_viz.levels_for = Mock(return_value=1)
        test_viz.nest_values = Mock(return_value=1)
        test_viz.form_data['groupby'] = ['groups']
        test_viz.form_data['time_series_option'] = 'not_time'
        test_viz.get_data(df)
        self.assertEqual('agg_sum', test_viz.levels_for.mock_calls[0][1][0])
        test_viz.form_data['time_series_option'] = 'agg_sum'
        test_viz.get_data(df)
        self.assertEqual('agg_sum', test_viz.levels_for.mock_calls[1][1][0])
        test_viz.form_data['time_series_option'] = 'agg_mean'
        test_viz.get_data(df)
        self.assertEqual('agg_mean', test_viz.levels_for.mock_calls[2][1][0])
        test_viz.form_data['time_series_option'] = 'point_diff'
        test_viz.levels_for_diff = Mock(return_value=1)
        test_viz.get_data(df)
        self.assertEqual(
            'point_diff', test_viz.levels_for_diff.mock_calls[0][1][0])
        test_viz.form_data['time_series_option'] = 'point_percent'
        test_viz.get_data(df)
        self.assertEqual(
            'point_percent', test_viz.levels_for_diff.mock_calls[1][1][0])
        test_viz.form_data['time_series_option'] = 'point_factor'
        test_viz.get_data(df)
        self.assertEqual(
            'point_factor', test_viz.levels_for_diff.mock_calls[2][1][0])
        test_viz.levels_for_time = Mock(return_value=1)
        test_viz.nest_procs = Mock(return_value=1)
        test_viz.form_data['time_series_option'] = 'adv_anal'
        test_viz.get_data(df)
        self.assertEqual(1, len(test_viz.levels_for_time.mock_calls))
        self.assertEqual(1, len(test_viz.nest_procs.mock_calls))
        test_viz.form_data['time_series_option'] = 'time_series'
        test_viz.get_data(df)
        self.assertEqual('agg_sum', test_viz.levels_for.mock_calls[3][1][0])
        self.assertEqual(7, len(test_viz.nest_values.mock_calls))
|
||||
|
||||
Reference in New Issue
Block a user