[New Viz] Partition Diagram (#3642)

* Added Partition Visualization

* added unit tests
This commit is contained in:
Jeff Niu
2017-10-12 21:54:59 -07:00
committed by Maxime Beauchemin
parent 48e28eff9b
commit bad6938d1a
10 changed files with 894 additions and 6 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 194 KiB

View File

@@ -0,0 +1,28 @@
import React from 'react';
import PropTypes from 'prop-types';
import InfoTooltipWithTrigger from './InfoTooltipWithTrigger';
const propTypes = {
option: PropTypes.object.isRequired,
};
// This component provides a general tooltip for options
// in a SelectControl
export default function OptionDescription({ option }) {
return (
<span>
<span className="m-r-5 option-label">
{option.label}
</span>
{option.description &&
<InfoTooltipWithTrigger
className="m-r-5 text-muted"
icon="question-circle-o"
tooltip={option.description}
label={`descr-${option.label}`}
/>
}
</span>);
}
OptionDescription.propTypes = propTypes;

View File

@@ -4,6 +4,7 @@ import * as v from '../validators';
import { ALL_COLOR_SCHEMES, spectrums } from '../../modules/colors';
import MetricOption from '../../components/MetricOption';
import ColumnOption from '../../components/ColumnOption';
import OptionDescription from '../../components/OptionDescription';
import { t } from '../../locales';
const D3_FORMAT_DOCS = 'D3 format syntax: https://github.com/d3/d3-format';
@@ -98,6 +99,7 @@ export const controls = {
}),
description: t('One or many metrics to display'),
},
y_axis_bounds: {
type: 'BoundsControl',
label: t('Y Axis Bounds'),
@@ -108,6 +110,7 @@ export const controls = {
"this feature will only expand the axis range. It won't " +
"narrow the data's extent."),
},
order_by_cols: {
type: 'SelectControl',
multi: true,
@@ -909,6 +912,16 @@ export const controls = {
description: D3_FORMAT_DOCS,
},
date_time_format: {
type: 'SelectControl',
freeForm: true,
label: t('Date Time Format'),
renderTrigger: true,
default: 'smart_date',
choices: D3_TIME_FORMAT_OPTIONS,
description: D3_FORMAT_DOCS,
},
markup_type: {
type: 'SelectControl',
label: t('Markup Type'),
@@ -1136,6 +1149,14 @@ export const controls = {
description: t('Use a log scale for the X axis'),
},
log_scale: {
type: 'CheckboxControl',
label: t('Log Scale'),
default: false,
renderTrigger: true,
description: t('Use a log scale'),
},
donut: {
type: 'CheckboxControl',
label: t('Donut'),
@@ -1456,5 +1477,85 @@ export const controls = {
controlName: 'TimeSeriesColumnControl',
},
time_series_option: {
type: 'SelectControl',
label: t('Options'),
validators: [v.nonEmpty],
default: 'not_time',
valueKey: 'value',
options: [
{
label: t('Not Time Series'),
value: 'not_time',
description: t('Ignore time'),
},
{
label: t('Time Series'),
value: 'time_series',
description: t('Standard time series'),
},
{
label: t('Aggregate Mean'),
value: 'agg_mean',
description: t('Mean of values over specified period'),
},
{
label: t('Aggregate Sum'),
value: 'agg_sum',
description: t('Sum of values over specified period'),
},
{
label: t('Difference'),
value: 'point_diff',
description: t('Metric change in value from `since` to `until`'),
},
{
label: t('Percent Change'),
value: 'point_percent',
description: t('Metric percent change in value from `since` to `until`'),
},
{
label: t('Factor'),
value: 'point_factor',
description: t('Metric factor change from `since` to `until`'),
},
{
label: t('Advanced Analytics'),
value: 'adv_anal',
description: t('Use the Advanced Analytics options below'),
},
],
optionRenderer: op => <OptionDescription option={op} />,
valueRenderer: op => <OptionDescription option={op} />,
description: t('Settings for time series'),
},
equal_date_size: {
type: 'CheckboxControl',
label: t('Equal Date Sizes'),
default: true,
renderTrigger: true,
description: t('Check to force date partitions to have the same height'),
},
partition_limit: {
type: 'TextControl',
label: t('Partition Limit'),
isInt: true,
default: '5',
description:
t('The maximum number of subdivisions of each group; ' +
'lower values are pruned first'),
},
partition_threshold: {
type: 'TextControl',
label: t('Partition Threshold'),
isFloat: true,
default: '0.05',
description:
t('Partitions whose height to parent height proportions are ' +
'below this value are pruned'),
},
};
export default controls;

View File

@@ -1155,6 +1155,33 @@ export const visTypes = {
},
],
},
partition: {
label: 'Partition Diagram',
showOnExplore: true,
controlPanelSections: [
sections.NVD3TimeSeries[0],
{
label: t('Time Series Options'),
expanded: true,
controlSetRows: [
['time_series_option'],
],
},
{
label: t('Chart Options'),
expanded: true,
controlSetRows: [
['color_scheme'],
['number_format', 'date_time_format'],
['partition_limit', 'partition_threshold'],
['log_scale', 'equal_date_size'],
['rich_tooltip'],
],
},
sections.NVD3TimeSeries[1],
],
},
};
export default visTypes;

View File

@@ -52,6 +52,7 @@
"d3-sankey": "^0.4.2",
"d3-svg-legend": "^1.x",
"d3-tip": "^0.6.7",
"d3-hierarchy": "^1.1.5",
"datamaps": "^0.5.8",
"datatables.net-bs": "^1.10.15",
"distributions": "^1.0.0",

View File

@@ -35,5 +35,6 @@ const vizMap = {
dual_line: require('./nvd3_vis.js'),
event_flow: require('./EventFlow.jsx'),
paired_ttest: require('./paired_ttest.jsx'),
partition: require('./partition.js'),
};
export default vizMap;

View File

@@ -0,0 +1,27 @@
/* Styles for the partition (icicle) visualization; the markup is an
   SVG of <g> cells each holding a <rect> and a <text> label
   (see partition.js). */
.partition .chart {
  display: block;
  margin: auto;
  font-size: 11px;
}
/* Cells fade in slightly on hover via fill-opacity. */
.partition rect {
  stroke: #eee;
  fill: #aaa;
  fill-opacity: .8;
  transition: fill-opacity 180ms linear;
  cursor: pointer;
}
.partition rect:hover {
  fill-opacity: 1;
}
/* Labels must not intercept clicks meant for the cell underneath. */
.partition g text {
  font-weight: bold;
  pointer-events: none;
  fill: rgba(0, 0, 0, 0.8);
}
.partition g:hover text {
  fill: rgba(0, 0, 0, 1);
}

View File

@@ -0,0 +1,333 @@
/* eslint no-param-reassign: [2, {"props": false}] */
/* eslint no-use-before-define: ["error", { "functions": false }] */
import d3 from 'd3';
import {
d3TimeFormatPreset,
} from '../javascripts/modules/utils';
import { getColorFromScheme } from '../javascripts/modules/colors';
import './partition.css';
d3.hierarchy = require('d3-hierarchy').hierarchy;
d3.partition = require('d3-hierarchy').partition;
/**
 * Flatten a d3 hierarchy into breadth-first order while assigning each
 * node its normalized layout coordinates: `y`/`dy` (horizontal band per
 * depth level) and `x`/`dx` (vertical position/extent, proportional to
 * `weight / parent.sum` within the parent's extent).
 *
 * @param {Object} root - hierarchy root; nodes carry depth, parent,
 *   weight and sum (sum precomputed by the caller).
 * @returns {Array} the nodes, breadth-first.
 */
function init(root) {
  const nodes = [];
  const levelThickness = 1.0 / (root.height + 1);
  let previous = null;
  root.each((node) => {
    node.y = levelThickness * node.depth;
    node.dy = levelThickness;
    if (node.parent) {
      // In breadth-first order, the previous node sitting on the
      // parent's level means this is the first node of a new level,
      // so it restarts at x = 0; otherwise stack after the previous
      // node of the same level.
      node.x = previous.depth === node.parent.depth
        ? 0
        : previous.x + previous.dx;
      node.dx = (node.weight / node.parent.sum) * node.parent.dx;
    } else {
      node.x = 0;
      node.dx = 1;
    }
    previous = node;
    nodes.push(node);
  });
  return nodes;
}
// This vis is based on
// http://mbostock.github.io/d3/talk/20111018/partition.html
/**
 * Render one partition (icicle) chart per entry in payload.data into
 * slice.selector, with a shared hover tooltip and click-to-zoom.
 *
 * @param {Object} slice - chart container API (selector, width(),
 *   height(), formData, datasource).
 * @param {Object} payload - query response; payload.data is an array of
 *   hierarchy roots as produced by the PartitionViz back-end.
 */
function partitionVis(slice, payload) {
  const data = payload.data;
  const fd = slice.formData;
  const div = d3.select(slice.selector);
  const metrics = fd.metrics || [];

  // Chart options
  const logScale = fd.log_scale || false;
  const chartType = fd.time_series_option || 'not_time';
  const hasTime = ['adv_anal', 'time_series'].indexOf(chartType) >= 0;
  const format = d3.format(fd.number_format);
  const timeFormat = d3TimeFormatPreset(fd.date_time_format);

  // Clear any previous render and leftover nvd3 tooltips before drawing.
  div.selectAll('*').remove();
  d3.selectAll('.nvtooltip').remove();
  // One fixed-position tooltip shared by every chart of this slice.
  const tooltip = d3
    .select('body')
    .append('div')
    .attr('class', 'nvtooltip')
    .style('opacity', 0)
    .style('top', 0)
    .style('left', 0)
    .style('position', 'fixed');

  // Draws the i-th chart; each chart gets an equal share of the height.
  function drawVis(i, dat) {
    const datum = dat[i];
    const w = slice.width();
    const h = slice.height() / data.length;
    // Scales mapping the normalized layout (0..1) to pixels; their
    // domains are narrowed on zoom (see click()).
    const x = d3.scale.linear().range([0, w]);
    const y = d3.scale.linear().range([0, h]);
    const viz = div
      .append('div')
      .attr('class', 'chart')
      .style('width', w + 'px')
      .style('height', h + 'px')
      .append('svg:svg')
      .attr('width', w)
      .attr('height', h);

    // Add padding between multiple visualizations
    if (i !== data.length - 1 && data.length > 1) {
      viz.style('padding-bottom', '3px');
    }
    if (i !== 0 && data.length > 1) {
      viz.style('padding-top', '3px');
    }

    const root = d3.hierarchy(datum);

    // True when n is a metric node whose children are time partitions.
    function hasDateNode(n) {
      return metrics.indexOf(n.data.name) >= 0 && hasTime;
    }

    // node.name is the metric/group name
    // node.disp is the display value
    // node.value determines sorting order
    // node.weight determines partition height
    // node.sum is the sum of children weights
    root.eachAfter((n) => {
      n.disp = n.data.val;
      n.value = n.disp < 0 ? -n.disp : n.disp;
      n.weight = n.value;
      n.name = n.data.name;
      // If the parent is a metric and we still have
      // the time column, perform a date-time format
      if (n.parent && hasDateNode(n.parent)) {
        // Format timestamp values
        n.weight = fd.equal_date_size ? 1 : n.value;
        n.value = n.name;
        n.name = timeFormat(n.name);
      }
      if (logScale) n.weight = Math.log(n.weight + 1);
      // Non-numeric or non-finite values render with an empty label.
      n.disp = n.disp && !isNaN(n.disp) && isFinite(n.disp) ? format(n.disp) : '';
    });
    // Perform sort by weight
    root.sort((a, b) => {
      const v = b.value - a.value;
      if (v === 0) {
        // Break ties by name, descending.
        return b.name > a.name ? 1 : -1;
      }
      return v;
    });

    // Prune data based on partition limit and threshold
    // both are applied at the same time
    if (fd.partition_threshold && fd.partition_threshold >= 0) {
      // Compute weight sums as we go
      root.each((n) => {
        n.sum = n.children ? n.children.reduce((a, v) => a + v.weight, 0) || 1 : 1;
        if (n.children) {
          // Dates are not ordered by weight
          if (hasDateNode(n)) {
            if (fd.equal_date_size) {
              return;
            }
            const removeIndices = [];
            // Keep at least one child
            for (let j = 1; j < n.children.length; j++) {
              if (n.children[j].weight / n.sum < fd.partition_threshold) {
                removeIndices.push(j);
              }
            }
            // Splice from the end so earlier indices stay valid.
            for (let j = removeIndices.length - 1; j >= 0; j--) {
              n.children.splice(removeIndices[j], 1);
            }
          } else {
            // Find first child that falls below the threshold
            let j;
            for (j = 1; j < n.children.length; j++) {
              if (n.children[j].weight / n.sum < fd.partition_threshold) {
                break;
              }
            }
            n.children = n.children.slice(0, j);
          }
        }
      });
    }
    if (fd.partition_limit && fd.partition_limit >= 0) {
      root.each((n) => {
        if (n.children && n.children.length > fd.partition_limit) {
          // Date partitions are never truncated by the limit.
          if (!hasDateNode(n)) {
            n.children = n.children.slice(0, fd.partition_limit);
          }
        }
      });
    }
    // Compute final weight sums
    root.eachAfter((n) => {
      n.sum = n.children ? n.children.reduce((a, v) => a + v.weight, 0) || 1 : 1;
    });

    const verboseMap = slice.datasource.verbose_map;
    // Human-readable column name for a given hierarchy depth.
    function getCategory(depth) {
      if (!depth) {
        return 'Metric';
      }
      if (hasTime && depth === 1) {
        return 'Date';
      }
      const col = fd.groupby[depth - (hasTime ? 2 : 1)];
      return verboseMap[col] || col;
    }

    // Path from d up to the root, starting at d itself.
    function getAncestors(d) {
      const ancestors = [d];
      let node = d;
      while (node.parent) {
        ancestors.push(node.parent);
        node = node.parent;
      }
      return ancestors;
    }

    // Fills the tooltip with either the hovered node alone or (rich
    // tooltip) its full ancestor chain, then moves it near the cursor.
    function positionAndPopulate(tip, d) {
      let t = '<table>';
      if (!fd.rich_tooltip) {
        t += (
          '<thead><tr><td colspan="3">' +
          `<strong class='x-value'>${getCategory(d.depth)}</strong>` +
          '</td></tr></thead><tbody>'
        );
        t += (
          '<tr class="emph">' +
          '<td class="legend-color-guide" style="opacity: 0.75">' +
          `<div style='border: thin solid grey; background-color: ${d.color};'` +
          '></div>' +
          '</td>' +
          `<td>${d.name}</td>` +
          `<td>${d.disp}</td>` +
          '</tr>'
        );
      } else {
        const nodes = getAncestors(d);
        nodes.forEach((n) => {
          // Emphasize the row for the hovered node itself.
          const atNode = n.depth === d.depth;
          t += '<tbody>';
          t += (
            `<tr class='${atNode ? 'emph' : ''}'>` +
            `<td class='legend-color-guide' style='opacity: ${atNode ? '1' : '0.75'}'>` +
            '<div ' +
            `style='border: 2px solid ${atNode ? 'black' : 'transparent'};` +
            `background-color: ${n.color};'` +
            '></div>' +
            '</td>' +
            `<td>${n.name}</td>` +
            `<td>${n.disp}</td>` +
            `<td>${getCategory(n.depth)}</td>` +
            '</tr>'
          );
        });
      }
      t += '</tbody></table>';
      tip.html(t)
        .style('left', (d3.event.pageX + 13) + 'px')
        .style('top', (d3.event.pageY - 10) + 'px');
    }

    // One <g> per node; init() flattens the hierarchy and assigns the
    // normalized x/y/dx/dy layout coordinates used below.
    const g = viz
      .selectAll('g')
      .data(init(root))
      .enter()
      .append('svg:g')
      .attr('transform', d => `translate(${x(d.y)},${y(d.x)})`)
      .on('click', click)
      .on('mouseover', (d) => {
        tooltip
          .interrupt()
          .transition()
          .duration(100)
          .style('opacity', 0.9);
        positionAndPopulate(tooltip, d);
      })
      .on('mousemove', (d) => {
        positionAndPopulate(tooltip, d);
      })
      .on('mouseout', () => {
        tooltip
          .interrupt()
          .transition()
          .duration(250)
          .style('opacity', 0);
      });

    // Pixels per unit of normalized layout extent at the current zoom.
    let kx = w / root.dx;
    let ky = h / 1;
    g.append('svg:rect')
      .attr('width', root.dy * kx)
      .attr('height', d => d.dx * ky);
    g.append('svg:text')
      .attr('transform', transform)
      .attr('dy', '0.35em')
      // Hide labels in cells too short (<= 12px) to fit the text.
      .style('opacity', d => d.dx * ky > 12 ? 1 : 0)
      .text((d) => {
        if (!d.disp) {
          return d.name;
        }
        return `${d.name}: ${d.disp}`;
      });
    // Apply color scheme
    g.selectAll('rect')
      .style('fill', (d) => {
        d.color = getColorFromScheme(d.name, fd.color_scheme);
        return d.color;
      });
    // Zoom out when clicking outside vis
    // d3.select(window)
    //   .on('click', () => click(root));
    // Keep text centered in its division
    function transform(d) {
      return `translate(8,${d.dx * ky / 2})`;
    }
    // When clicking a subdivision, the vis will zoom in to it
    function click(d) {
      if (!d.children) {
        if (d.parent) {
          // Clicking on the rightmost level should zoom in
          return click(d.parent);
        }
        return false;
      }
      // Rescale so the clicked node's subtree fills the chart; a 40px
      // gutter is kept on the left for ancestors when not at the root.
      kx = (d.y ? w - 40 : w) / (1 - d.y);
      ky = h / d.dx;
      x.domain([d.y, 1]).range([d.y ? 40 : 0, w]);
      y.domain([d.x, d.x + d.dx]);
      const t = g
        .transition()
        .duration(d3.event.altKey ? 7500 : 750)
        .attr('transform', nd => `translate(${x(nd.y)},${y(nd.x)})`);
      t.select('rect')
        .attr('width', d.dy * kx)
        .attr('height', nd => nd.dx * ky);
      t.select('text')
        .attr('transform', transform)
        .style('opacity', nd => nd.dx * ky > 12 ? 1 : 0);
      d3.event.stopPropagation();
      return true;
    }
  }
  for (let i = 0; i < data.length; i++) {
    drawVis(i, data);
  }
}
module.exports = partitionVis;

View File

@@ -27,6 +27,7 @@ from flask_babel import lazy_gettext as _
from markdown import markdown
import simplejson as json
from six import string_types, PY3
from six.moves import reduce
from dateutil import relativedelta as rdelta
from superset import app, utils, cache, get_manifest_file
@@ -915,7 +916,7 @@ class NVD3TimeSeriesViz(NVD3Viz):
if isinstance(series_title, string_types):
series_title += title_suffix
elif title_suffix and isinstance(series_title, (list, tuple)):
series_title.append(title_suffix)
series_title = series_title + (title_suffix,)
d = {
"key": series_title,
@@ -928,16 +929,24 @@ class NVD3TimeSeriesViz(NVD3Viz):
chart_data.append(d)
return chart_data
def process_data(self, df):
def process_data(self, df, aggregate=False):
fd = self.form_data
df = df.fillna(0)
if fd.get("granularity") == "all":
raise Exception(_("Pick a time granularity for your time series"))
df = df.pivot_table(
index=DTTM_ALIAS,
columns=fd.get('groupby'),
values=fd.get('metrics'))
if not aggregate:
df = df.pivot_table(
index=DTTM_ALIAS,
columns=fd.get('groupby'),
values=fd.get('metrics'))
else:
df = df.pivot_table(
index=DTTM_ALIAS,
columns=fd.get('groupby'),
values=fd.get('metrics'),
fill_value=0,
aggfunc=sum)
fm = fd.get("resample_fillmethod")
if not fm:
@@ -1782,6 +1791,142 @@ class PairedTTestViz(BaseViz):
return data
class PartitionViz(NVD3TimeSeriesViz):

    """
    A hierarchical data visualization with support for time series.
    """

    viz_type = 'partition'
    verbose_name = _("Partition Diagram")

    def query_obj(self):
        query_obj = super(PartitionViz, self).query_obj()
        time_op = self.form_data.get('time_series_option', 'not_time')
        # Return time series data if the user specifies so
        query_obj['is_timeseries'] = time_op != 'not_time'
        return query_obj

    def levels_for(self, time_op, groups, df):
        """
        Compute the partition at each `level` from the dataframe.

        Returns a dict {level: aggregated frame}: level 0 aggregates the
        whole frame, level i groups by the first i `groups` columns.
        `time_op` selects the aggregation ('agg_mean' means, else sums).
        """
        levels = {}
        for i in range(0, len(groups) + 1):
            agg_df = df.groupby(groups[:i]) if i else df
            levels[i] = (
                agg_df.mean() if time_op == 'agg_mean'
                else agg_df.sum(numeric_only=True))
        return levels

    def levels_for_diff(self, time_op, groups, df):
        """Partition levels holding the metric change between the first
        and last time grain, per `time_op` (diff, factor, or percent)."""
        # Obtain a unique list of the time grains
        times = list(set(df[DTTM_ALIAS]))
        times.sort()
        until = times[len(times) - 1]
        since = times[0]
        # Function describing how to calculate the difference
        # (first entry operates on Series, second on scalars).
        func = {
            'point_diff': [
                pd.Series.sub,
                lambda a, b, fill_value: a - b,
            ],
            'point_factor': [
                pd.Series.div,
                lambda a, b, fill_value: a / float(b),
            ],
            'point_percent': [
                lambda a, b, fill_value=0: a.div(b, fill_value=fill_value) - 1,
                lambda a, b, fill_value: a / float(b) - 1,
            ],
        }[time_op]
        agg_df = df.groupby(DTTM_ALIAS).sum()
        # Level 0: scalar change per metric across the whole frame.
        levels = {0: pd.Series({
            m: func[1](agg_df[m][until], agg_df[m][since], 0)
            for m in agg_df.columns})}
        for i in range(1, len(groups) + 1):
            agg_df = df.groupby([DTTM_ALIAS] + groups[:i]).sum()
            levels[i] = pd.DataFrame({
                m: func[0](agg_df[m][until], agg_df[m][since], fill_value=0)
                for m in agg_df.columns})
        return levels

    def levels_for_time(self, groups, df):
        """Time-series levels: run the NVD3 pivot (process_data) once per
        prefix of `groups`, dropping the unused groupby columns."""
        procs = {}
        for i in range(0, len(groups) + 1):
            # process_data reads groupby from form_data, so swap it in
            # temporarily and restore it after the loop.
            self.form_data['groupby'] = groups[:i]
            df_drop = df.drop(groups[i:], 1)
            procs[i] = self.process_data(df_drop, aggregate=True).fillna(0)
        self.form_data['groupby'] = groups
        return procs

    def nest_values(self, levels, level=0, metric=None, dims=()):
        """
        Nest values at each level on the back-end with
        access and setting, instead of summing from the bottom.
        """
        if not level:
            # Top level: one subtree per metric.
            return [{
                'name': m,
                'val': levels[0][m],
                'children': self.nest_values(levels, 1, m),
            } for m in levels[0].index]
        if level == 1:
            return [{
                'name': i,
                'val': levels[1][metric][i],
                'children': self.nest_values(levels, 2, metric, (i,)),
            } for i in levels[1][metric].index]
        if level >= len(levels):
            return []
        # Deeper levels index by the accumulated group-dimension tuple.
        return [{
            'name': i,
            'val': levels[level][metric][dims][i],
            'children': self.nest_values(
                levels, level + 1, metric, dims + (i,)
            ),
        } for i in levels[level][metric][dims].index]

    def nest_procs(self, procs, level=-1, dims=(), time=None):
        """Nest the pivoted time-series frames from levels_for_time into
        the metric -> time -> group... hierarchy (no 'val' at the top)."""
        if level == -1:
            return [{
                'name': m,
                'children': self.nest_procs(procs, 0, (m,)),
            } for m in procs[0].columns]
        if not level:
            # Level 0: one child per time grain in the pivot index.
            return [{
                'name': t,
                'val': procs[0][dims[0]][t],
                'children': self.nest_procs(procs, 1, dims, t),
            } for t in procs[0].index]
        if level >= len(procs):
            return []
        return [{
            'name': i,
            'val': procs[level][dims][i][time],
            'children': self.nest_procs(procs, level + 1, dims + (i,), time)
        } for i in procs[level][dims].columns]

    def get_data(self, df):
        """Dispatch to the level builder matching `time_series_option`
        and return the nested hierarchy consumed by partition.js."""
        fd = self.form_data
        groups = fd.get('groupby', [])
        time_op = fd.get('time_series_option', 'not_time')
        if not len(groups):
            raise ValueError('Please choose at least one groupby')
        if time_op == 'not_time':
            levels = self.levels_for('agg_sum', groups, df)
        elif time_op in ['agg_sum', 'agg_mean']:
            levels = self.levels_for(time_op, groups, df)
        elif time_op in ['point_diff', 'point_factor', 'point_percent']:
            levels = self.levels_for_diff(time_op, groups, df)
        elif time_op == 'adv_anal':
            procs = self.levels_for_time(groups, df)
            return self.nest_procs(procs)
        else:
            # 'time_series': include the time column as the first level.
            levels = self.levels_for('agg_sum', [DTTM_ALIAS] + groups, df)
        return self.nest_values(levels)
viz_types = {
o.viz_type: o for o in globals().values()
if (

View File

@@ -3,6 +3,7 @@ import pandas as pd
import superset.viz as viz
from superset.utils import DTTM_ALIAS
from mock import Mock, patch
class PairedTTestTestCase(unittest.TestCase):
@@ -135,3 +136,227 @@ class PairedTTestTestCase(unittest.TestCase):
],
}
self.assertEquals(data, expected)
class PartitionVizTestCase(unittest.TestCase):
@patch('superset.viz.BaseViz.query_obj')
def test_query_obj_time_series_option(self, super_query_obj):
datasource = Mock()
form_data = {}
test_viz = viz.PartitionViz(datasource, form_data)
super_query_obj.return_value = {}
query_obj = test_viz.query_obj()
self.assertFalse(query_obj['is_timeseries'])
test_viz.form_data['time_series_option'] = 'agg_sum'
query_obj = test_viz.query_obj()
self.assertTrue(query_obj['is_timeseries'])
def test_levels_for_computes_levels(self):
raw = {}
raw[DTTM_ALIAS] = [100, 200, 300, 100, 200, 300, 100, 200, 300]
raw['groupA'] = ['a1', 'a1', 'a1', 'b1', 'b1', 'b1', 'c1', 'c1', 'c1']
raw['groupB'] = ['a2', 'a2', 'a2', 'b2', 'b2', 'b2', 'c2', 'c2', 'c2']
raw['groupC'] = ['a3', 'a3', 'a3', 'b3', 'b3', 'b3', 'c3', 'c3', 'c3']
raw['metric1'] = [1, 2, 3, 4, 5, 6, 7, 8, 9]
raw['metric2'] = [10, 20, 30, 40, 50, 60, 70, 80, 90]
raw['metric3'] = [100, 200, 300, 400, 500, 600, 700, 800, 900]
df = pd.DataFrame(raw)
groups = ['groupA', 'groupB', 'groupC']
time_op = 'agg_sum'
test_viz = viz.PartitionViz(Mock(), {})
levels = test_viz.levels_for(time_op, groups, df)
self.assertEqual(4, len(levels))
expected = {
DTTM_ALIAS: 1800,
'metric1': 45,
'metric2': 450,
'metric3': 4500,
}
self.assertEqual(expected, levels[0].to_dict())
expected = {
DTTM_ALIAS: {'a1': 600, 'b1': 600, 'c1': 600},
'metric1': {'a1': 6, 'b1': 15, 'c1': 24},
'metric2': {'a1': 60, 'b1': 150, 'c1': 240},
'metric3': {'a1': 600, 'b1': 1500, 'c1': 2400},
}
self.assertEqual(expected, levels[1].to_dict())
self.assertEqual(['groupA', 'groupB'], levels[2].index.names)
self.assertEqual(
['groupA', 'groupB', 'groupC'],
levels[3].index.names,
)
time_op = 'agg_mean'
levels = test_viz.levels_for(time_op, groups, df)
self.assertEqual(4, len(levels))
expected = {
DTTM_ALIAS: 200.0,
'metric1': 5.0,
'metric2': 50.0,
'metric3': 500.0,
}
self.assertEqual(expected, levels[0].to_dict())
expected = {
DTTM_ALIAS: {'a1': 200, 'c1': 200, 'b1': 200},
'metric1': {'a1': 2, 'b1': 5, 'c1': 8},
'metric2': {'a1': 20, 'b1': 50, 'c1': 80},
'metric3': {'a1': 200, 'b1': 500, 'c1': 800},
}
self.assertEqual(expected, levels[1].to_dict())
self.assertEqual(['groupA', 'groupB'], levels[2].index.names)
self.assertEqual(
['groupA', 'groupB', 'groupC'],
levels[3].index.names,
)
def test_levels_for_diff_computes_difference(self):
raw = {}
raw[DTTM_ALIAS] = [100, 200, 300, 100, 200, 300, 100, 200, 300]
raw['groupA'] = ['a1', 'a1', 'a1', 'b1', 'b1', 'b1', 'c1', 'c1', 'c1']
raw['groupB'] = ['a2', 'a2', 'a2', 'b2', 'b2', 'b2', 'c2', 'c2', 'c2']
raw['groupC'] = ['a3', 'a3', 'a3', 'b3', 'b3', 'b3', 'c3', 'c3', 'c3']
raw['metric1'] = [1, 2, 3, 4, 5, 6, 7, 8, 9]
raw['metric2'] = [10, 20, 30, 40, 50, 60, 70, 80, 90]
raw['metric3'] = [100, 200, 300, 400, 500, 600, 700, 800, 900]
df = pd.DataFrame(raw)
groups = ['groupA', 'groupB', 'groupC']
test_viz = viz.PartitionViz(Mock(), {})
time_op = 'point_diff'
levels = test_viz.levels_for_diff(time_op, groups, df)
expected = {
'metric1': 6,
'metric2': 60,
'metric3': 600,
}
self.assertEqual(expected, levels[0].to_dict())
expected = {
'metric1': {'a1': 2, 'b1': 2, 'c1': 2},
'metric2': {'a1': 20, 'b1': 20, 'c1': 20},
'metric3': {'a1': 200, 'b1': 200, 'c1': 200},
}
self.assertEqual(expected, levels[1].to_dict())
self.assertEqual(4, len(levels))
self.assertEqual(['groupA', 'groupB', 'groupC'], levels[3].index.names)
def test_levels_for_time_calls_process_data_and_drops_cols(self):
raw = {}
raw[DTTM_ALIAS] = [100, 200, 300, 100, 200, 300, 100, 200, 300]
raw['groupA'] = ['a1', 'a1', 'a1', 'b1', 'b1', 'b1', 'c1', 'c1', 'c1']
raw['groupB'] = ['a2', 'a2', 'a2', 'b2', 'b2', 'b2', 'c2', 'c2', 'c2']
raw['groupC'] = ['a3', 'a3', 'a3', 'b3', 'b3', 'b3', 'c3', 'c3', 'c3']
raw['metric1'] = [1, 2, 3, 4, 5, 6, 7, 8, 9]
raw['metric2'] = [10, 20, 30, 40, 50, 60, 70, 80, 90]
raw['metric3'] = [100, 200, 300, 400, 500, 600, 700, 800, 900]
df = pd.DataFrame(raw)
groups = ['groupA', 'groupB', 'groupC']
test_viz = viz.PartitionViz(Mock(), {'groupby': groups})
def return_args(df_drop, aggregate):
return df_drop
test_viz.process_data = Mock(side_effect=return_args)
levels = test_viz.levels_for_time(groups, df)
self.assertEqual(4, len(levels))
cols = [DTTM_ALIAS, 'metric1', 'metric2', 'metric3']
self.assertEqual(sorted(cols), sorted(levels[0].columns.tolist()))
cols += ['groupA']
self.assertEqual(sorted(cols), sorted(levels[1].columns.tolist()))
cols += ['groupB']
self.assertEqual(sorted(cols), sorted(levels[2].columns.tolist()))
cols += ['groupC']
self.assertEqual(sorted(cols), sorted(levels[3].columns.tolist()))
self.assertEqual(4, len(test_viz.process_data.mock_calls))
def test_nest_values_returns_hierarchy(self):
raw = {}
raw['groupA'] = ['a1', 'a1', 'a1', 'b1', 'b1', 'b1', 'c1', 'c1', 'c1']
raw['groupB'] = ['a2', 'a2', 'a2', 'b2', 'b2', 'b2', 'c2', 'c2', 'c2']
raw['groupC'] = ['a3', 'a3', 'a3', 'b3', 'b3', 'b3', 'c3', 'c3', 'c3']
raw['metric1'] = [1, 2, 3, 4, 5, 6, 7, 8, 9]
raw['metric2'] = [10, 20, 30, 40, 50, 60, 70, 80, 90]
raw['metric3'] = [100, 200, 300, 400, 500, 600, 700, 800, 900]
df = pd.DataFrame(raw)
test_viz = viz.PartitionViz(Mock(), {})
groups = ['groupA', 'groupB', 'groupC']
levels = test_viz.levels_for('agg_sum', groups, df)
nest = test_viz.nest_values(levels)
self.assertEqual(3, len(nest))
for i in range(0, 3):
self.assertEqual('metric' + str(i + 1), nest[i]['name'])
self.assertEqual(3, len(nest[0]['children']))
self.assertEqual(1, len(nest[0]['children'][0]['children']))
self.assertEqual(1, len(nest[0]['children'][0]['children'][0]['children']))
def test_nest_procs_returns_hierarchy(self):
raw = {}
raw[DTTM_ALIAS] = [100, 200, 300, 100, 200, 300, 100, 200, 300]
raw['groupA'] = ['a1', 'a1', 'a1', 'b1', 'b1', 'b1', 'c1', 'c1', 'c1']
raw['groupB'] = ['a2', 'a2', 'a2', 'b2', 'b2', 'b2', 'c2', 'c2', 'c2']
raw['groupC'] = ['a3', 'a3', 'a3', 'b3', 'b3', 'b3', 'c3', 'c3', 'c3']
raw['metric1'] = [1, 2, 3, 4, 5, 6, 7, 8, 9]
raw['metric2'] = [10, 20, 30, 40, 50, 60, 70, 80, 90]
raw['metric3'] = [100, 200, 300, 400, 500, 600, 700, 800, 900]
df = pd.DataFrame(raw)
test_viz = viz.PartitionViz(Mock(), {})
groups = ['groupA', 'groupB', 'groupC']
metrics = ['metric1', 'metric2', 'metric3']
procs = {}
for i in range(0, 4):
df_drop = df.drop(groups[i:], 1)
pivot = df_drop.pivot_table(
index=DTTM_ALIAS,
columns=groups[:i],
values=metrics,
)
procs[i] = pivot
nest = test_viz.nest_procs(procs)
self.assertEqual(3, len(nest))
for i in range(0, 3):
self.assertEqual('metric' + str(i + 1), nest[i]['name'])
self.assertEqual(None, nest[i].get('val'))
self.assertEqual(3, len(nest[0]['children']))
self.assertEqual(3, len(nest[0]['children'][0]['children']))
self.assertEqual(1, len(nest[0]['children'][0]['children'][0]['children']))
self.assertEqual(1,
len(nest[0]['children']
[0]['children']
[0]['children']
[0]['children'])
)
def test_get_data_calls_correct_method(self):
test_viz = viz.PartitionViz(Mock(), {})
df = Mock()
with self.assertRaises(ValueError):
test_viz.get_data(df)
test_viz.levels_for = Mock(return_value=1)
test_viz.nest_values = Mock(return_value=1)
test_viz.form_data['groupby'] = ['groups']
test_viz.form_data['time_series_option'] = 'not_time'
test_viz.get_data(df)
self.assertEqual('agg_sum', test_viz.levels_for.mock_calls[0][1][0])
test_viz.form_data['time_series_option'] = 'agg_sum'
test_viz.get_data(df)
self.assertEqual('agg_sum', test_viz.levels_for.mock_calls[1][1][0])
test_viz.form_data['time_series_option'] = 'agg_mean'
test_viz.get_data(df)
self.assertEqual('agg_mean', test_viz.levels_for.mock_calls[2][1][0])
test_viz.form_data['time_series_option'] = 'point_diff'
test_viz.levels_for_diff = Mock(return_value=1)
test_viz.get_data(df)
self.assertEqual('point_diff', test_viz.levels_for_diff.mock_calls[0][1][0])
test_viz.form_data['time_series_option'] = 'point_percent'
test_viz.get_data(df)
self.assertEqual('point_percent', test_viz.levels_for_diff.mock_calls[1][1][0])
test_viz.form_data['time_series_option'] = 'point_factor'
test_viz.get_data(df)
self.assertEqual('point_factor', test_viz.levels_for_diff.mock_calls[2][1][0])
test_viz.levels_for_time = Mock(return_value=1)
test_viz.nest_procs = Mock(return_value=1)
test_viz.form_data['time_series_option'] = 'adv_anal'
test_viz.get_data(df)
self.assertEqual(1, len(test_viz.levels_for_time.mock_calls))
self.assertEqual(1, len(test_viz.nest_procs.mock_calls))
test_viz.form_data['time_series_option'] = 'time_series'
test_viz.get_data(df)
self.assertEqual('agg_sum', test_viz.levels_for.mock_calls[3][1][0])
self.assertEqual(7, len(test_viz.nest_values.mock_calls))