diff --git a/superset/assets/images/viz_thumbnails/paired_ttest.png b/superset/assets/images/viz_thumbnails/paired_ttest.png new file mode 100644 index 00000000000..4f8ad71b121 Binary files /dev/null and b/superset/assets/images/viz_thumbnails/paired_ttest.png differ diff --git a/superset/assets/javascripts/explore/stores/controls.jsx b/superset/assets/javascripts/explore/stores/controls.jsx index 894b5a4f6fd..66659043401 100644 --- a/superset/assets/javascripts/explore/stores/controls.jsx +++ b/superset/assets/javascripts/explore/stores/controls.jsx @@ -1370,5 +1370,26 @@ export const controls = { description: t('The color scheme for rendering chart'), schemes: ALL_COLOR_SCHEMES, }, + + significance_level: { + type: 'TextControl', + label: 'Significance Level', + default: 0.05, + description: 'Threshold alpha level for determining significance', + }, + + pvalue_precision: { + type: 'TextControl', + label: 'p-value precision', + default: 6, + description: 'Number of decimal places with which to display p-values', + }, + + liftvalue_precision: { + type: 'TextControl', + label: 'Lift % precision', + default: 4, + description: 'Number of decimal places with which to display lift values', + }, }; export default controls; diff --git a/superset/assets/javascripts/explore/stores/visTypes.js b/superset/assets/javascripts/explore/stores/visTypes.js index 8298afce0d1..011f44d0776 100644 --- a/superset/assets/javascripts/explore/stores/visTypes.js +++ b/superset/assets/javascripts/explore/stores/visTypes.js @@ -1101,6 +1101,24 @@ export const visTypes = { }, }, }, + + paired_ttest: { + label: 'Time Series - Paired t-test', + showOnExplore: true, + requiresTime: true, + controlPanelSections: [ + sections.NVD3TimeSeries[0], + { + label: 'Paired t-test', + expanded: false, + controlSetRows: [ + ['significance_level'], + ['pvalue_precision'], + ['liftvalue_precision'], + ], + }, + ], + }, }; export default visTypes; diff --git a/superset/assets/package.json b/superset/assets/package.json index 729bb022a66..3b1c53b07e8 100644 --- a/superset/assets/package.json +++ b/superset/assets/package.json @@ -53,6 +53,7 @@ "d3-tip": "^0.6.7", "datamaps": "^0.5.8", "datatables.net-bs": "^1.10.15", + "distributions": "^1.0.0", "immutable": "^3.8.1", "jed": "^1.1.1", "po2json": "^0.4.5", diff --git a/superset/assets/visualizations/main.js b/superset/assets/visualizations/main.js index a02f508c330..d5c3abb1a7e 100644 --- a/superset/assets/visualizations/main.js +++ b/superset/assets/visualizations/main.js @@ -33,5 +33,6 @@ const vizMap = { world_map: require('./world_map.js'), dual_line: require('./nvd3_vis.js'), event_flow: require('./EventFlow.jsx'), + paired_ttest: require('./paired_ttest.jsx'), }; export default vizMap; diff --git a/superset/assets/visualizations/paired_ttest.css b/superset/assets/visualizations/paired_ttest.css new file mode 100644 index 00000000000..0a2c1b8d28f --- /dev/null +++ b/superset/assets/visualizations/paired_ttest.css @@ -0,0 +1,67 @@ +.paired_ttest .scrollbar-container { + overflow: scroll; +} + +.paired-ttest-table .scrollbar-content { + padding-left: 5px; + padding-right: 5px; + margin-bottom: 0; +} + +.paired-ttest-table h1 { + margin-left: 5px; +} + +.reactable-data tr, +.reactable-header-sortable { + -webkit-transition: ease-in-out 0.1s; + transition: ease-in-out 0.1s; +} + +.reactable-data tr:hover { + background-color: #e0e0e0; +} + +.reactable-data tr .false { + color: #f44336; +} + +.reactable-data tr .true { + color: #4caf50; +} + +.reactable-data tr .control { + color: #2196f3; +} + +.reactable-data tr .invalid { + color: #ff9800; +} + +.reactable-data .control td { + background-color: #eeeeee; +} + +.reactable-header-sortable:hover, +.reactable-header-sortable:focus, +.reactable-header-sort-asc, +.reactable-header-sort-desc { + background-color: #e0e0e0; + position: relative; +} + +.reactable-header-sort-asc:after { + content: '\25bc'; + position: absolute; + right: 10px; +} + +.reactable-header-sort-desc:after { + content: '\25b2'; + position: absolute; + right: 10px; +} + +.paired-ttest-table table { + margin-bottom: 0; +} diff --git a/superset/assets/visualizations/paired_ttest.jsx b/superset/assets/visualizations/paired_ttest.jsx new file mode 100644 index 00000000000..9febc798b09 --- /dev/null +++ b/superset/assets/visualizations/paired_ttest.jsx @@ -0,0 +1,277 @@ +import d3 from 'd3'; +import dist from 'distributions'; + +import React from 'react'; +import { Table, Tr, Td, Thead, Th } from 'reactable'; +import ReactDOM from 'react-dom'; +import PropTypes from 'prop-types'; + +import './paired_ttest.css'; + +class TTestTable extends React.Component { + + constructor(props) { + super(props); + this.state = { + pValues: [], + liftValues: [], + control: 0, + }; + } + + componentWillMount() { + this.computeTTest(this.state.control); // initially populate table + } + + getLiftStatus(row) { + // Get a css class name for coloring + if (row === this.state.control) { + return 'control'; + } + const liftVal = this.state.liftValues[row]; + if (isNaN(liftVal) || !isFinite(liftVal)) { + return 'invalid'; // infinite or NaN values + } + return liftVal >= 0 ? 'true' : 'false'; // green on true, red on false + } + + getPValueStatus(row) { + if (row === this.state.control) { + return 'control'; + } + const pVal = this.state.pValues[row]; + if (isNaN(pVal) || !isFinite(pVal)) { + return 'invalid'; + } + return ''; // p-values won't normally be colored + } + + getSignificance(row) { + // Color significant as green, else red + if (row === this.state.control) { + return 'control'; + } + // p-values significant below set threshold + return this.state.pValues[row] <= this.props.alpha; + } + + computeLift(values, control) { + // Compute the lift value between two time series + let sumValues = 0; + let sumControl = 0; + for (let i = 0; i < values.length; i++) { + sumValues += values[i].y; + sumControl += control[i].y; + } + return (((sumValues - sumControl) / sumControl) * 100) + .toFixed(this.props.liftValPrec); + } + + computePValue(values, control) { + // Compute the p-value from Student's t-test + // between two time series + let diffSum = 0; + let diffSqSum = 0; + let finiteCount = 0; + for (let i = 0; i < values.length; i++) { + const diff = control[i].y - values[i].y; + if (global.isFinite(diff)) { + finiteCount++; + diffSum += diff; + diffSqSum += diff * diff; + } + } + const tvalue = -Math.abs(diffSum * + Math.sqrt((finiteCount - 1) / + (finiteCount * diffSqSum - diffSum * diffSum))); + try { + return (2 * new dist.Studentt(finiteCount - 1).cdf(tvalue)) + .toFixed(this.props.pValPrec); // two-sided test + } catch (err) { + return NaN; + } + } + + computeTTest(control) { + // Compute lift and p-values for each row + // against the selected control + const data = this.props.data; + const pValues = []; + const liftValues = []; + if (!data) { + return; + } + for (let i = 0; i < data.length; i++) { + if (i === control) { + pValues.push('control'); + liftValues.push('control'); + } else { + pValues.push(this.computePValue(data[i].values, data[control].values)); + liftValues.push(this.computeLift(data[i].values, data[control].values)); + } + } + this.setState({ pValues, liftValues, control }); + } + + render() { + const data = this.props.data; + const metric = this.props.metric; + const groups = this.props.groups; + // Render column header for each group + const columns = groups.map((group, i) => ( + {group} + )); + const numGroups = groups.length; + // Columns for p-value, lift-value, and significance (true/false) + columns.push(p-value); + columns.push(Lift %); + columns.push(Significant); + const rows = data.map((entry, i) => { + const values = groups.map((group, j) => ( // group names + + )); + values.push( + , + ); + values.push( + , + ); + values.push( + , + ); + return ( + + {values} + + ); + }); + // When sorted ascending, 'control' will always be at top + const sortConfig = groups.concat([ + { + column: 'pValue', + sortFunction: (a, b) => { + if (a === 'control') { + return -1; + } + if (b === 'control') { + return 1; + } + return a > b ? 1 : -1; // p-values ascending + }, + }, + { + column: 'liftValue', + sortFunction: (a, b) => { + if (a === 'control') { + return -1; + } + if (b === 'control') { + return 1; + } + return parseFloat(a) > parseFloat(b) ? -1 : 1; // lift values descending + }, + }, + { + column: 'significant', + sortFunction: (a, b) => { + if (a === 'control') { + return -1; + } + if (b === 'control') { + return 1; + } + return a > b ? -1 : 1; // significant values first + }, + }, + ]); + return ( +
+

{metric}

+ + + {columns} + + {rows} +
+
+ ); + } +} + +TTestTable.propTypes = { + metric: PropTypes.string.isRequired, + groups: PropTypes.array.isRequired, + data: PropTypes.array.isRequired, + alpha: PropTypes.number.isRequired, + liftValPrec: PropTypes.number.isRequired, + pValPrec: PropTypes.number.isRequired, +}; +TTestTable.defaultProps = { + metric: '', + groups: [], + data: [], + alpha: 0.05, + liftValPrec: 4, + pValPrec: 6, +}; + +function pairedTTestVis(slice, payload) { + const div = d3.select(slice.selector); + const container = slice.container; + const height = slice.container.height(); + const fd = slice.formData; + const data = payload.data; + const alpha = fd.significance_level; + const pValPrec = fd.pvalue_precision; + const liftValPrec = fd.liftvalue_precision; + const tables = fd.metrics.map((metric, i) => ( // create a table for each metric + 32 ? 32 : pValPrec} + liftValPrec={liftValPrec > 32 ? 32 : liftValPrec} + /> + )); + div.html(''); + ReactDOM.render( +
+
+
+
+ {tables} +
+
+
+
, + div.node(), + ); + container.find('.scrollbar-container').css('max-height', height); +} + +module.exports = pairedTTestVis; diff --git a/superset/viz.py b/superset/viz.py index 2764d9133fa..47eb7a920a9 100644 --- a/superset/viz.py +++ b/superset/viz.py @@ -1649,6 +1649,65 @@ class EventFlowViz(BaseViz): return df.to_dict(orient="records") +class PairedTTestViz(BaseViz): + + """A table displaying paired t-test values""" + + viz_type = 'paired_ttest' + verbose_name = _("Time Series - Paired t-test") + sort_series = False + is_timeseries = True + + def get_data(self, df): + """ + Transform received data frame into an object of the form: + { + "metric1": [ + { + groups: ('groupA', ... ), + values: [ {x, y}, ... ], + }, ... + ], ... + } + """ + fd = self.form_data + groups = fd.get('groupby') + metrics = fd.get('metrics') + df.fillna(0) + df = df.pivot_table( + index=DTTM_ALIAS, + columns=groups, + values=metrics) + cols = [] + # Be rid of falsey keys + for col in df.columns: + if col == '': + cols.append('N/A') + elif col is None: + cols.append('NULL') + else: + cols.append(col) + df.columns = cols + data = {} + series = df.to_dict('series') + for nameSet in df.columns: + # If no groups are defined, nameSet will be the metric name + hasGroup = not isinstance(nameSet, string_types) + Y = series[nameSet] + d = { + 'group': nameSet[1:] if hasGroup else 'All', + 'values': [ + {'x': t, 'y': Y[t] if t in Y else None} + for t in df.index + ], + } + key = nameSet[0] if hasGroup else nameSet + if key in data: + data[key].append(d) + else: + data[key] = [d] + return data + viz_types_list = [ TableViz, @@ -1684,6 +1743,7 @@ viz_types_list = [ HistogramViz, SeparatorViz, EventFlowViz, + PairedTTestViz, ] viz_types = OrderedDict([(v.viz_type, v) for v in viz_types_list diff --git a/tests/viz_tests.py b/tests/viz_tests.py new file mode 100644 index 00000000000..a4beab3e987 --- /dev/null +++ b/tests/viz_tests.py @@ -0,0 +1,137 @@ +import unittest +import pandas as pd +import superset.viz as viz + +from superset.utils import DTTM_ALIAS + + +class PairedTTestTestCase(unittest.TestCase): + def test_get_data_transforms_dataframe(self): + form_data = { + 'groupby': ['groupA', 'groupB', 'groupC'], + 'metrics': ['metric1', 'metric2', 'metric3'] + } + datasource = {'type': 'table'} + # Test data + raw = {} + raw[DTTM_ALIAS] = [100, 200, 300, 100, 200, 300, 100, 200, 300] + raw['groupA'] = ['a1', 'a1', 'a1', 'b1', 'b1', 'b1', 'c1', 'c1', 'c1'] + raw['groupB'] = ['a2', 'a2', 'a2', 'b2', 'b2', 'b2', 'c2', 'c2', 'c2'] + raw['groupC'] = ['a3', 'a3', 'a3', 'b3', 'b3', 'b3', 'c3', 'c3', 'c3'] + raw['metric1'] = [1, 2, 3, 4, 5, 6, 7, 8, 9] + raw['metric2'] = [10, 20, 30, 40, 50, 60, 70, 80, 90] + raw['metric3'] = [100, 200, 300, 400, 500, 600, 700, 800, 900] + df = pd.DataFrame(raw) + pairedTTestViz = viz.viz_types['paired_ttest'](datasource, form_data) + data = pairedTTestViz.get_data(df) + # Check method correctly transforms data + expected = { + 'metric1': [ + { + 'values': [ + {'x': 100, 'y': 1}, + {'x': 200, 'y': 2}, + {'x': 300, 'y': 3}], + 'group': ('a1', 'a2', 'a3'), + }, + { + 'values': [ + {'x': 100, 'y': 4}, + {'x': 200, 'y': 5}, + {'x': 300, 'y': 6}], + 'group': ('b1', 'b2', 'b3'), + }, + { + 'values': [ + {'x': 100, 'y': 7}, + {'x': 200, 'y': 8}, + {'x': 300, 'y': 9}], + 'group': ('c1', 'c2', 'c3'), + }, + ], + 'metric2': [ + { + 'values': [ + {'x': 100, 'y': 10}, + {'x': 200, 'y': 20}, + {'x': 300, 'y': 30}], + 'group': ('a1', 'a2', 'a3'), + }, + { + 'values': [ + {'x': 100, 'y': 40}, + {'x': 200, 'y': 50}, + {'x': 300, 'y': 60}], + 'group': ('b1', 'b2', 'b3'), + }, + { + 'values': [ + {'x': 100, 'y': 70}, + {'x': 200, 'y': 80}, + {'x': 300, 'y': 90}], + 'group': ('c1', 'c2', 'c3'), + }, + ], + 'metric3': [ + { + 'values': [ + {'x': 100, 'y': 100}, + {'x': 200, 'y': 200}, + {'x': 300, 'y': 300}], + 'group': ('a1', 'a2', 'a3'), + }, + { + 'values': [ + {'x': 100, 'y': 400}, + {'x': 200, 'y': 500}, + {'x': 300, 'y': 600}], + 'group': ('b1', 'b2', 'b3'), + }, + { + 'values': [ + {'x': 100, 'y': 700}, + {'x': 200, 'y': 800}, + {'x': 300, 'y': 900}], + 'group': ('c1', 'c2', 'c3'), + }, + ], + } + self.assertEquals(data, expected) + + def test_get_data_empty_null_keys(self): + form_data = { + 'groupby': [], + 'metrics': ['', None] + } + datasource = {'type': 'table'} + # Test data + raw = {} + raw[DTTM_ALIAS] = [100, 200, 300] + raw[''] = [1, 2, 3] + raw[None] = [10, 20, 30] + + df = pd.DataFrame(raw) + pairedTTestViz = viz.viz_types['paired_ttest'](datasource, form_data) + data = pairedTTestViz.get_data(df) + # Check method correctly transforms data + expected = { + 'N/A': [ + { + 'values': [ + {'x': 100, 'y': 1}, + {'x': 200, 'y': 2}, + {'x': 300, 'y': 3}], + 'group': 'All', + }, + ], + 'NULL': [ + { + 'values': [ + {'x': 100, 'y': 10}, + {'x': 200, 'y': 20}, + {'x': 300, 'y': 30}], + 'group': 'All', + }, + ], + } + self.assertEquals(data, expected)