mirror of
https://github.com/apache/superset.git
synced 2026-04-11 04:15:33 +00:00
1593 lines
50 KiB
Python
Executable File
1593 lines
50 KiB
Python
Executable File
"""This module contains the "Viz" objects
|
|
|
|
These objects represent the backend of all the visualizations that
|
|
Superset can render.
|
|
"""
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
from __future__ import unicode_literals
|
|
|
|
import copy
|
|
import hashlib
|
|
import logging
|
|
import traceback
|
|
import uuid
|
|
import zlib
|
|
|
|
from collections import OrderedDict, defaultdict
|
|
from datetime import datetime, timedelta
|
|
|
|
import pandas as pd
|
|
import numpy as np
|
|
from flask import request
|
|
from flask_babel import lazy_gettext as _
|
|
from markdown import markdown
|
|
import simplejson as json
|
|
from six import string_types, PY3
|
|
from dateutil import relativedelta as rdelta
|
|
|
|
from superset import app, utils, cache
|
|
from superset.utils import DTTM_ALIAS
|
|
|
|
config = app.config
# Application-wide stats collector configured via the STATS_LOGGER config
# key; used below to count cache hits/misses when serving viz payloads.
stats_logger = config.get('STATS_LOGGER')
|
|
|
|
|
|
class BaseViz(object):

    """All visualizations derive this base class"""

    viz_type = None
    verbose_name = "Base Viz"
    credits = ""
    is_timeseries = False

    def __init__(self, datasource, form_data):
        """Bind the viz to its datasource and the submitted form data.

        :param datasource: the table/druid datasource object queries run on
        :param form_data: dict of controls submitted from the explore view
        :raises Exception: when no datasource is provided
        """
        if not datasource:
            raise Exception("Viz is missing a datasource")
        self.datasource = datasource
        self.request = request
        self.viz_type = form_data.get("viz_type")
        self.form_data = form_data

        self.query = ""
        # Token identifying this chart instance on the frontend; generate a
        # random one when the caller did not provide any.
        self.token = self.form_data.get(
            'token', 'token_' + uuid.uuid4().hex[:8])
        self.metrics = self.form_data.get('metrics') or []
        self.groupby = self.form_data.get('groupby') or []

        self.status = None
        self.error_message = None

    def get_df(self, query_obj=None):
        """Returns a pandas dataframe based on the query object"""
        if not query_obj:
            query_obj = self.query_obj()

        self.error_msg = ""
        self.results = None

        # Only SQL-backed tables may declare a custom python date format on
        # their temporal column; other backends use the default ISO parse.
        timestamp_format = None
        if self.datasource.type == 'table':
            dttm_col = self.datasource.get_col(query_obj['granularity'])
            if dttm_col:
                timestamp_format = dttm_col.python_date_format

        # The datasource here can be different backend but the interface is common
        self.results = self.datasource.query(query_obj)
        self.query = self.results.query
        self.status = self.results.status
        self.error_message = self.results.error_message

        df = self.results.df
        # Transform the timestamp we received from database to pandas supported
        # datetime format. If no python_date_format is specified, the pattern will
        # be considered as the default ISO date format. If the datetime format is
        # unix (epoch_s/epoch_ms), the parse will use the corresponding logic.
        if df is None or df.empty:
            self.status = utils.QueryStatus.FAILED
            if not self.error_message:
                self.error_message = "No data."
            return pd.DataFrame()
        if DTTM_ALIAS in df.columns:
            if timestamp_format in ("epoch_s", "epoch_ms"):
                df[DTTM_ALIAS] = pd.to_datetime(df[DTTM_ALIAS], utc=False)
            else:
                df[DTTM_ALIAS] = pd.to_datetime(
                    df[DTTM_ALIAS], utc=False, format=timestamp_format)
            if self.datasource.offset:
                df[DTTM_ALIAS] += timedelta(hours=self.datasource.offset)
        # BUGFIX: `DataFrame.replace` returns a new frame; the original
        # discarded the result, leaving +/-inf values in the data. Assign it
        # back so infs become NaN and are zero-filled below.
        df = df.replace([np.inf, -np.inf], np.nan)
        df = df.fillna(0)
        return df

    def get_extra_filters(self):
        """Map of column -> values coming from dashboard filter widgets."""
        extra_filters = self.form_data.get('extra_filters', [])
        return {f['col']: f['val'] for f in extra_filters}

    def query_obj(self):
        """Building a query object"""
        form_data = self.form_data
        groupby = form_data.get("groupby") or []
        metrics = form_data.get("metrics") or []

        # extra_filters are temporary/contextual filters that are external
        # to the slice definition. We use those for dynamic interactive
        # filters like the ones emitted by the "Filter Box" visualization
        extra_filters = self.get_extra_filters()
        granularity = (
            form_data.get("granularity") or form_data.get("granularity_sqla")
        )
        limit = int(form_data.get("limit") or 0)
        timeseries_limit_metric = form_data.get("timeseries_limit_metric")
        row_limit = int(
            form_data.get("row_limit") or config.get("ROW_LIMIT"))

        # __from and __to are special extra_filters that target time
        # boundaries. The rest of extra_filters are simple
        # [column_name in list_of_values]. `__` prefix is there to avoid
        # potential conflicts with column that would be named `from` or `to`
        since = (
            extra_filters.get('__from') or
            form_data.get("since") or
            config.get("SUPERSET_DEFAULT_SINCE", "1 year ago")
        )

        from_dttm = utils.parse_human_datetime(since)
        now = datetime.now()
        # Mirror a future "since" into the past so a relative-date slip
        # does not produce an empty window.
        if from_dttm > now:
            from_dttm = now - (from_dttm - now)

        until = extra_filters.get('__to') or form_data.get("until", "now")
        to_dttm = utils.parse_human_datetime(until)
        if from_dttm > to_dttm:
            raise Exception("From date cannot be larger than to date")

        # extras are used to query elements specific to a datasource type
        # for instance the extra where clause that applies only to Tables
        extras = {
            'where': form_data.get("where", ''),
            'having': form_data.get("having", ''),
            'having_druid': form_data.get('having_filters')
            if 'having_filters' in form_data else [],
            'time_grain_sqla': form_data.get("time_grain_sqla", ''),
            'druid_time_origin': form_data.get("druid_time_origin", ''),
        }
        filters = form_data['filters'] if 'filters' in form_data \
            else []
        # Reuse the dict fetched above; the original re-parsed form_data
        # by calling get_extra_filters() a second time here.
        for col, vals in extra_filters.items():
            if not (col and vals) or col.startswith('__'):
                continue
            elif col in self.datasource.filterable_column_names:
                # Quote values with comma to avoid conflict
                filters += [{
                    'col': col,
                    'op': 'in',
                    'val': vals,
                }]
        d = {
            'granularity': granularity,
            'from_dttm': from_dttm,
            'to_dttm': to_dttm,
            'is_timeseries': self.is_timeseries,
            'groupby': groupby,
            'metrics': metrics,
            'row_limit': row_limit,
            'filter': filters,
            'timeseries_limit': limit,
            'extras': extras,
            'timeseries_limit_metric': timeseries_limit_metric,
            'form_data': form_data,
        }
        return d

    @property
    def cache_timeout(self):
        """Cache TTL precedence: form data, then datasource, then the
        datasource's database, then the global default."""
        if self.form_data.get('cache_timeout'):
            return int(self.form_data.get('cache_timeout'))
        if self.datasource.cache_timeout:
            return self.datasource.cache_timeout
        if (
                hasattr(self.datasource, 'database') and
                self.datasource.database.cache_timeout):
            return self.datasource.database.cache_timeout
        return config.get("CACHE_DEFAULT_TIMEOUT")

    def get_json(self, force=False):
        """The full payload as JSON (NaN serialized as null)."""
        return json.dumps(
            self.get_payload(force),
            default=utils.json_int_dttm_ser, ignore_nan=True)

    @property
    def cache_key(self):
        # Key on the sorted form_data so equivalent requests share a slot.
        s = str([(k, self.form_data[k]) for k in sorted(self.form_data.keys())])
        return hashlib.md5(s.encode('utf-8')).hexdigest()

    def get_payload(self, force=False):
        """Handles caching around the json payload retrieval"""
        cache_key = self.cache_key
        payload = None
        force = force if force else self.form_data.get('force') == 'true'
        if not force and cache:
            payload = cache.get(cache_key)

        if payload:
            # BUGFIX: this is the cache-HIT path; the original incremented
            # 'loaded_from_source' here and 'loaded_from_cache' below,
            # i.e. the two counters were swapped.
            stats_logger.incr('loaded_from_cache')
            is_cached = True
            try:
                cached_data = zlib.decompress(payload)
                if PY3:
                    cached_data = cached_data.decode('utf-8')
                payload = json.loads(cached_data)
            except Exception as e:
                logging.error("Error reading cache: " +
                              utils.error_msg_from_exception(e))
                payload = None
            logging.info("Serving from cache")

        if not payload:
            stats_logger.incr('loaded_from_source')
            data = None
            is_cached = False
            cache_timeout = self.cache_timeout
            stacktrace = None
            try:
                df = self.get_df()
                if not self.error_message:
                    data = self.get_data(df)
            except Exception as e:
                logging.exception(e)
                if not self.error_message:
                    self.error_message = str(e)
                self.status = utils.QueryStatus.FAILED
                data = None
                stacktrace = traceback.format_exc()
            payload = {
                'cache_key': cache_key,
                'cache_timeout': cache_timeout,
                'data': data,
                'error': self.error_message,
                'form_data': self.form_data,
                'query': self.query,
                'status': self.status,
                'stacktrace': stacktrace,
            }
            payload['cached_dttm'] = datetime.utcnow().isoformat().split('.')[0]
            logging.info("Caching for the next {} seconds".format(
                cache_timeout))
            data = self.json_dumps(payload)
            if PY3:
                data = bytes(data, 'utf-8')
            # Never cache failed queries.
            if cache and self.status != utils.QueryStatus.FAILED:
                try:
                    cache.set(
                        cache_key,
                        zlib.compress(data),
                        timeout=cache_timeout)
                except Exception as e:
                    # cache.set call can fail if the backend is down or if
                    # the key is too large or whatever other reasons
                    logging.warning("Could not cache key {}".format(cache_key))
                    logging.exception(e)
                    cache.delete(cache_key)
        payload['is_cached'] = is_cached
        return payload

    def json_dumps(self, obj):
        """Serialize with datetime/int handling and NaN -> null."""
        return json.dumps(obj, default=utils.json_int_dttm_ser, ignore_nan=True)

    @property
    def data(self):
        """This is the data object serialized to the js layer"""
        content = {
            'form_data': self.form_data,
            'token': self.token,
            'viz_name': self.viz_type,
            'filter_select_enabled': self.datasource.filter_select_enabled,
        }
        return content

    def get_csv(self):
        """Run the query and render the resulting dataframe as CSV."""
        df = self.get_df()
        # Only emit the index when it carries information (i.e. is not the
        # default positional RangeIndex).
        include_index = not isinstance(df.index, pd.RangeIndex)
        return df.to_csv(index=include_index, encoding="utf-8")

    def get_data(self, df):
        """Subclasses shape the dataframe into their chart payload here."""
        return []

    @property
    def json_data(self):
        return json.dumps(self.data)
|
|
|
|
|
|
class TableViz(BaseViz):

    """A basic html table that is sortable and searchable"""

    viz_type = "table"
    verbose_name = _("Table View")
    credits = 'a <a href="https://github.com/airbnb/superset">Superset</a> original'
    is_timeseries = False

    def should_be_timeseries(self):
        """Whether the time column should be included in the output."""
        fd = self.form_data
        # TODO handle datasource-type-specific code in datasource
        has_granularity = (
            (fd.get('granularity') and fd.get('granularity') != 'all') or
            (fd.get('granularity_sqla') and fd.get('time_grain_sqla'))
        )
        if fd.get('include_time') and not has_granularity:
            raise Exception(
                "Pick a granularity in the Time section or "
                "uncheck 'Include Time'")
        return fd.get('include_time')

    def query_obj(self):
        """Build the query, honoring raw-column mode vs. group-by mode."""
        query = super(TableViz, self).query_obj()
        fd = self.form_data

        if fd.get('all_columns') and (fd.get('groupby') or fd.get('metrics')):
            raise Exception(
                "Choose either fields to [Group By] and [Metrics] or "
                "[Columns], not both")

        if fd.get('all_columns'):
            # Raw records mode: no aggregation, optional explicit ordering.
            query['columns'] = fd.get('all_columns')
            query['groupby'] = []
            order_by_cols = fd.get('order_by_cols') or []
            query['orderby'] = [json.loads(t) for t in order_by_cols]

        query['is_timeseries'] = self.should_be_timeseries()
        return query

    def get_data(self, df):
        """Serialize the frame as records plus the ordered column list."""
        if not self.should_be_timeseries() and DTTM_ALIAS in df:
            del df[DTTM_ALIAS]
        return dict(
            records=df.to_dict(orient="records"),
            columns=list(df.columns),
        )
|
|
|
|
|
|
class PivotTableViz(BaseViz):

    """A pivot table view, define your rows, columns and metrics"""

    viz_type = "pivot_table"
    verbose_name = _("Pivot Table")
    credits = 'a <a href="https://github.com/airbnb/superset">Superset</a> original'
    is_timeseries = False

    def query_obj(self):
        """Validate controls and fold pivot columns into the groupby.

        :raises Exception: when groupby or metrics are missing, or when
            a column appears both as a row (groupby) and a column.
        """
        d = super(PivotTableViz, self).query_obj()
        # Normalize falsy controls to lists up front; the original also
        # re-checked `if not groupby` after setting it to [] (dead code).
        groupby = self.form_data.get('groupby') or []
        columns = self.form_data.get('columns') or []
        metrics = self.form_data.get('metrics')
        if not groupby:
            raise Exception("Please choose at least one \"Group by\" field ")
        if not metrics:
            raise Exception("Please choose at least one metric")
        # A field can't serve as both a pivot row and a pivot column;
        # a single set intersection replaces the original's two
        # symmetric any(...) membership scans.
        if set(groupby) & set(columns):
            raise Exception("groupby and columns can't overlap")

        # The backend groups by rows *and* columns; the actual pivot
        # happens in get_data() on the resulting dataframe.
        d['groupby'] = list(set(groupby) | set(columns))
        return d

    def get_data(self, df):
        """Pivot the query result and render it as a styled HTML table."""
        if (
                self.form_data.get("granularity") == "all" and
                DTTM_ALIAS in df):
            del df[DTTM_ALIAS]
        df = df.pivot_table(
            index=self.form_data.get('groupby'),
            columns=self.form_data.get('columns'),
            values=self.form_data.get('metrics'),
            aggfunc=self.form_data.get('pandas_aggfunc'),
            margins=True,  # add the All totals row/column
        )
        return dict(
            columns=list(df.columns),
            html=df.to_html(
                na_rep='',
                classes=(
                    "dataframe table table-striped table-bordered "
                    "table-condensed table-hover").split(" ")),
        )
|
|
|
|
|
|
class MarkupViz(BaseViz):

    """Use html or markdown to create a free form widget"""

    viz_type = "markup"
    verbose_name = _("Markup")
    is_timeseries = False

    def get_df(self, query_obj=None):
        """Markup needs no data, so skip querying the datasource.

        Accepts (and ignores) ``query_obj`` so the override matches the
        base-class signature ``get_df(self, query_obj=None)`` — the
        original omitted it, breaking any caller that passes one.
        Returns a truthy placeholder so get_payload proceeds to
        get_data() without treating the result as an error.
        """
        return True

    def get_data(self, df):
        """Render the user-provided code, converting markdown to HTML."""
        markup_type = self.form_data.get("markup_type")
        code = self.form_data.get("code", '')
        if markup_type == "markdown":
            code = markdown(code)
        return dict(html=code)
|
|
|
|
|
|
class SeparatorViz(MarkupViz):

    """Use to create section headers in a dashboard, similar to `Markup`"""

    viz_type = "separator"
    verbose_name = _("Separator")

    def get_data(self, df):
        """Always treat the code control as markdown and render it."""
        html = markdown(self.form_data.get("code", ''))
        return dict(html=html)
|
|
|
|
|
|
class WordCloudViz(BaseViz):

    """Build a colorful word cloud

    Uses the nice library at:
    https://github.com/jasondavies/d3-cloud
    """

    viz_type = "word_cloud"
    verbose_name = _("Word Cloud")
    is_timeseries = False

    def query_obj(self):
        """Query a single series column aggregated by a single metric."""
        query = super(WordCloudViz, self).query_obj()
        query['metrics'] = [self.form_data.get('metric')]
        query['groupby'] = [self.form_data.get('series')]
        return query

    def get_data(self, df):
        """Emit [{text, size}, ...] records for the d3-cloud layout."""
        series_col = self.form_data.get('series')
        metric_col = self.form_data.get('metric')
        # Order, then rename the columns into the uniform json schema.
        df = df[[series_col, metric_col]]
        df.columns = ['text', 'size']
        return df.to_dict(orient="records")
|
|
|
|
|
|
class TreemapViz(BaseViz):

    """Tree map visualisation for hierarchical data."""

    viz_type = "treemap"
    verbose_name = _("Treemap")
    credits = '<a href="https://d3js.org">d3.js</a>'
    is_timeseries = False

    def _nest(self, metric, df):
        """Recursively turn a multi-indexed frame into a d3 hierarchy."""
        if df.index.nlevels == 1:
            # Leaf level: emit name/value pairs.
            return [{"name": name, "value": value}
                    for name, value in zip(df.index, df[metric])]
        # Recurse one index level down for each top-level group.
        return [{"name": level, "children": self._nest(metric, df.loc[level])}
                for level in df.index.levels[0]]

    def get_data(self, df):
        """Build one hierarchy rooted at each metric column."""
        df = df.set_index(self.form_data.get("groupby"))
        return [{"name": metric, "children": self._nest(metric, df)}
                for metric in df.columns]
|
|
|
|
|
|
class CalHeatmapViz(BaseViz):

    """Calendar heatmap."""

    viz_type = "cal_heatmap"
    verbose_name = _("Calendar Heatmap")
    credits = (
        '<a href=https://github.com/wa0x6e/cal-heatmap>cal-heatmap</a>')
    is_timeseries = True

    def get_data(self, df):
        """Map epoch-second timestamps to metric values and size the
        cal-heatmap domain range from the selected time window."""
        form_data = self.form_data

        df.columns = ["timestamp", "metric"]
        # Keys are unix epoch seconds as strings (Timestamp.value is ns).
        timestamps = {str(obj["timestamp"].value / 10**9):
                      obj.get("metric") for obj in df.to_dict("records")}

        start = utils.parse_human_datetime(form_data.get("since"))
        end = utils.parse_human_datetime(form_data.get("until"))
        domain = form_data.get("domain_granularity")
        diff_delta = rdelta.relativedelta(end, start)
        diff_secs = (end - start).total_seconds()

        # Number of domain units the widget has to draw (inclusive, +1).
        if domain == "year":
            range_ = diff_delta.years + 1
        elif domain == "month":
            range_ = diff_delta.years * 12 + diff_delta.months + 1
        elif domain == "week":
            # NOTE(review): approximates a year as 53 weeks — confirm
            # whether slight over-count at year boundaries is acceptable.
            range_ = diff_delta.years * 53 + diff_delta.weeks + 1
        elif domain == "day":
            range_ = diff_secs // (24*60*60) + 1
        else:
            # Fallback: hourly domain.
            range_ = diff_secs // (60*60) + 1

        return {
            "timestamps": timestamps,
            "start": start,
            "domain": domain,
            "subdomain": form_data.get("subdomain_granularity"),
            "range": range_,
        }

    def query_obj(self):
        """Single-metric query; the time column provides the x axis."""
        qry = super(CalHeatmapViz, self).query_obj()
        qry["metrics"] = [self.form_data["metric"]]
        return qry
|
|
|
|
|
|
class NVD3Viz(BaseViz):

    """Base class for all nvd3 vizs"""

    # Shared attribution shown under every NVD3-based chart.
    credits = '<a href="http://nvd3.org/">NVD3.org</a>'
    viz_type = None
    verbose_name = "Base NVD3 Viz"
    is_timeseries = False
|
|
|
|
|
|
class BoxPlotViz(NVD3Viz):

    """Box plot viz from ND3"""

    viz_type = "box_plot"
    verbose_name = _("Box Plot")
    sort_series = False
    is_timeseries = True

    def to_series(self, df, classed='', title_suffix=''):
        """Convert the aggregated frame into NVD3 box series.

        ``df`` is the output of get_data's groupby().agg(): its columns
        are (metric label, stat name) pairs, where stat name is one of
        Q1/median/Q3/whisker_high/whisker_low/outliers.
        """
        label_sep = " - "
        chart_data = []
        for index_value, row in zip(df.index, df.to_dict(orient="records")):
            # Multi-column groupby yields tuple index values; flatten them
            # into a single display label.
            if isinstance(index_value, tuple):
                index_value = label_sep.join(index_value)
            boxes = defaultdict(dict)
            # Regroup the flat (metric, stat) columns into one dict of
            # stats per metric; NVD3 expects the median under "Q2".
            for (label, key), value in row.items():
                if key == "median":
                    key = "Q2"
                boxes[label][key] = value
            for label, box in boxes.items():
                if len(self.form_data.get("metrics")) > 1:
                    # need to render data labels with metrics
                    chart_label = label_sep.join([index_value, label])
                else:
                    chart_label = index_value
                chart_data.append({
                    "label": chart_label,
                    "values": box,
                })
        return chart_data

    def get_data(self, df):
        """Aggregate each group into box-plot statistics according to the
        selected whisker strategy, then serialize for NVD3."""
        form_data = self.form_data
        df = df.fillna(0)

        # conform to NVD3 names
        def Q1(series):  # need to be named functions - can't use lambdas
            return np.percentile(series, 25)

        def Q3(series):
            return np.percentile(series, 75)

        # Pick the whisker functions based on the control; they are defined
        # as closures so the single `aggregate` list below works for all.
        whisker_type = form_data.get('whisker_options')
        if whisker_type == "Tukey":
            # Tukey fences: 1.5 * IQR beyond Q1/Q3, snapped to the closest
            # actual data point inside the fence.

            def whisker_high(series):
                upper_outer_lim = Q3(series) + 1.5 * (Q3(series) - Q1(series))
                series = series[series <= upper_outer_lim]
                return series[np.abs(series - upper_outer_lim).argmin()]

            def whisker_low(series):
                lower_outer_lim = Q1(series) - 1.5 * (Q3(series) - Q1(series))
                # find the closest value above the lower outer limit
                series = series[series >= lower_outer_lim]
                return series[np.abs(series - lower_outer_lim).argmin()]

        elif whisker_type == "Min/max (no outliers)":

            def whisker_high(series):
                return series.max()

            def whisker_low(series):
                return series.min()

        elif " percentiles" in whisker_type:
            # Control value looks like "5/95 percentiles" -> low=5, high=95.
            low, high = whisker_type.replace(" percentiles", "").split("/")

            def whisker_high(series):
                return np.percentile(series, int(high))

            def whisker_low(series):
                return np.percentile(series, int(low))

        else:
            raise ValueError("Unknown whisker type: {}".format(whisker_type))

        def outliers(series):
            above = series[series > whisker_high(series)]
            below = series[series < whisker_low(series)]
            # pandas sometimes doesn't like getting lists back here
            return set(above.tolist() + below.tolist())

        aggregate = [Q1, np.median, Q3, whisker_high, whisker_low, outliers]
        df = df.groupby(form_data.get('groupby')).agg(aggregate)
        chart_data = self.to_series(df)
        return chart_data
|
|
|
|
|
|
class BubbleViz(NVD3Viz):

    """Based on the NVD3 bubble chart"""

    viz_type = "bubble"
    verbose_name = _("Bubble Chart")
    is_timeseries = False

    def query_obj(self):
        """Build a query grouped by entity (plus optional series) with the
        x, y and size metrics; validates that all of them are picked."""
        fd = self.form_data
        query = super(BubbleViz, self).query_obj()

        grouping = [fd.get('entity')]
        if fd.get('series'):
            grouping.append(fd.get('series'))
        query['groupby'] = grouping

        # Stash the control values for get_data().
        self.x_metric = fd.get('x')
        self.y_metric = fd.get('y')
        self.z_metric = fd.get('size')
        self.entity = fd.get('entity')
        self.series = fd.get('series') or self.entity
        query['row_limit'] = fd.get('limit')

        query['metrics'] = [self.z_metric, self.x_metric, self.y_metric]
        if not all(query['metrics'] + [self.entity]):
            raise Exception("Pick a metric for x, y and size")
        return query

    def get_data(self, df):
        """Bucket the rows into one NVD3 series per `series` value."""
        df['x'] = df[[self.x_metric]]
        df['y'] = df[[self.y_metric]]
        df['size'] = df[[self.z_metric]]
        df['shape'] = 'circle'
        df['group'] = df[[self.series]]

        grouped = defaultdict(list)
        for record in df.to_dict(orient='records'):
            grouped[record['group']].append(record)
        return [{'key': group, 'values': records}
                for group, records in grouped.items()]
|
|
|
|
|
|
class BulletViz(NVD3Viz):

    """Based on the NVD3 bullet chart"""

    viz_type = "bullet"
    verbose_name = _("Bullet Chart")
    is_timeseries = False

    def query_obj(self):
        """Parse the comma-separated range/marker controls and query the
        single selected metric."""
        fd = self.form_data
        query = super(BulletViz, self).query_obj()
        self.metric = fd.get('metric')

        def csv_strings(field):
            # Comma-separated control -> list of strings ([] when unset).
            raw = fd.get(field)
            return raw.split(',') if raw else []

        def csv_floats(field):
            return [float(part) for part in csv_strings(field)]

        self.ranges = csv_floats('ranges')
        self.range_labels = csv_strings('range_labels')
        self.markers = csv_floats('markers')
        self.marker_labels = csv_strings('marker_labels')
        self.marker_lines = csv_floats('marker_lines')
        self.marker_line_labels = csv_strings('marker_line_labels')

        query['metrics'] = [self.metric]
        if not self.metric:
            raise Exception("Pick a metric to display")
        return query

    def get_data(self, df):
        """Shape the metric values plus the parsed controls for NVD3."""
        df = df.fillna(0)
        df['metric'] = df[[self.metric]]
        values = df['metric'].values
        return {
            'measures': values.tolist(),
            # Default range: zero up to 110% of the largest measure.
            'ranges': self.ranges or [0, values.max() * 1.1],
            'rangeLabels': self.range_labels or None,
            'markers': self.markers or None,
            'markerLabels': self.marker_labels or None,
            'markerLines': self.marker_lines or None,
            'markerLineLabels': self.marker_line_labels or None,
        }
|
|
|
|
|
|
class BigNumberViz(BaseViz):

    """Put emphasis on a single metric with this big number viz"""

    viz_type = "big_number"
    verbose_name = _("Big Number with Trendline")
    credits = 'a <a href="https://github.com/airbnb/superset">Superset</a> original'
    is_timeseries = False

    def query_obj(self):
        """Query just the selected metric over time.

        :raises Exception: when no metric control is set
        """
        d = super(BigNumberViz, self).query_obj()
        metric = self.form_data.get('metric')
        if not metric:
            raise Exception("Pick a metric!")
        # Reuse the value read above; the original re-fetched it from
        # form_data and then wrote it back unchanged (a no-op).
        d['metrics'] = [metric]
        return d

    def get_data(self, df):
        """Return time-sorted rows plus the comparison controls."""
        form_data = self.form_data
        # Sort by the first column (the timestamp) so the trendline and
        # the lag comparison run over chronological order.
        df.sort_values(by=df.columns[0], inplace=True)
        compare_lag = form_data.get("compare_lag")
        return {
            'data': df.values.tolist(),
            'compare_lag': compare_lag,
            'compare_suffix': form_data.get('compare_suffix', ''),
        }
|
|
|
|
|
|
class BigNumberTotalViz(BaseViz):

    """Put emphasis on a single metric with this big number viz"""

    viz_type = "big_number_total"
    verbose_name = _("Big Number")
    credits = 'a <a href="https://github.com/airbnb/superset">Superset</a> original'
    is_timeseries = False

    def query_obj(self):
        """Query just the selected metric (no time dimension).

        :raises Exception: when no metric control is set
        """
        d = super(BigNumberTotalViz, self).query_obj()
        metric = self.form_data.get('metric')
        if not metric:
            raise Exception("Pick a metric!")
        # Reuse the value read above; the original re-fetched it from
        # form_data and then wrote it back unchanged (a no-op).
        d['metrics'] = [metric]
        return d

    def get_data(self, df):
        """Return the sorted rows plus the subheader control."""
        form_data = self.form_data
        df.sort_values(by=df.columns[0], inplace=True)
        return {
            'data': df.values.tolist(),
            'subheader': form_data.get('subheader', ''),
        }
|
|
|
|
|
|
class NVD3TimeSeriesViz(NVD3Viz):

    """A rich line chart component with tons of options"""

    viz_type = "line"
    verbose_name = _("Time Series - Line Chart")
    sort_series = False
    is_timeseries = True

    def to_series(self, df, classed='', title_suffix=''):
        """Convert a (time x series) pivoted frame into NVD3 series dicts.

        :param classed: CSS class attached to every emitted series
        :param title_suffix: appended to each series key (used to mark
            the time-shifted comparison series)
        """
        # Make empty/None column labels displayable.
        cols = []
        for col in df.columns:
            if col == '':
                cols.append('N/A')
            elif col is None:
                cols.append('NULL')
            else:
                cols.append(col)
        df.columns = cols
        series = df.to_dict('series')

        chart_data = []
        for name in df.T.index.tolist():
            ys = series[name]
            # Skip non-numeric series (dtype kinds bool/int/uint/float/complex).
            if df[name].dtype.kind not in "biufc":
                continue
            if isinstance(name, string_types):
                series_title = name
            else:
                # Tuple label from a multi-level pivot: stringify parts.
                name = ["{}".format(s) for s in name]
                if len(self.form_data.get('metrics')) > 1:
                    series_title = ", ".join(name)
                else:
                    # Single metric: drop the redundant metric label.
                    series_title = ", ".join(name[1:])
            if title_suffix:
                series_title += title_suffix

            d = {
                "key": series_title,
                "classed": classed,
                "values": [
                    {'x': ds, 'y': ys[ds] if ds in ys else None}
                    for ds in df.index
                ],
            }
            chart_data.append(d)
        return chart_data

    def get_data(self, df):
        """Pivot on time, then apply the optional resample / contribution /
        rolling / period-comparison / time-compare transforms in order."""
        fd = self.form_data
        df = df.fillna(0)
        if fd.get("granularity") == "all":
            raise Exception("Pick a time granularity for your time series")

        df = df.pivot_table(
            index=DTTM_ALIAS,
            columns=fd.get('groupby'),
            values=fd.get('metrics'))

        # Optional resampling of the time index.
        fm = fd.get("resample_fillmethod")
        if not fm:
            fm = None
        how = fd.get("resample_how")
        rule = fd.get("resample_rule")
        if how and rule:
            df = df.resample(rule, how=how, fill_method=fm)
            if not fm:
                df = df.fillna(0)

        if self.sort_series:
            # Order series by their grand total, largest first.
            dfs = df.sum()
            dfs.sort_values(ascending=False, inplace=True)
            df = df[dfs.index]

        if fd.get("contribution"):
            # Normalize each timestamp's values so they sum to 1.
            dft = df.T
            df = (dft / dft.sum()).T

        rolling_periods = fd.get("rolling_periods")
        rolling_type = fd.get("rolling_type")

        if rolling_type in ('mean', 'std', 'sum') and rolling_periods:
            if rolling_type == 'mean':
                df = pd.rolling_mean(df, int(rolling_periods), min_periods=0)
            elif rolling_type == 'std':
                df = pd.rolling_std(df, int(rolling_periods), min_periods=0)
            elif rolling_type == 'sum':
                df = pd.rolling_sum(df, int(rolling_periods), min_periods=0)
        elif rolling_type == 'cumsum':
            df = df.cumsum()

        # Period-over-period comparison against a shifted copy of the frame.
        num_period_compare = fd.get("num_period_compare")
        if num_period_compare:
            num_period_compare = int(num_period_compare)
            prt = fd.get('period_ratio_type')
            if prt and prt == 'growth':
                df = (df / df.shift(num_period_compare)) - 1
            elif prt and prt == 'value':
                df = df - df.shift(num_period_compare)
            else:
                # Default: plain ratio.
                df = df / df.shift(num_period_compare)

            # Drop the leading rows that have no comparison baseline.
            df = df[num_period_compare:]

        chart_data = self.to_series(df)

        time_compare = fd.get('time_compare')
        if time_compare:
            # Run the same query shifted back by `time_compare`, then shift
            # its timestamps forward again and overlay it as extra series.
            query_object = self.query_obj()
            delta = utils.parse_human_timedelta(time_compare)
            query_object['inner_from_dttm'] = query_object['from_dttm']
            query_object['inner_to_dttm'] = query_object['to_dttm']
            query_object['from_dttm'] -= delta
            query_object['to_dttm'] -= delta

            df2 = self.get_df(query_object)
            df2[DTTM_ALIAS] += delta
            df2 = df2.pivot_table(
                index=DTTM_ALIAS,
                columns=fd.get('groupby'),
                values=fd.get('metrics'))
            chart_data += self.to_series(
                df2, classed='superset', title_suffix="---")
            chart_data = sorted(chart_data, key=lambda x: x['key'])
        return chart_data
|
|
|
|
|
|
class NVD3DualLineViz(NVD3Viz):

    """A rich line chart with dual axis"""

    viz_type = "dual_line"
    verbose_name = _("Time Series - Dual Axis Line Chart")
    sort_series = False
    is_timeseries = True

    def query_obj(self):
        """Query the two metrics; both must be set and distinct."""
        d = super(NVD3DualLineViz, self).query_obj()
        m1 = self.form_data.get('metric')
        m2 = self.form_data.get('metric_2')
        d['metrics'] = [m1, m2]
        if not m1:
            raise Exception("Pick a metric for left axis!")
        if not m2:
            raise Exception("Pick a metric for right axis!")
        if m1 == m2:
            raise Exception("Please choose different metrics"
                            " on left and right axis")
        return d

    def to_series(self, df, classed=''):
        """Emit one NVD3 line series per metric, pinned to axis 1 and 2."""
        # Make empty/None column labels displayable.
        cols = []
        for col in df.columns:
            if col == '':
                cols.append('N/A')
            elif col is None:
                cols.append('NULL')
            else:
                cols.append(col)
        df.columns = cols
        series = df.to_dict('series')
        chart_data = []
        metrics = [
            self.form_data.get('metric'),
            self.form_data.get('metric_2')
        ]
        for i, m in enumerate(metrics):
            ys = series[m]
            # Skip non-numeric columns.
            if df[m].dtype.kind not in "biufc":
                continue
            series_title = m
            d = {
                "key": series_title,
                "classed": classed,
                "values": [
                    {'x': ds, 'y': ys[ds] if ds in ys else None}
                    for ds in df.index
                ],
                # Left axis for the first metric, right for the second.
                "yAxis": i+1,
                "type": "line"
            }
            chart_data.append(d)
        return chart_data

    def get_data(self, df):
        """Pivot on time and serialize both metrics as line series."""
        fd = self.form_data
        df = df.fillna(0)

        if self.form_data.get("granularity") == "all":
            raise Exception("Pick a time granularity for your time series")

        metric = fd.get('metric')
        metric_2 = fd.get('metric_2')
        df = df.pivot_table(
            index=DTTM_ALIAS,
            values=[metric, metric_2])

        chart_data = self.to_series(df)
        return chart_data
|
|
|
|
|
|
class NVD3TimeSeriesBarViz(NVD3TimeSeriesViz):

    """A bar chart where the x axis is time"""

    viz_type = "bar"
    # Bars read better when series are ordered by magnitude.
    sort_series = True
    verbose_name = _("Time Series - Bar Chart")
|
|
|
|
|
|
class NVD3CompareTimeSeriesViz(NVD3TimeSeriesViz):

    """A line chart component where you can compare the % change over time"""

    # Inherits all behavior; only the chart type/name differ.
    viz_type = 'compare'
    verbose_name = _("Time Series - Percent Change")
|
|
|
|
|
|
class NVD3TimeSeriesStackedViz(NVD3TimeSeriesViz):

    """A rich stack area chart"""

    viz_type = "area"
    verbose_name = _("Time Series - Stacked")
    # Stacked areas read better when series are ordered by magnitude.
    sort_series = True
|
|
|
|
|
|
class DistributionPieViz(NVD3Viz):

    """Annoy visualization snobs with this controversial pie chart"""

    viz_type = "pie"
    verbose_name = _("Distribution - NVD3 - Pie Chart")
    is_timeseries = False

    def get_data(self, df):
        """Aggregate into (group, metric) slices sorted descending."""
        metric = self.metrics[0]
        df = df.pivot_table(
            index=self.groupby,
            values=[metric])
        df.sort_values(by=metric, ascending=False, inplace=True)
        df = df.reset_index()
        df.columns = ['x', 'y']
        return df.to_dict(orient="records")
|
|
|
|
|
|
class HistogramViz(BaseViz):

    """Histogram"""

    viz_type = "histogram"
    verbose_name = _("Histogram")
    is_timeseries = False

    def query_obj(self):
        """Returns the query object for this visualization"""
        query = super(HistogramViz, self).query_obj()
        query['row_limit'] = self.form_data.get(
            'row_limit', int(config.get('VIZ_ROW_LIMIT')))
        numeric_column = self.form_data.get('all_columns_x')
        if numeric_column is None:
            raise Exception("Must have one numeric column specified")
        query['columns'] = [numeric_column]
        return query

    def get_data(self, df):
        """Returns the chart data"""
        # Ship raw values; the binning happens client-side.
        return df[df.columns[0]].values.tolist()
|
|
|
|
|
|
class DistributionBarViz(DistributionPieViz):

    """A good old bar chart"""

    viz_type = "dist_bar"
    verbose_name = _("Distribution - Bar Chart")
    is_timeseries = False

    def query_obj(self):
        """Group by both Series and Breakdowns; validate the controls."""
        d = super(DistributionBarViz, self).query_obj()  # noqa
        fd = self.form_data
        gb = fd.get('groupby') or []
        cols = fd.get('columns') or []
        # NOTE(review): this assigns a *set*, discarding the user's
        # ordering; downstream use appears to only need membership, but
        # confirm nothing relies on column order here.
        d['groupby'] = set(gb + cols)
        # A shrinking union means some field appeared in both lists.
        if len(d['groupby']) < len(gb) + len(cols):
            raise Exception("Can't have overlap between Series and Breakdowns")
        if not self.metrics:
            raise Exception("Pick at least one metric")
        if not self.groupby:
            raise Exception("Pick at least one field for [Series]")
        return d

    def get_data(self, df):
        """Pivot into one NVD3 series per (metric, breakdown) column."""
        fd = self.form_data

        # Rank the groups by the first metric's total, descending; used
        # below to order the pivoted rows.
        row = df.groupby(self.groupby).sum()[self.metrics[0]].copy()
        row.sort_values(ascending=False, inplace=True)
        columns = fd.get('columns') or []
        pt = df.pivot_table(
            index=self.groupby,
            columns=columns,
            values=self.metrics)
        if fd.get("contribution"):
            # Normalize each group's values so they sum to 1.
            pt = pt.fillna(0)
            pt = pt.T
            pt = (pt / pt.sum()).T
        pt = pt.reindex(row.index)
        chart_data = []
        for name, ys in pt.iteritems():
            # Skip non-numeric columns and the groupby columns themselves.
            if pt[name].dtype.kind not in "biufc" or name in self.groupby:
                continue
            if isinstance(name, string_types):
                series_title = name
            elif len(self.metrics) > 1:
                series_title = ", ".join(name)
            else:
                # Single metric: drop the redundant metric label.
                l = [str(s) for s in name[1:]]
                series_title = ", ".join(l)
            values = []
            for i, v in ys.iteritems():
                # Flatten tuple/list x labels into a display string.
                x = i
                if isinstance(x, (tuple, list)):
                    x = ', '.join([str(s) for s in x])
                else:
                    x = str(x)
                values.append({
                    'x': x,
                    'y': v,
                })
            d = {
                "key": series_title,
                "values": values,
            }
            chart_data.append(d)
        return chart_data
|
|
|
|
|
|
class SunburstViz(BaseViz):

    """A multi level sunburst chart"""

    viz_type = "sunburst"
    verbose_name = _("Sunburst")
    is_timeseries = False
    credits = (
        'Kerry Rodden '
        '@<a href="https://bl.ocks.org/kerryrodden/7090426">bl.ocks.org</a>')

    def get_data(self, df):
        """Return the rows as nested lists: hierarchy levels, then m1, m2."""
        # Copy: the previous ``cols += [...]`` mutated the list stored in
        # form_data['groupby'] in place, polluting any later use of the form.
        cols = list(self.form_data.get('groupby'))
        metric = self.form_data.get('metric')
        secondary_metric = self.form_data.get('secondary_metric')
        if metric == secondary_metric:
            # if m1 == m2 duplicate the metric column
            ndf = df
            # Was ``[cols + ['m1', 'm2']]`` — the extra list wrapper was a bug
            ndf.columns = cols + ['m1', 'm2']
        else:
            cols += [metric, secondary_metric]
            ndf = df[cols]
        return json.loads(ndf.to_json(orient="values"))  # TODO fix this nonsense

    def query_obj(self):
        """Query both the primary and secondary metric."""
        qry = super(SunburstViz, self).query_obj()
        qry['metrics'] = [
            self.form_data['metric'], self.form_data['secondary_metric']]
        return qry
|
|
|
|
|
|
class SankeyViz(BaseViz):

    """A Sankey diagram that requires a parent-child dataset"""

    viz_type = "sankey"
    verbose_name = _("Sankey")
    is_timeseries = False
    credits = '<a href="https://www.npmjs.com/package/d3-sankey">d3-sankey on npm</a>'

    def query_obj(self):
        """Require exactly two group-by columns (source/target) + one metric."""
        query = super(SankeyViz, self).query_obj()
        if len(query['groupby']) != 2:
            raise Exception("Pick exactly 2 columns as [Source / Target]")
        query['metrics'] = [
            self.form_data['metric']]
        return query

    def get_data(self, df):
        """Convert rows to link records, refusing graphs that contain cycles."""
        df.columns = ['source', 'target', 'value']
        links = df.to_dict(orient='records')

        # Adjacency: every target reachable directly from each source
        children_of = defaultdict(set)
        for link in links:
            children_of[link['source']].add(link['target'])

        def first_cycle_edge(graph):
            """Whether there's a cycle in a directed graph"""
            stack = set()

            def dfs(node):
                stack.add(node)
                for child in graph.get(node, ()):
                    if child in stack or dfs(child):
                        return (node, child)
                stack.remove(node)

            for node in graph:
                edge = dfs(node)
                if edge:
                    return edge

        bad_edge = first_cycle_edge(children_of)
        if bad_edge:
            raise Exception(
                "There's a loop in your Sankey, please provide a tree. "
                "Here's a faulty link: {}".format(bad_edge))
        return links
|
|
|
|
|
|
class DirectedForceViz(BaseViz):

    """An animated directed force layout graph visualization"""

    viz_type = "directed_force"
    verbose_name = _("Directed Force Layout")
    credits = 'd3noob @<a href="http://bl.ocks.org/d3noob/5141278">bl.ocks.org</a>'
    is_timeseries = False

    def query_obj(self):
        """Force exactly two group-by columns and a single metric."""
        query = super(DirectedForceViz, self).query_obj()
        if len(self.form_data['groupby']) != 2:
            raise Exception("Pick exactly 2 columns to 'Group By'")
        query['metrics'] = [self.form_data['metric']]
        return query

    def get_data(self, df):
        """Rename the columns to the source/target/value triplet d3 expects."""
        df.columns = ['source', 'target', 'value']
        return df.to_dict(orient='records')
|
|
|
|
|
|
class CountryMapViz(BaseViz):

    """A country centric"""

    viz_type = "country_map"
    verbose_name = _("Country Map")
    is_timeseries = False
    credits = 'From bl.ocks.org By john-guerra'

    def query_obj(self):
        """Query a single metric grouped by the chosen entity column."""
        qry = super(CountryMapViz, self).query_obj()
        qry['metrics'] = [
            self.form_data['metric']]
        qry['groupby'] = [self.form_data['entity']]
        return qry

    def get_data(self, df):
        """Return ``[{'country_id': ..., 'metric': ...}, ...]`` records.

        The previous version imported ``superset.data.countries`` without
        using it and shuffled the frame through redundant temporaries;
        both have been dropped.
        """
        fd = self.form_data
        ndf = df[[fd.get('entity'), fd.get('metric')]]
        ndf.columns = ['country_id', 'metric']
        return ndf.to_dict(orient='records')
|
|
|
|
|
|
class WorldMapViz(BaseViz):

    """A country centric world map"""

    viz_type = "world_map"
    verbose_name = _("World Map")
    is_timeseries = False
    credits = 'datamaps on <a href="https://www.npmjs.com/package/datamaps">npm</a>'

    def query_obj(self):
        """Query both metrics grouped by the chosen entity column."""
        qry = super(WorldMapViz, self).query_obj()
        qry['metrics'] = [
            self.form_data['metric'], self.form_data['secondary_metric']]
        qry['groupby'] = [self.form_data['entity']]
        return qry

    def get_data(self, df):
        """Map entity values to country records with lat/lng for datamaps.

        Unresolvable entities get the sentinel country code "XXX".
        """
        from superset.data import countries
        fd = self.form_data
        cols = [fd.get('entity')]
        metric = fd.get('metric')
        secondary_metric = fd.get('secondary_metric')
        if metric == secondary_metric:
            # .copy() so the m1/m2 assignments below write into a real frame
            # rather than a slice view of df (pandas chained-assignment bug)
            ndf = df[cols].copy()
            # df[metric] will be a DataFrame
            # because there are duplicate column names
            ndf['m1'] = df[metric].iloc[:, 0]
            ndf['m2'] = ndf['m1']
        else:
            cols += [metric, secondary_metric]
            ndf = df[cols]
        df = ndf
        df.columns = ['country', 'm1', 'm2']
        d = df.to_dict(orient='records')
        for row in d:
            country = None
            if isinstance(row['country'], string_types):
                # Resolve the raw entity value (e.g. ISO code) to a country
                country = countries.get(
                    fd.get('country_fieldtype'), row['country'])

            if country:
                row['country'] = country['cca3']
                row['latitude'] = country['lat']
                row['longitude'] = country['lng']
                row['name'] = country['name']
            else:
                row['country'] = "XXX"
        return d
|
|
|
|
|
|
class FilterBoxViz(BaseViz):

    """A multi filter, multi-choice filter box to make dashboards interactive"""

    viz_type = "filter_box"
    verbose_name = _("Filters")
    is_timeseries = False
    credits = 'a <a href="https://github.com/airbnb/superset">Superset</a> original'

    def query_obj(self):
        """Require at least one filter field (or a date filter)."""
        qry = super(FilterBoxViz, self).query_obj()
        groupby = self.form_data.get('groupby')
        if len(groupby) < 1 and not self.form_data.get('date_filter'):
            raise Exception("Pick at least one filter field")
        qry['metrics'] = [
            self.form_data['metric']]
        return qry

    def get_data(self, df):
        """Run one query per filter field and return its choices keyed by field."""
        qry = self.query_obj()
        d = {}
        # Re-use the same query object, swapping the groupby per filter field
        for flt in list(self.form_data['groupby']):
            qry['groupby'] = [flt]
            flt_df = super(FilterBoxViz, self).get_df(qry)
            d[flt] = [
                {
                    'id': vals[0],
                    'text': vals[0],
                    'filter': flt,
                    'metric': vals[1],
                }
                for vals in flt_df.itertuples(index=False)
            ]
        return d
|
|
|
|
|
|
class IFrameViz(BaseViz):

    """You can squeeze just about anything in this iFrame component"""

    viz_type = "iframe"
    verbose_name = _("iFrame")
    credits = 'a <a href="https://github.com/airbnb/superset">Superset</a> original'
    is_timeseries = False

    def get_df(self):
        # An iframe has no backing query; there is never a dataframe to fetch.
        return None
|
|
|
|
|
|
class ParallelCoordinatesViz(BaseViz):

    """Interactive parallel coordinate implementation

    Uses this amazing javascript library
    https://github.com/syntagmatic/parallel-coordinates
    """

    viz_type = "para"
    verbose_name = _("Parallel Coordinates")
    credits = (
        '<a href="https://syntagmatic.github.io/parallel-coordinates/">'
        'Syntagmatic\'s library</a>')
    is_timeseries = False

    def query_obj(self):
        """Query the picked metrics (plus the secondary metric) by series."""
        d = super(ParallelCoordinatesViz, self).query_obj()
        fd = self.form_data
        # copy: don't let the append below mutate the form_data list
        d['metrics'] = copy.copy(fd.get('metrics'))
        second = fd.get('secondary_metric')
        # Only append when a secondary metric is actually set; the previous
        # code would append None to the metrics list when the field was empty.
        if second and second not in d['metrics']:
            d['metrics'] += [second]
        d['groupby'] = [fd.get('series')]
        return d

    def get_data(self, df):
        """Return the rows verbatim as a list of record dicts."""
        return df.to_dict(orient="records")
|
|
|
|
|
|
class HeatmapViz(BaseViz):

    """A nice heatmap visualization that support high density through canvas"""

    viz_type = "heatmap"
    verbose_name = _("Heatmap")
    is_timeseries = False
    credits = (
        'inspired from mbostock @<a href="http://bl.ocks.org/mbostock/3074470">'
        'bl.ocks.org</a>')

    def query_obj(self):
        # One metric, grouped by the two axis columns picked in the form.
        d = super(HeatmapViz, self).query_obj()
        fd = self.form_data
        d['metrics'] = [fd.get('metric')]
        d['groupby'] = [fd.get('all_columns_x'), fd.get('all_columns_y')]
        return d

    def get_data(self, df):
        """Rename columns to x/y/v and add a 'perc' column in [0, 1].

        'perc' drives the cell color: it is min-max normalized either over
        the whole heatmap or within each x (or y) group, depending on the
        form's 'normalize_across' choice.
        """
        fd = self.form_data
        x = fd.get('all_columns_x')
        y = fd.get('all_columns_y')
        v = fd.get('metric')
        if x == y:
            df.columns = ['x', 'y', 'v']
        else:
            df = df[[x, y, v]]
            df.columns = ['x', 'y', 'v']
        norm = fd.get('normalize_across')
        overall = False
        if norm == 'heatmap':
            overall = True
        else:
            # norm is expected to be 'x' or 'y' here, i.e. a column of the
            # renamed frame — TODO confirm against the form's choices
            gb = df.groupby(norm, group_keys=False)
            if len(gb) <= 1:
                # A single group is equivalent to normalizing overall
                overall = True
            else:
                # Min-max normalize v within each group
                df['perc'] = (
                    gb.apply(
                        lambda x: (x.v - x.v.min()) / (x.v.max() - x.v.min()))
                )
        if overall:
            # Min-max normalize v across the entire frame
            v = df.v
            min_ = v.min()
            df['perc'] = (v - min_) / (v.max() - min_)
        return df.to_dict(orient="records")
|
|
|
|
|
|
class HorizonViz(NVD3TimeSeriesViz):

    """Horizon chart

    https://www.npmjs.com/package/d3-horizon-chart
    """

    # Inherits all querying/data shaping from NVD3TimeSeriesViz;
    # only the identity and front-end credits differ.
    viz_type = "horizon"
    verbose_name = _("Horizon Charts")
    credits = (
        '<a href="https://www.npmjs.com/package/d3-horizon-chart">'
        'd3-horizon-chart</a>')
|
|
|
|
|
|
class MapboxViz(BaseViz):

    """Rich maps made with Mapbox"""

    viz_type = "mapbox"
    verbose_name = _("Mapbox")
    is_timeseries = False
    credits = (
        '<a href=https://www.mapbox.com/mapbox-gl-js/api/>Mapbox GL JS</a>')

    def query_obj(self):
        """Validate form choices and pick the columns to query.

        Two modes: without [Group By] the raw longitude/latitude columns
        (plus optional label and point-radius columns) are fetched; with
        [Group By], every chosen column must itself appear in the
        group-by list, and only validation happens here.
        """
        d = super(MapboxViz, self).query_obj()
        fd = self.form_data
        label_col = fd.get('mapbox_label')

        if not fd.get('groupby'):
            d['columns'] = [fd.get('all_columns_x'), fd.get('all_columns_y')]

            if label_col and len(label_col) >= 1:
                # 'count' only makes sense as an aggregate, i.e. with groupby
                if label_col[0] == "count":
                    raise Exception(
                        "Must have a [Group By] column to have 'count' as the [Label]")
                d['columns'].append(label_col[0])

            if fd.get('point_radius') != 'Auto':
                d['columns'].append(fd.get('point_radius'))

            # De-dupe in case label/radius repeat the coordinate columns
            d['columns'] = list(set(d['columns']))
        else:
            # Ensuring columns chosen are all in group by
            if (label_col and len(label_col) >= 1 and
                    label_col[0] != "count" and
                    label_col[0] not in fd.get('groupby')):
                raise Exception(
                    "Choice of [Label] must be present in [Group By]")

            if (fd.get("point_radius") != "Auto" and
                    fd.get("point_radius") not in fd.get('groupby')):
                raise Exception(
                    "Choice of [Point Radius] must be present in [Group By]")

            if (fd.get('all_columns_x') not in fd.get('groupby') or
                    fd.get('all_columns_y') not in fd.get('groupby')):
                raise Exception(
                    "[Longitude] and [Latitude] columns must be present in [Group By]")
        return d

    def get_data(self, df):
        """Shape the rows into a geoJSON FeatureCollection plus the settings
        the Mapbox front-end component needs."""
        fd = self.form_data
        label_col = fd.get('mapbox_label')
        custom_metric = label_col and len(label_col) >= 1
        # One label value per row; stays None when no label column is picked
        metric_col = [None] * len(df.index)
        if custom_metric:
            if label_col[0] == fd.get('all_columns_x'):
                metric_col = df[fd.get('all_columns_x')]
            elif label_col[0] == fd.get('all_columns_y'):
                metric_col = df[fd.get('all_columns_y')]
            else:
                metric_col = df[label_col[0]]
        # Likewise for the point radius: None per row means "Auto"
        point_radius_col = (
            [None] * len(df.index)
            if fd.get("point_radius") == "Auto"
            else df[fd.get("point_radius")])

        # using geoJSON formatting
        geo_json = {
            "type": "FeatureCollection",
            "features": [
                {
                    "type": "Feature",
                    "properties": {
                        "metric": metric,
                        "radius": point_radius,
                    },
                    "geometry": {
                        "type": "Point",
                        "coordinates": [lon, lat],
                    }
                }
                for lon, lat, metric, point_radius
                in zip(
                    df[fd.get('all_columns_x')],
                    df[fd.get('all_columns_y')],
                    metric_col, point_radius_col)
            ]
        }

        # Everything besides geoJSON is passed straight through from the
        # form/config for the JS component to consume.
        return {
            "geoJSON": geo_json,
            "customMetric": custom_metric,
            "mapboxApiKey": config.get('MAPBOX_API_KEY'),
            "mapStyle": fd.get("mapbox_style"),
            "aggregatorName": fd.get("pandas_aggfunc"),
            "clusteringRadius": fd.get("clustering_radius"),
            "pointRadiusUnit": fd.get("point_radius_unit"),
            "globalOpacity": fd.get("global_opacity"),
            "viewportLongitude": fd.get("viewport_longitude"),
            "viewportLatitude": fd.get("viewport_latitude"),
            "viewportZoom": fd.get("viewport_zoom"),
            "renderWhileDragging": fd.get("render_while_dragging"),
            "tooltip": fd.get("rich_tooltip"),
            "color": fd.get("mapbox_color"),
        }
|
|
|
|
|
|
# Registry of every visualization class; this ordering drives the order in
# which the types are presented in the visualization picker.
viz_types_list = [
    TableViz,
    PivotTableViz,
    NVD3TimeSeriesViz,
    NVD3DualLineViz,
    NVD3CompareTimeSeriesViz,
    NVD3TimeSeriesStackedViz,
    NVD3TimeSeriesBarViz,
    DistributionBarViz,
    DistributionPieViz,
    BubbleViz,
    BulletViz,
    MarkupViz,
    WordCloudViz,
    BigNumberViz,
    BigNumberTotalViz,
    SunburstViz,
    DirectedForceViz,
    SankeyViz,
    CountryMapViz,
    WorldMapViz,
    FilterBoxViz,
    IFrameViz,
    ParallelCoordinatesViz,
    HeatmapViz,
    BoxPlotViz,
    TreemapViz,
    CalHeatmapViz,
    HorizonViz,
    MapboxViz,
    HistogramViz,
    SeparatorViz,
]

# Public lookup keyed by each class's ``viz_type`` slug, with any types the
# deployment disabled via the VIZ_TYPE_BLACKLIST config entry filtered out.
viz_types = OrderedDict([(v.viz_type, v) for v in viz_types_list
                         if v.viz_type not in config.get('VIZ_TYPE_BLACKLIST')])
|