[druid] optimize Druid queries where possible (#1517)

* [druid] optimize Druid queries where possible: try to use timeseries and topN queries where possible, falling back on a two-phase groupby only where needed
* Fixing py3 bug
2026-04-20 16:44:46 +00:00 · 2016-11-02 11:25:33 -07:00
parent cdf4dd0302
commit 1b124bfb87
3 changed files with 110 additions and 65 deletions
--- a/tests/druid_tests.py
+++ b/tests/druid_tests.py
@@ -113,10 +113,11 @@ class DruidTests(CaravelTestCase):
        instance.query_dict = {}
        instance.query_builder.last_query.query_dict = {}

-        resp = self.client.get('/caravel/explore/druid/{}/'.format(
+        resp = self.get_resp('/caravel/explore/druid/{}/'.format(
            datasource_id))
-        assert "[test_cluster].[test_datasource]" in resp.data.decode('utf-8')
+        assert "[test_cluster].[test_datasource]" in resp

+        # One groupby
        url = (
            '/caravel/explore_json/druid/{}/?viz_type=table&granularity=one+day&'
            'druid_time_origin=&since=7+days+ago&until=now&row_limit=5000&'
@@ -125,8 +126,35 @@ class DruidTests(CaravelTestCase):
            'action=&datasource_name=test_datasource&datasource_id={}&'
            'datasource_type=druid&previous_viz_type=table&'
            'force=true'.format(datasource_id, datasource_id))
-        resp = self.get_resp(url)
-        assert "Canada" in resp
+        resp = self.get_json_resp(url)
+        self.assertEqual("Canada", resp['data']['records'][0]['name'])
+
+        # two groupby
+        url = (
+            '/caravel/explore_json/druid/{}/?viz_type=table&granularity=one+day&'
+            'druid_time_origin=&since=7+days+ago&until=now&row_limit=5000&'
+            'include_search=false&metrics=count&groupby=name&'
+            'flt_col_0=dim1&groupby=second&'
+            'flt_op_0=in&flt_eq_0=&slice_id=&slice_name=&collapsed_fieldsets=&'
+            'action=&datasource_name=test_datasource&datasource_id={}&'
+            'datasource_type=druid&previous_viz_type=table&'
+            'force=true'.format(datasource_id, datasource_id))
+        resp = self.get_json_resp(url)
+        self.assertEqual("Canada", resp['data']['records'][0]['name'])
+
+        # no groupby
+        url = (
+            '/caravel/explore_json/druid/{}/?viz_type=table&granularity=one+day&'
+            'druid_time_origin=&since=7+days+ago&until=now&row_limit=5000&'
+            'include_search=false&metrics=count&'
+            'flt_col_0=dim1&'
+            'flt_op_0=in&flt_eq_0=&slice_id=&slice_name=&collapsed_fieldsets=&'
+            'action=&datasource_name=test_datasource&datasource_id={}&'
+            'datasource_type=druid&previous_viz_type=table&'
+            'force=true'.format(datasource_id, datasource_id))
+        resp = self.get_json_resp(url)
+        self.assertEqual(
+            "2012-01-01T00:00:00", resp['data']['records'][0]['timestamp'])

    def test_druid_sync_from_config(self):
        CLUSTER_NAME = 'new_druid'