[explore] proper filtering of NULLs and '' (#4651)

* [WiP] [explore] proper filtering of NULLs and ''

TODO: handling of Druid equivalents

* Unit tests

* Some refactoring

* [druid] fix 'Unorderable types' when col has nuls

Error "unorderable types: str() < int()" occurs when grouping by a
numerical Druid colummn that contains null values.

* druid/pydruid returns strings in the datafram with NAs for nulls
* Superset has custom logic around get_fillna_for_col that fills in the
NULLs based on declared column type (FLOAT here), so now we have a mixed
bag of type in the series
* pandas chokes on pivot_table or groupby operations as it cannot sorts
mixed types

The approach here is to stringify and fillna('<NULL>') to get a
consistent series.

* typo

* Fix druid_func tests

* Addressing more comments

* last touches
This commit is contained in:
Maxime Beauchemin
2018-04-17 22:26:21 -07:00
committed by GitHub
parent 44c2d5bdab
commit eac97ce9f2
15 changed files with 153 additions and 57 deletions

View File

@@ -34,7 +34,7 @@ class DruidFuncTestCase(unittest.TestCase):
def test_get_filters_ignores_invalid_filter_objects(self):
filtr = {'col': 'col1', 'op': '=='}
filters = [filtr]
self.assertEqual(None, DruidDatasource.get_filters(filters, []))
self.assertIsNone(DruidDatasource.get_filters(filters, []))
def test_get_filters_constructs_filter_in(self):
filtr = {'col': 'A', 'op': 'in', 'val': ['a', 'b', 'c']}
@@ -108,7 +108,7 @@ class DruidFuncTestCase(unittest.TestCase):
filtr1 = {'col': 'A', 'op': 'in', 'val': []}
filtr2 = {'col': 'A', 'op': 'not in', 'val': []}
res = DruidDatasource.get_filters([filtr1, filtr2], [])
self.assertEqual(None, res)
self.assertIsNone(res)
def test_get_filters_constructs_equals_for_in_not_in_single_value(self):
filtr = {'col': 'A', 'op': 'in', 'val': ['a']}
@@ -119,14 +119,15 @@ class DruidFuncTestCase(unittest.TestCase):
filtr = {'col': 'A', 'op': '==', 'val': ['a', 'b']}
res = DruidDatasource.get_filters([filtr], [])
self.assertEqual('a', res.filter['filter']['value'])
filtr = {'col': 'A', 'op': '==', 'val': []}
res = DruidDatasource.get_filters([filtr], [])
self.assertEqual('', res.filter['filter']['value'])
self.assertIsNone(res.filter['filter']['value'])
def test_get_filters_handles_none_for_string_types(self):
filtr = {'col': 'A', 'op': '==', 'val': None}
res = DruidDatasource.get_filters([filtr], [])
self.assertEqual('', res.filter['filter']['value'])
self.assertIsNone(res)
def test_get_filters_extracts_values_in_quotes(self):
filtr = {'col': 'A', 'op': 'in', 'val': [' "a" ']}