datetime format and database expression on column level (#652)

* time format minor features added

* add description for datetime format input

* db version bug walkaround

* removed unecessary comments and fixed minor bug

* fixed code style

* minor fix

* fixed missing time format column in DruidDatasource

* Update models.py

Minor style fix

* Revert "Update models.py"

This reverts commit 6897c388e0.

* removed timestamp_format from druid and removed try catch in migration

* Using spaces, not tabs

* get the most updated migration and add the migration on the head of it

* remove vscode setting file

* use colunm based dttm_format

* modify dttm_converter

* modify datetime viz

* added comments and documents

* fixed some description and removed unnecessary import

* fix migration head

* minor style

* minor style

* deleted empty lines

* delete print statement

* add epoch converter

* error fixed

* fixed epoch parsing issue

* delete unnecessary lines

* fixed typo

* fix minor error

* fix styling issues

* fix styling error

* fixed typo

* support epoch_ms and did some refactoring

* fixed styling error

* fixed styling error

* add one more dataset to test dttm_format and db_expr

* add more slices

* styling

* specified String() lenght
This commit is contained in:
yxjames
2016-06-27 21:33:44 -07:00
committed by Maxime Beauchemin
parent 3e742c74bb
commit 7a7f61a296
7 changed files with 203 additions and 28 deletions

View File

@@ -12,7 +12,7 @@ import datetime
import random
import pandas as pd
from sqlalchemy import String, DateTime, Date, Float
from sqlalchemy import String, DateTime, Date, Float, BigInteger
from caravel import app, db, models, utils
@@ -1020,3 +1020,84 @@ def load_long_lat_data():
params=get_slice_json(slice_data),
)
merge_slice(slc)
def load_multiformat_time_series_data():
"""Loading time series data from a zip file in the repo"""
with gzip.open(os.path.join(DATA_FOLDER, 'multiformat_time_series.json.gz')) as f:
pdf = pd.read_json(f)
pdf.ds = pd.to_datetime(pdf.ds, unit='s')
pdf.ds2 = pd.to_datetime(pdf.ds2, unit='s')
pdf.to_sql(
'multiformat_time_series',
db.engine,
if_exists='replace',
chunksize=500,
dtype={
"ds": Date,
'ds2': DateTime,
"epoch_s": BigInteger,
"epoch_ms": BigInteger,
"string0": String(100),
"string1": String(100),
"string2": String(100),
"string3": String(100),
},
index=False)
print("Done loading table!")
print("-" * 80)
print("Creating table [multiformat_time_series] reference")
obj = db.session.query(TBL).filter_by(table_name='multiformat_time_series').first()
if not obj:
obj = TBL(table_name='multiformat_time_series')
obj.main_dttm_col = 'ds'
obj.database = get_or_create_db(db.session)
obj.is_featured = False
dttm_and_expr_dict = {
'ds': [None, None],
'ds2': [None, None],
'epoch_s': ['epoch_s', None],
'epoch_ms': ['epoch_ms', None],
'string2': ['%Y%m%d-%H%M%S', None],
'string1': ['%Y-%m-%d^%H:%M:%S', None],
'string0': ['%Y-%m-%d %H:%M:%S.%f', None],
'string3': ['%Y/%m/%d%H:%M:%S.%f', None],
}
for col in obj.table_columns:
print(col.column_name)
dttm_and_expr = dttm_and_expr_dict[col.column_name]
col.python_date_format = dttm_and_expr[0]
col.dbatabase_expr = dttm_and_expr[1]
db.session.merge(obj)
db.session.commit()
obj.fetch_metadata()
tbl = obj
print("Creating some slices")
i = 0
for col in tbl.table_columns:
slice_data = {
"granularity_sqla": col.column_name,
"datasource_id": "8",
"datasource_name": "multiformat_time_series",
"datasource_type": "table",
"granularity": "day",
"row_limit": config.get("ROW_LIMIT"),
"since": "1 year ago",
"until": "now",
"where": "",
"viz_type": "cal_heatmap",
"domain_granularity": "month",
"subdomain_granularity": "day",
}
slc = Slice(
slice_name="Calendar Heatmap multiformat" + str(i),
viz_type='cal_heatmap',
datasource_type='table',
table=tbl,
params=get_slice_json(slice_data),
)
i += 1
merge_slice(slc)