datetime format and database expression on column level (#652)

* time format minor features added * add description for datetime format input * db version bug workaround * removed unnecessary comments and fixed minor bug * fixed code style * minor fix * fixed missing time format column in DruidDatasource * Update models.py Minor style fix * Revert "Update models.py" This reverts commit 6897c388e0. * removed timestamp_format from druid and removed try catch in migration * Using spaces, not tabs * get the most updated migration and add the migration on the head of it * remove vscode setting file * use column based dttm_format * modify dttm_converter * modify datetime viz * added comments and documents * fixed some description and removed unnecessary import * fix migration head * minor style * minor style * deleted empty lines * delete print statement * add epoch converter * error fixed * fixed epoch parsing issue * delete unnecessary lines * fixed typo * fix minor error * fix styling issues * fix styling error * fixed typo * support epoch_ms and did some refactoring * fixed styling error * fixed styling error * add one more dataset to test dttm_format and db_expr * add more slices * styling * specified String() length
2026-04-20 00:24:38 +00:00 · 2016-06-27 21:33:44 -07:00
parent 3e742c74bb
commit 7a7f61a296
7 changed files with 203 additions and 28 deletions
--- a/caravel/data/init.py
+++ b/caravel/data/init.py
@@ -12,7 +12,7 @@ import datetime
 import random

 import pandas as pd
-from sqlalchemy import String, DateTime, Date, Float
+from sqlalchemy import String, DateTime, Date, Float, BigInteger

 from caravel import app, db, models, utils

@@ -1020,3 +1020,84 @@ def load_long_lat_data():
        params=get_slice_json(slice_data),
    )
    merge_slice(slc)
+
+
+def load_multiformat_time_series_data():
+    """Load the multi-format time series example table and its slices.
+
+    Reads ``multiformat_time_series.json.gz`` (gzip-compressed JSON) from
+    ``DATA_FOLDER`` and writes it to the ``multiformat_time_series`` table,
+    replacing any existing table of that name. It then looks up or creates
+    the matching ``TBL`` record, assigns a date format and database
+    expression to each of its columns, and finally builds one calendar
+    heatmap ``Slice`` per table column and hands it to ``merge_slice``.
+    """
+    with gzip.open(os.path.join(DATA_FOLDER, 'multiformat_time_series.json.gz')) as f:
+        pdf = pd.read_json(f)
+    # `ds` and `ds2` are converted from epoch seconds to timestamps; the
+    # remaining columns are stored as loaded.
+    pdf.ds = pd.to_datetime(pdf.ds, unit='s')
+    pdf.ds2 = pd.to_datetime(pdf.ds2, unit='s')
+    pdf.to_sql(
+        'multiformat_time_series',
+        db.engine,
+        if_exists='replace',
+        chunksize=500,
+        dtype={
+            "ds": Date,
+            'ds2': DateTime,
+            "epoch_s": BigInteger,
+            "epoch_ms": BigInteger,
+            "string0": String(100),
+            "string1": String(100),
+            "string2": String(100),
+            "string3": String(100),
+        },
+        index=False)
+    print("Done loading table!")
+    print("-" * 80)
+    print("Creating table [multiformat_time_series] reference")
+    obj = db.session.query(TBL).filter_by(table_name='multiformat_time_series').first()
+    if not obj:
+        obj = TBL(table_name='multiformat_time_series')
+    obj.main_dttm_col = 'ds'
+    obj.database = get_or_create_db(db.session)
+    obj.is_featured = False
+    # column name -> (python date format, database expression)
+    dttm_and_expr_dict = {
+        'ds': (None, None),
+        'ds2': (None, None),
+        'epoch_s': ('epoch_s', None),
+        'epoch_ms': ('epoch_ms', None),
+        'string2': ('%Y%m%d-%H%M%S', None),
+        'string1': ('%Y-%m-%d^%H:%M:%S', None),
+        'string0': ('%Y-%m-%d %H:%M:%S.%f', None),
+        'string3': ('%Y/%m/%d%H:%M:%S.%f', None),
+    }
+    # NOTE(review): this loop runs before fetch_metadata(); if a newly created
+    # table has no columns yet, nothing is configured on the first run -- confirm.
+    for col in obj.table_columns:
+        print(col.column_name)
+        date_format, db_expression = dttm_and_expr_dict[col.column_name]
+        col.python_date_format = date_format
+        # NOTE(review): assumes the column model names this attribute
+        # `database_expression` -- confirm against models.py.
+        col.database_expression = db_expression
+    db.session.merge(obj)
+    db.session.commit()
+    obj.fetch_metadata()
+    tbl = obj
+
+    print("Creating some slices")
+    for i, col in enumerate(tbl.table_columns):
+        # Each slice uses a different column as its time granularity column.
+        slice_data = {
+            "granularity_sqla": col.column_name,
+            # NOTE(review): hard-coded id; presumably should track the id of
+            # `tbl` -- verify before relying on it.
+            "datasource_id": "8",
+            "datasource_name": "multiformat_time_series",
+            "datasource_type": "table",
+            "granularity": "day",
+            "row_limit": config.get("ROW_LIMIT"),
+            "since": "1 year ago",
+            "until": "now",
+            "where": "",
+            "viz_type": "cal_heatmap",
+            "domain_granularity": "month",
+            "subdomain_granularity": "day",
+        }
+
+        slc = Slice(
+            slice_name="Calendar Heatmap multiformat" + str(i),
+            viz_type='cal_heatmap',
+            datasource_type='table',
+            table=tbl,
+            params=get_slice_json(slice_data),
+        )
+        merge_slice(slc)