Add docstrings and typing to db_engine_specs and sql_parse (#8058)

* Add typing to db_engine_specs

* Add more type annotations and docstrings

* Add docstrings and typing to sql_parse and db_engine_specs

* Refine select_star

* Fix execute and add more docstrings

* Revert kwargs change from execute

* Remove redundant or

* Align view and table getter schema types

* Fix return type of latest_partition

* Remove some typing from presto

* Improve docstring for __extract_from_token

This commit is contained in:
Ville Brofeldt
2019-08-22 06:29:32 +03:00
committed by GitHub
parent 40776bd547
commit fb51632e18
21 changed files with 496 additions and 194 deletions

View File

@@ -14,8 +14,10 @@
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from datetime import datetime
import hashlib
import re
from typing import Any, Dict, List, Tuple
import pandas as pd
from sqlalchemy import literal_column
@@ -56,28 +58,29 @@ class BigQueryEngineSpec(BaseEngineSpec):
}
@classmethod
def convert_dttm(cls, target_type, dttm):
def convert_dttm(cls, target_type: str, dttm: datetime) -> str:
tt = target_type.upper()
if tt == "DATE":
return "'{}'".format(dttm.strftime("%Y-%m-%d"))
return "'{}'".format(dttm.strftime("%Y-%m-%d %H:%M:%S"))
@classmethod
def fetch_data(cls, cursor, limit):
def fetch_data(cls, cursor, limit: int) -> List[Tuple]:
data = super(BigQueryEngineSpec, cls).fetch_data(cursor, limit)
if data and type(data[0]).__name__ == "Row":
data = [r.values() for r in data]
return data
@staticmethod
def mutate_label(label):
def _mutate_label(label: str) -> str:
"""
BigQuery field_name should start with a letter or underscore and contain only
alphanumeric characters. Labels that start with a number are prefixed with an
underscore. Any unsupported characters are replaced with underscores and an
md5 hash is added to the end of the label to avoid possible collisions.
:param str label: the original label which might include unsupported characters
:return: String that is supported by the database
:param label: Expected expression label
:return: Conditionally mutated label
"""
label_hashed = "_" + hashlib.md5(label.encode("utf-8")).hexdigest()
@@ -93,15 +96,20 @@ class BigQueryEngineSpec(BaseEngineSpec):
return label_mutated
@classmethod
def truncate_label(cls, label):
def _truncate_label(cls, label: str) -> str:
"""BigQuery requires column names start with either a letter or
underscore. To make sure this is always the case, an underscore is prefixed
to the truncated label.
to the md5 hash of the original label.
:param label: expected expression label
:return: truncated label
"""
return "_" + hashlib.md5(label.encode("utf-8")).hexdigest()
@classmethod
def extra_table_metadata(cls, database, table_name, schema_name):
def extra_table_metadata(
cls, database, table_name: str, schema_name: str
) -> Dict[str, Any]:
indexes = database.get_indexes(table_name, schema_name)
if not indexes:
return {}
@@ -136,11 +144,11 @@ class BigQueryEngineSpec(BaseEngineSpec):
]
@classmethod
def epoch_to_dttm(cls):
def epoch_to_dttm(cls) -> str:
return "TIMESTAMP_SECONDS({col})"
@classmethod
def epoch_ms_to_dttm(cls):
def epoch_ms_to_dttm(cls) -> str:
return "TIMESTAMP_MILLIS({col})"
@classmethod