mirror of
https://github.com/apache/superset.git
synced 2026-04-18 15:44:57 +00:00
Implement table name extraction. (#1598)
* Implement table name extraction tests. * Address comments. * Fix tests and reimplement the token processing. * Exclude aliases. * Clean up print statements and code. * Reverse select test. * Fix failing test. * Test JOINs. * Refactor as a class. * Check for permissions in SQL Lab. * Implement permissions check for the datasources in sql_lab * Address comments.
This commit is contained in:
101
superset/sql_parse.py
Normal file
101
superset/sql_parse.py
Normal file
@@ -0,0 +1,101 @@
|
||||
import sqlparse
|
||||
from sqlparse.sql import IdentifierList, Identifier
|
||||
from sqlparse.tokens import Keyword, Name
|
||||
|
||||
# Set-operation keywords. When one of these appears (matched by substring)
# after a table clause, the parser resets its state and keeps scanning
# instead of treating the clause as finished.
RESULT_OPERATIONS = {
    'EXCEPT',
    'INTERSECT',
    'UNION',
}
|
||||
# Keywords after which a table name is expected. Matching is done by
# substring, so any keyword token containing one of these also counts.
PRECEDES_TABLE_NAME = {
    'DESC',
    'DESCRIBE',
    'FROM',
    'JOIN',
    'WITH',
}
|
||||
|
||||
|
||||
# TODO: some sql_lab logic here.
|
||||
class SupersetQuery(object):
    """Parse a SQL string and collect the table names it references.

    Parsing happens eagerly in the constructor: every statement returned
    by ``sqlparse.parse`` is walked, identifiers that follow a keyword
    listed in ``PRECEDES_TABLE_NAME`` are recorded as tables, and every
    name recorded as an alias is removed from the result afterwards.
    """

    def __init__(self, sql_statement):
        """Parse *sql_statement* and populate the table-name set.

        :param sql_statement: SQL text handed to ``sqlparse.parse``.
        """
        # Not read anywhere in this class; kept so the attribute still exists.
        self._tokens = []
        self.sql = sql_statement
        self._table_names = set()
        self._alias_names = set()
        # TODO: multistatement support
        for statement in sqlparse.parse(self.sql):
            self.__extract_from_token(statement)
        # A name collected as an alias is not a real table: drop it.
        self._table_names = self._table_names - self._alias_names

    @property
    def tables(self):
        """Set of table names found in the SQL, with aliases excluded."""
        return self._table_names

    # TODO: use sqlparse for this check.
    def is_select(self):
        """Return True when the SQL text starts with ``SELECT``.

        The check is case-insensitive and skips leading whitespace, so an
        indented or newline-prefixed query is still recognised.
        """
        return self.sql.lstrip().upper().startswith('SELECT')

    @staticmethod
    def __precedes_table_name(token_value):
        """Return True if *token_value* contains a table-introducing keyword.

        Matching is by substring against ``PRECEDES_TABLE_NAME``; the caller
        passes the keyword text already upper-cased.
        """
        return any(keyword in token_value for keyword in PRECEDES_TABLE_NAME)

    @staticmethod
    def __get_full_name(identifier):
        """Return the name to record for *identifier*.

        When the second token is a dot and a third token exists, the first
        and third token values are joined with a dot (e.g. ``schema.table``).
        Otherwise the identifier's ``get_real_name()`` is returned, which may
        be ``None``.
        """
        tokens = identifier.tokens
        # Require the third token too, so a dangling "name." cannot raise
        # IndexError on tokens[2].
        if len(tokens) > 2 and tokens[1].value == '.':
            return "{}.{}".format(tokens[0].value, tokens[2].value)
        return identifier.get_real_name()

    @staticmethod
    def __is_result_operation(keyword):
        """Return True if *keyword* contains a ``RESULT_OPERATIONS`` entry.

        The comparison is case-insensitive and by substring.
        """
        upper_keyword = keyword.upper()
        return any(operation in upper_keyword
                   for operation in RESULT_OPERATIONS)

    @staticmethod
    def __is_identifier(token):
        """Return True for ``Identifier`` and ``IdentifierList`` tokens."""
        return isinstance(token, (IdentifierList, Identifier))

    def __process_identifier(self, identifier):
        """Record *identifier* as a table, or descend into its sub-select.

        An identifier whose text has no ``(`` is stored as a table name.
        Otherwise its alias is remembered (so it can be excluded from the
        result later) and its child tokens are scanned for tables.
        """
        # exclude subselects
        if '(' not in '{}'.format(identifier):
            name = SupersetQuery.__get_full_name(identifier)
            # get_real_name() may yield None; never expose that as a table.
            if name is not None:
                self._table_names.add(name)
            return

        # store aliases
        if hasattr(identifier, 'get_alias'):
            alias = identifier.get_alias()
            if alias is not None:
                self._alias_names.add(alias)
        child_tokens = getattr(identifier, 'tokens', None)
        # some aliases are not parsed properly
        if child_tokens and child_tokens[0].ttype == Name:
            self._alias_names.add(child_tokens[0].value)
        self.__extract_from_token(identifier)

    def __extract_from_token(self, token):
        """Walk the children of *token* and collect table identifiers.

        Tokens without a ``tokens`` attribute are ignored. Identifiers are
        only processed while a table-introducing keyword is in effect; any
        other keyword ends the scan of this token, except a result
        operation, which resets the state so a following query is scanned.
        """
        if not hasattr(token, 'tokens'):
            return

        table_name_preceding_token = False

        for item in token.tokens:
            # Nested groups other than identifiers are scanned on their own.
            if item.is_group and not self.__is_identifier(item):
                self.__extract_from_token(item)

            is_keyword = item.ttype in Keyword
            if is_keyword and SupersetQuery.__precedes_table_name(
                    item.value.upper()):
                table_name_preceding_token = True
                continue

            if not table_name_preceding_token:
                continue

            if is_keyword:
                if SupersetQuery.__is_result_operation(item.value):
                    table_name_preceding_token = False
                    continue
                # FROM clause is over
                break

            if isinstance(item, Identifier):
                self.__process_identifier(item)
            elif isinstance(item, IdentifierList):
                for child in item.tokens:
                    if SupersetQuery.__is_identifier(child):
                        self.__process_identifier(child)
|
||||
Reference in New Issue
Block a user