diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 228d305325b..fce578c1eb7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -100,3 +100,22 @@ repos: - id: ruff args: [--fix] - id: ruff-format + - repo: local + hooks: + - id: pylint + name: pylint with custom Superset plugins + entry: bash + language: system + types: [python] + exclude: ^(tests/|superset/migrations/|scripts/|RELEASING/|docker/) + args: + - -c + - | + TARGET_BRANCH=${GITHUB_BASE_REF:-master} + git fetch origin "$TARGET_BRANCH" + files=$(git diff --name-only --diff-filter=ACM origin/"$TARGET_BRANCH"..HEAD | grep '^superset/.*\.py$' || true) + if [ -n "$files" ]; then + pylint --rcfile=.pylintrc --load-plugins=superset.extensions.pylint $files + else + echo "No Python files to lint." + fi diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 00000000000..80b5ef5f668 --- /dev/null +++ b/.pylintrc @@ -0,0 +1,355 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +[MASTER] + +# Specify a configuration file. +#rcfile= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Add files or directories to the blacklist. They should be base names, not +# paths. +ignore=CVS,migrations + +# Add files or directories matching the regex patterns to the blacklist. The +# regex matches against base names, not paths. +ignore-patterns= + +# Pickle collected data for later comparisons. +persistent=yes + +# List of plugins (as comma separated values of python modules names) to load, +# usually to register additional checkers. +load-plugins=superset.extensions.pylint + +# Use multiple processes to speed up Pylint. +jobs=2 + +# Allow loading of arbitrary C extensions. Extensions are imported into the +# active Python interpreter and may run arbitrary code. +unsafe-load-any-extension=no + +# A comma-separated list of package or module names from where C extensions may +# be loaded. Extensions are loading into the active Python interpreter and may +# run arbitrary code +extension-pkg-whitelist=pyarrow + + +[MESSAGES CONTROL] +disable=all +enable=disallowed-json-import,disallowed-sql-import,consider-using-transaction + + +[REPORTS] + +# Set the output format. Available formats are text, parseable, colorized, msvs +# (visual studio) and html. You can also give a reporter class, eg +# mypackage.mymodule.MyReporterClass. +output-format=text + +# Tells whether to display a full report or only the messages +reports=yes + +# Python expression which should return a note less than 10 (10 is the highest +# note). You have access to the variables errors warning, statement which +# respectively contain the number of errors / warnings messages and the total +# number of statements analyzed. This is used by the global evaluation report +# (RP0004). +evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) + +# Template used to display messages. This is a python new-style format string +# used to format the message information. See doc for all details +#msg-template= + + +[BASIC] + +# Good variable names which should always be accepted, separated by a comma +good-names=_,df,ex,f,i,id,j,k,l,o,pk,Run,ts,v,x,y + +# Bad variable names which should always be refused, separated by a comma +bad-names=bar,baz,db,fd,foo,sesh,session,tata,toto,tutu + +# Colon-delimited sets of names that determine each other's naming style when +# the name regexes allow several styles. +name-group= + +# Include a hint for the correct naming format with invalid-name +include-naming-hint=no + +# List of decorators that produce properties, such as abc.abstractproperty. Add +# to this list to register other decorators that produce valid properties. +property-classes= + abc.abstractproperty, + sqlalchemy.ext.hybrid.hybrid_property + +# Regular expression matching correct argument names +argument-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct method names +method-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct variable names +variable-rgx=[a-z_][a-z0-9_]{1,30}$ + +# Regular expression matching correct inline iteration names +inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ + +# Regular expression matching correct constant names +const-rgx=(([A-Za-z_][A-Za-z0-9_]*)|(__.*__))$ + +# Regular expression matching correct class names +class-rgx=[A-Z_][a-zA-Z0-9]+$ + +# Regular expression matching correct class attribute names +class-attribute-rgx=([A-Za-z_][A-Za-z0-9_]{2,30}|(__.*__))$ + +# Regular expression matching correct module names +module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ + +# Regular expression matching correct attribute names +attr-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression matching correct function names +function-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression which should only match function or class names that do +# not require a docstring. +no-docstring-rgx=^_ + +# Minimum line length for functions/classes that require docstrings, shorter +# ones are exempt. +docstring-min-length=10 + + +[ELIF] + +# Maximum number of nested blocks for function / method body +max-nested-blocks=5 + + +[FORMAT] + +# Maximum number of characters on a single line. +max-line-length=100 + +# Regexp for a line that is allowed to be longer than the limit. +ignore-long-lines=^\s*(# )??$ + +# Allow the body of an if to be on the same line as the test if there is no +# else. +single-line-if-stmt=no + +# Maximum number of lines in a module +max-module-lines=1000 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' + +# Number of spaces of indent required inside a hanging or continued line. +indent-after-paren=4 + +# Expected format of line ending, e.g. empty (any line ending), LF or CRLF. +expected-line-ending-format= + + +[LOGGING] + +# Logging modules to check that the string format arguments are in logging +# function parameter format +logging-modules=logging + + +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME,XXX + + +[SIMILARITIES] + +# Minimum lines number of a similarity. +min-similarity-lines=5 + +# Ignore comments when computing similarities. +ignore-comments=yes + +# Ignore docstrings when computing similarities. +ignore-docstrings=yes + +# Ignore imports when computing similarities. +ignore-imports=no + + +[SPELLING] + +# Spelling dictionary name. Available dictionaries: none. To make it working +# install python-enchant package. +spelling-dict= + +# List of comma separated words that should not be checked. +spelling-ignore-words= + +# A path to a file that contains private dictionary; one word per line. +spelling-private-dict-file= + +# Tells whether to store unknown words to indicated private dictionary in +# --spelling-private-dict-file option instead of raising a message. +spelling-store-unknown-words=no + + +[TYPECHECK] + +# Tells whether missing members accessed in mixin class should be ignored. A +# mixin class is detected if its name ends with "mixin" (case insensitive). +ignore-mixin-members=yes + +# List of module names for which member attributes should not be checked +# (useful for modules/projects where namespaces are manipulated during runtime +# and thus existing member attributes cannot be deduced by static analysis. It +# supports qualified module names, as well as Unix pattern matching. +ignored-modules=numpy,pandas,alembic.op,sqlalchemy,alembic.context,flask_appbuilder.security.sqla.PermissionView.role,flask_appbuilder.Model.metadata,flask_appbuilder.Base.metadata + +# List of class names for which member attributes should not be checked (useful +# for classes with dynamically set attributes). This supports the use of +# qualified names. +ignored-classes=contextlib.closing,optparse.Values,thread._local,_thread._local + +# List of members which are set dynamically and missed by pylint inference +# system, and so shouldn't trigger E1101 when accessed. Python regular +# expressions are accepted. +generated-members= + +# List of decorators that produce context managers, such as +# contextlib.contextmanager. Add to this list to register other decorators that +# produce valid context managers. +contextmanager-decorators=contextlib.contextmanager + + +[VARIABLES] + +# Tells whether we should check for unused import in __init__ files. +init-import=no + +# A regular expression matching the name of dummy variables (i.e. expectedly +# not used). +dummy-variables-rgx=(_+[a-zA-Z0-9]*?$)|dummy + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid to define new builtins when possible. +additional-builtins= + +# List of strings which can identify a callback function by name. A callback +# name must start or end with one of those strings. +callbacks=cb_,_cb + +# List of qualified module names which can have objects that can redefine +# builtins. +redefining-builtins-modules=six.moves,future.builtins + + +[CLASSES] + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__,__new__,setUp + +# List of valid names for the first argument in a class method. +valid-classmethod-first-arg=cls + +# List of valid names for the first argument in a metaclass class method. +valid-metaclass-classmethod-first-arg=mcs + +# List of member names, which should be excluded from the protected access +# warning. +exclude-protected=_asdict,_fields,_replace,_source,_make + + +[DESIGN] + +# Maximum number of arguments for function / method +max-args=5 + +# Argument names that match this expression will be ignored. Default to name +# with leading underscore +ignored-argument-names=_.* + +# Maximum number of locals for function / method body +max-locals=15 + +# Maximum number of return / yield for function / method body +max-returns=10 + +# Maximum number of branch for function / method body +max-branches=15 + +# Maximum number of statements in function / method body +max-statements=50 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of attributes for a class (see R0902). +max-attributes=8 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + +# Maximum number of boolean expressions in a if statement +max-bool-expr=5 + + +[IMPORTS] + +# Deprecated modules which should not be used, separated by a comma +deprecated-modules=optparse + +# Create a graph of every (i.e. internal and external) dependencies in the +# given file (report RP0402 must not be disabled) +import-graph= + +# Create a graph of external dependencies in the given file (report RP0402 must +# not be disabled) +ext-import-graph= + +# Create a graph of internal dependencies in the given file (report RP0402 must +# not be disabled) +int-import-graph= + +# Force import order to recognize a module as part of the standard +# compatibility libraries. +known-standard-library= + +# Force import order to recognize a module as part of a third party library. +known-third-party=enchant + +# Analyse import fallback blocks. This can be used to support both Python 2 and +# 3 compatible code, which means that the block might have code that exists +# only in one or another interpreter, leading to false positives when analysed. +analyse-fallback-blocks=no + + +[EXCEPTIONS] + +# Exceptions that will emit a warning when being caught. Defaults to +# "Exception" +overgeneral-exceptions=builtins.Exception diff --git a/pyproject.toml b/pyproject.toml index 6596fb6c920..d0437ae18e0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -199,6 +199,7 @@ development = [ "psutil", "pyfakefs", "pyinstrument>=4.0.2,<5", + "pylint", "pytest<8.0.0", # hairy issue with pytest >=8 where current_app proxies are not set in time "pytest-cov", "pytest-mock", diff --git a/requirements/development.txt b/requirements/development.txt index 4466fc684d9..02109fa517a 100644 --- a/requirements/development.txt +++ b/requirements/development.txt @@ -18,6 +18,8 @@ apsw==3.49.2.0 # via # -c requirements/base.txt # shillelagh +astroid==3.3.10 + # via pylint attrs==25.3.0 # via # -c requirements/base.txt @@ -156,6 +158,8 @@ deprecation==2.1.0 # via # -c requirements/base.txt # apache-superset +dill==0.4.0 + # via pylint distlib==0.3.8 # via virtualenv dnspython==2.7.0 @@ -356,6 +360,8 @@ isodate==0.7.2 # via # -c requirements/base.txt # apache-superset +isort==6.0.1 + # via pylint itsdangerous==2.2.0 # via # -c requirements/base.txt @@ -430,6 +436,8 @@ marshmallow-sqlalchemy==1.4.0 # flask-appbuilder matplotlib==3.9.0 # via prophet +mccabe==0.7.0 + # via pylint mdurl==0.1.2 # via # -c requirements/base.txt @@ -537,6 +545,7 @@ pillow==10.3.0 platformdirs==4.3.8 # via # -c requirements/base.txt + # pylint # requests-cache # virtualenv pluggy==1.5.0 @@ -619,6 +628,8 @@ pyjwt==2.10.1 # apache-superset # flask-appbuilder # flask-jwt-extended +pylint==3.3.7 + # via apache-superset pynacl==1.5.0 # via # -c requirements/base.txt @@ -808,6 +819,8 @@ tabulate==0.9.0 # via # -c requirements/base.txt # apache-superset +tomlkit==0.13.3 + # via pylint tqdm==4.67.1 # via # cmdstanpy diff --git a/superset/extensions/pylint.py b/superset/extensions/pylint.py index 5925f180b78..c328b1c28fb 100644 --- a/superset/extensions/pylint.py +++ b/superset/extensions/pylint.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. import os +from pathlib import Path from astroid import nodes from pylint.checkers import BaseChecker @@ -22,12 +23,12 @@ from pylint.lint import PyLinter class JSONLibraryImportChecker(BaseChecker): - name = "json-library-import-checker" + name = "disallowed-json-import" priority = -1 msgs = { - "C9999": ( + "C9001": ( "Disallowed json import used, use superset.utils.json instead", - "disallowed-import", + "disallowed-json-import", "Used when a disallowed import is used in a specific file.", ), } @@ -47,19 +48,19 @@ class JSONLibraryImportChecker(BaseChecker): if file not in self.exclude_files: for module_name, _ in node.names: if module_name in ["json", "simplejson"]: - self.add_message("disallowed-import", node=node) + self.add_message("disallowed-json-import", node=node) def visit_importfrom(self, node: nodes.ImportFrom) -> None: file = (node.root().file).replace(self.path_strip_prefix, "", 1) if file not in self.exclude_files: if node.modname in ["json", "simplejson"]: - self.add_message("disallowed-import", node=node) + self.add_message("disallowed-json-import", node=node) class TransactionChecker(BaseChecker): name = "consider-using-transaction" msgs = { - "W0001": ( + "W9001": ( 'Consider using the @transaction decorator when defining a "unit of work"', "consider-using-transaction", "Used when an explicit commit or rollback call is detected", @@ -72,6 +73,41 @@ class TransactionChecker(BaseChecker): self.add_message("consider-using-transaction", node=node) +class SQLParsingLibraryImportChecker(BaseChecker): + name = "disallowed-sql-import" + priority = 0 + msgs = { + "C9002": ( + "Disallowed SQL parsing import used", + "disallowed-sql-import", + "Used when a disallowed import is used in a specific file.", + ), + } + + def _is_disallowed(self, file_path: Path, root_mod: str) -> bool: + # True if sqlglot is imported outside superset/sql, + # or if any forbidden library is imported anywhere + in_superset_sql = file_path.match("**/superset/sql/**") + return (root_mod == "sqlglot" and not in_superset_sql) or root_mod in { + "sqlparse", + "sqloxide", + } + + def visit_import(self, node: nodes.Import) -> None: + root_file = Path(node.root().file or "") + for mod, _ in node.names: + root_mod = mod.split(".", 1)[0] + if self._is_disallowed(root_file, root_mod): + self.add_message("disallowed-sql-import", node=node) + + def visit_importfrom(self, node: nodes.ImportFrom) -> None: + root_file = Path(node.root().file or "") + root_mod = (node.modname or "").split(".", 1)[0] + if self._is_disallowed(root_file, root_mod): + self.add_message("disallowed-sql-import", node=node) + + def register(linter: PyLinter) -> None: linter.register_checker(JSONLibraryImportChecker(linter)) + linter.register_checker(SQLParsingLibraryImportChecker(linter)) linter.register_checker(TransactionChecker(linter))