From 00f52a2cb29ddf7af8436cd84c02432efd3f0c85 Mon Sep 17 00:00:00 2001 From: liangbowen Date: Tue, 22 Nov 2022 19:44:02 +0800 Subject: [PATCH] [KYUUBI #3828] [PySpark] Support Python style check with spotless in CI style workflow and reformat tool ### _Why are the changes needed?_ to close #3828. Python code style checking support. 1. reuse Spotless maven plugin for Python style check 2. add style check to CI style workflow 3. add python linting support to `dev/reformat`. checks whether `black` installed in PATH. ### _How was this patch tested?_ - [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [x] [Run test](https://kyuubi.apache.org/docs/latest/develop_tools/testing.html#running-tests) locally before make a pull request Closes #3823 from bowenliang123/spotless-python. Closes #3828 4a4de885 [liangbowen] simplify empty tags 0bb9ec7c [liangbowen] simplify empty tag in pom 9dd39531 [liangbowen] lint python code with black via spotless f85020fa [liangbowen] typo 4c93bce0 [liangbowen] install python 3.9 first 23fc4b96 [liangbowen] ci install black version from added `spotless.python.black.version` property 73f746b0 [Bowen Liang] Update dev/reformat 46667a00 [liangbowen] update style.yml 9c20b434 [liangbowen] update style.yml 21017e5e [liangbowen] update style.yml 8272c0bc [liangbowen] add python style to style checking for CI e102726c [liangbowen] add profile spotless in dev/reformat if black found in path 062e9bf2 [liangbowen] add python scan for spotless. add new profile `spotless-python` for python file path. Lead-authored-by: liangbowen Co-authored-by: Bowen Liang Signed-off-by: Cheng Pan --- .github/workflows/style.yml | 12 ++- dev/reformat | 8 ++ .../main/resources/python/execute_python.py | 75 +++++++++++-------- .../src/main/resources/python/kyuubi_util.py | 22 +++--- pom.xml | 19 +++++ 5 files changed, 90 insertions(+), 46 deletions(-) diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml index 46151a31c..e70cb1bdc 100644 --- a/.github/workflows/style.yml +++ b/.github/workflows/style.yml @@ -45,6 +45,11 @@ jobs: java-version: 8 cache: 'maven' check-latest: false + - name: Setup Python 3 + uses: actions/setup-python@v4 + with: + python-version: '3.9' + cache: 'pip' - name: Check kyuubi modules avaliable id: modules-check run: build/mvn dependency:resolve -DincludeGroupIds="org.apache.kyuubi" -DincludeScope="compile" -DexcludeTransitive=true ${{ matrix.profiles }} @@ -72,8 +77,11 @@ jobs: cat $log; fi done - - name: JavaStyle with maven - run: build/mvn spotless:check ${{ matrix.profiles }} + - name: Spotless style check + run: | + SPOTLESS_BLACK_VERSION=$(build/mvn help:evaluate -Dexpression=spotless.python.black.version -q -DforceStdout) + pip install black==$SPOTLESS_BLACK_VERSION + build/mvn spotless:check ${{ matrix.profiles }} -Pspotless-python - name: setup npm uses: actions/setup-node@v3 with: diff --git a/dev/reformat b/dev/reformat index 88a8c238a..7c6ef7124 100755 --- a/dev/reformat +++ b/dev/reformat @@ -22,4 +22,12 @@ KYUUBI_HOME="$(cd "`dirname "$0"`/.."; pwd)" PROFILES="-Pflink-provided,hive-provided,spark-provided,spark-block-cleaner,spark-3.3,spark-3.2,spark-3.1,tpcds" +# python style checks rely on `black` in path +if ! command -v black &> /dev/null +then + echo "Skip Python lint since 'black' is not available." +else + PROFILES="${PROFILES},spotless-python" +fi + ${KYUUBI_HOME}/build/mvn spotless:apply $PROFILES diff --git a/externals/kyuubi-spark-sql-engine/src/main/resources/python/execute_python.py b/externals/kyuubi-spark-sql-engine/src/main/resources/python/execute_python.py index 67539b3b9..251be544c 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/resources/python/execute_python.py +++ b/externals/kyuubi-spark-sql-engine/src/main/resources/python/execute_python.py @@ -26,7 +26,7 @@ import traceback from glob import glob if sys.version_info[0] < 3: - sys.exit('Python < 3 is unsupported.') + sys.exit("Python < 3 is unsupported.") spark_home = os.environ.get("SPARK_HOME", "") os.environ["PYSPARK_PYTHON"] = os.environ.get("PYSPARK_PYTHON", sys.executable) @@ -68,7 +68,7 @@ global_dict = {} class NormalNode(object): def __init__(self, code): - self.code = compile(code, '', 'exec', ast.PyCF_ONLY_AST, 1) + self.code = compile(code, "", "exec", ast.PyCF_ONLY_AST, 1) def execute(self): to_run_exec, to_run_single = self.code.body[:-1], self.code.body[-1:] @@ -76,12 +76,12 @@ class NormalNode(object): try: for node in to_run_exec: mod = Module([node], []) - code = compile(mod, '', 'exec') + code = compile(mod, "", "exec") exec(code, global_dict) for node in to_run_single: mod = ast.Interactive([node]) - code = compile(mod, '', 'single') + code = compile(mod, "", "single") exec(code, global_dict) except Exception: # We don't need to log the exception because we're just executing user @@ -115,10 +115,10 @@ def parse_code_into_nodes(code): except SyntaxError: normal = [] chunks = [] - for i, line in enumerate(code.rstrip().split('\n')): - if line.startswith('%'): + for i, line in enumerate(code.rstrip().split("\n")): + if line.startswith("%"): if normal: - chunks.append('\n'.join(normal)) + chunks.append("\n".join(normal)) normal = [] chunks.append(line) @@ -126,7 +126,7 @@ def parse_code_into_nodes(code): normal.append(line) if normal: - chunks.append('\n'.join(normal)) + chunks.append("\n".join(normal)) # Convert the chunks into AST nodes. Let exceptions propagate. for chunk in chunks: @@ -141,46 +141,55 @@ def parse_code_into_nodes(code): def execute_reply(status, content): msg = { - 'msg_type': 'execute_reply', - 'content': dict( + "msg_type": "execute_reply", + "content": dict( content, status=status, - ) + ), } return json.dumps(msg) def execute_reply_ok(data): - return execute_reply("ok", { - "data": data, - }) + return execute_reply( + "ok", + { + "data": data, + }, + ) def execute_reply_error(exc_type, exc_value, tb): formatted_tb = traceback.format_exception(exc_type, exc_value, tb, chain=False) for i in range(len(formatted_tb)): if TOP_FRAME_REGEX.match(formatted_tb[i]): - formatted_tb = formatted_tb[:1] + formatted_tb[i + 1:] + formatted_tb = formatted_tb[:1] + formatted_tb[i + 1 :] break - return execute_reply('error', { - 'ename': str(exc_type.__name__), - 'evalue': str(exc_value), - 'traceback': formatted_tb, - }) + return execute_reply( + "error", + { + "ename": str(exc_type.__name__), + "evalue": str(exc_value), + "traceback": formatted_tb, + }, + ) def execute_reply_internal_error(message, exc_info=None): - return execute_reply('error', { - 'ename': 'InternalError', - 'evalue': message, - 'traceback': [], - }) + return execute_reply( + "error", + { + "ename": "InternalError", + "evalue": message, + "traceback": [], + }, + ) def execute_request(content): try: - code = content['code'] + code = content["code"] except KeyError: return execute_reply_internal_error( 'Malformed message: content object missing "code"', sys.exc_info() @@ -208,7 +217,7 @@ def execute_request(content): clearOutputs() - output = result.pop('text/plain', '') + output = result.pop("text/plain", "") if stdout: output += stdout @@ -220,14 +229,14 @@ def execute_request(content): # Only add the output if it exists, or if there are no other mimetypes in the result. if output or not result: - result['text/plain'] = output.rstrip() + result["text/plain"] = output.rstrip() return execute_reply_ok(result) # get or create spark session spark_session = kyuubi_util.get_spark_session() -global_dict['spark'] = spark_session +global_dict["spark"] = spark_session def main(): @@ -248,9 +257,9 @@ def main(): while True: line = sys_stdin.readline() - if line == '': + if line == "": break - elif line == '\n': + elif line == "\n": continue try: @@ -258,7 +267,7 @@ def main(): except ValueError: continue - if content['cmd'] == 'exit_worker': + if content["cmd"] == "exit_worker": break result = execute_request(content) @@ -272,5 +281,5 @@ def main(): sys.stderr = sys_stderr -if __name__ == '__main__': +if __name__ == "__main__": sys.exit(main()) diff --git a/externals/kyuubi-spark-sql-engine/src/main/resources/python/kyuubi_util.py b/externals/kyuubi-spark-sql-engine/src/main/resources/python/kyuubi_util.py index 8bbe6eb7c..a76b3b726 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/resources/python/kyuubi_util.py +++ b/externals/kyuubi-spark-sql-engine/src/main/resources/python/kyuubi_util.py @@ -34,18 +34,16 @@ def connect_to_exist_gateway() -> "JavaGateway": if os.environ.get("PYSPARK_PIN_THREAD", "true").lower() == "true": gateway = ClientServer( java_parameters=JavaParameters( - port=gateway_port, - auth_token=gateway_secret, - auto_convert=True), - python_parameters=PythonParameters( - port=0, - eager_load=False)) + port=gateway_port, auth_token=gateway_secret, auto_convert=True + ), + python_parameters=PythonParameters(port=0, eager_load=False), + ) else: gateway = JavaGateway( gateway_parameters=GatewayParameters( - port=gateway_port, - auth_token=gateway_secret, - auto_convert=True)) + port=gateway_port, auth_token=gateway_secret, auto_convert=True + ) + ) # gateway.proc = proc # Import the classes used by PySpark @@ -67,12 +65,14 @@ def _get_exist_spark_context(self, jconf): """ Initialize SparkContext in function to allow subclass specific initialization """ - return self._jvm.JavaSparkContext(self._jvm.org.apache.spark.SparkContext.getOrCreate(jconf)) + return self._jvm.JavaSparkContext( + self._jvm.org.apache.spark.SparkContext.getOrCreate(jconf) + ) def get_spark_session() -> "SparkSession": SparkContext._initialize_context = _get_exist_spark_context gateway = connect_to_exist_gateway() SparkContext._ensure_initialized(gateway=gateway) - spark = SparkSession.builder.master('local').appName('test').getOrCreate() + spark = SparkSession.builder.master("local").appName("test").getOrCreate() return spark diff --git a/pom.xml b/pom.xml index c6afd1adf..d7e416dc2 100644 --- a/pom.xml +++ b/pom.xml @@ -223,6 +223,10 @@ org.apache.kyuubi.shade + + + 22.3.0 + apache.releases.https Apache Release Distribution Repository https://repository.apache.org/service/local/staging/deploy/maven2 @@ -2035,6 +2039,14 @@ ${maven.multiModuleProjectDirectory}/.scalafmt.conf + + + ${spotless.python.includes} + + + ${spotless.python.black.version} + + @@ -2300,6 +2312,13 @@ + + spotless-python + + src/**/*.py + + + apache-release