[KYUUBI #3828] [PySpark] Support Python style check with spotless in CI style workflow and reformat tool
### _Why are the changes needed?_ to close #3828. Python code style checking support. 1. reuse Spotless maven plugin for Python style check 2. add style check to CI style workflow 3. add python linting support to `dev/reformat`. checks whether `black` installed in PATH. ### _How was this patch tested?_ - [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [x] [Run test](https://kyuubi.apache.org/docs/latest/develop_tools/testing.html#running-tests) locally before make a pull request Closes #3823 from bowenliang123/spotless-python. Closes #3828 4a4de885 [liangbowen] simplify empty tags 0bb9ec7c [liangbowen] simplify empty tag in pom 9dd39531 [liangbowen] lint python code with black via spotless f85020fa [liangbowen] typo 4c93bce0 [liangbowen] install python 3.9 first 23fc4b96 [liangbowen] ci install black version from added `spotless.python.black.version` property 73f746b0 [Bowen Liang] Update dev/reformat 46667a00 [liangbowen] update style.yml 9c20b434 [liangbowen] update style.yml 21017e5e [liangbowen] update style.yml 8272c0bc [liangbowen] add python style to style checking for CI e102726c [liangbowen] add profile spotless in dev/reformat if black found in path 062e9bf2 [liangbowen] add python scan for spotless. add new profile `spotless-python` for python file path. Lead-authored-by: liangbowen <liangbowen@gf.com.cn> Co-authored-by: Bowen Liang <bowen.liang.123@gmail.com> Signed-off-by: Cheng Pan <chengpan@apache.org>
This commit is contained in:
parent
d0f3029908
commit
00f52a2cb2
12
.github/workflows/style.yml
vendored
12
.github/workflows/style.yml
vendored
@ -45,6 +45,11 @@ jobs:
|
||||
java-version: 8
|
||||
cache: 'maven'
|
||||
check-latest: false
|
||||
- name: Setup Python 3
|
||||
uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: '3.9'
|
||||
cache: 'pip'
|
||||
- name: Check kyuubi modules avaliable
|
||||
id: modules-check
|
||||
run: build/mvn dependency:resolve -DincludeGroupIds="org.apache.kyuubi" -DincludeScope="compile" -DexcludeTransitive=true ${{ matrix.profiles }}
|
||||
@ -72,8 +77,11 @@ jobs:
|
||||
cat $log;
|
||||
fi
|
||||
done
|
||||
- name: JavaStyle with maven
|
||||
run: build/mvn spotless:check ${{ matrix.profiles }}
|
||||
- name: Spotless style check
|
||||
run: |
|
||||
SPOTLESS_BLACK_VERSION=$(build/mvn help:evaluate -Dexpression=spotless.python.black.version -q -DforceStdout)
|
||||
pip install black==$SPOTLESS_BLACK_VERSION
|
||||
build/mvn spotless:check ${{ matrix.profiles }} -Pspotless-python
|
||||
- name: setup npm
|
||||
uses: actions/setup-node@v3
|
||||
with:
|
||||
|
||||
@ -22,4 +22,12 @@ KYUUBI_HOME="$(cd "`dirname "$0"`/.."; pwd)"
|
||||
|
||||
PROFILES="-Pflink-provided,hive-provided,spark-provided,spark-block-cleaner,spark-3.3,spark-3.2,spark-3.1,tpcds"
|
||||
|
||||
# python style checks rely on `black` in path
|
||||
if ! command -v black &> /dev/null
|
||||
then
|
||||
echo "Skip Python lint since 'black' is not available."
|
||||
else
|
||||
PROFILES="${PROFILES},spotless-python"
|
||||
fi
|
||||
|
||||
${KYUUBI_HOME}/build/mvn spotless:apply $PROFILES
|
||||
|
||||
@ -26,7 +26,7 @@ import traceback
|
||||
from glob import glob
|
||||
|
||||
if sys.version_info[0] < 3:
|
||||
sys.exit('Python < 3 is unsupported.')
|
||||
sys.exit("Python < 3 is unsupported.")
|
||||
|
||||
spark_home = os.environ.get("SPARK_HOME", "")
|
||||
os.environ["PYSPARK_PYTHON"] = os.environ.get("PYSPARK_PYTHON", sys.executable)
|
||||
@ -68,7 +68,7 @@ global_dict = {}
|
||||
|
||||
class NormalNode(object):
|
||||
def __init__(self, code):
|
||||
self.code = compile(code, '<stdin>', 'exec', ast.PyCF_ONLY_AST, 1)
|
||||
self.code = compile(code, "<stdin>", "exec", ast.PyCF_ONLY_AST, 1)
|
||||
|
||||
def execute(self):
|
||||
to_run_exec, to_run_single = self.code.body[:-1], self.code.body[-1:]
|
||||
@ -76,12 +76,12 @@ class NormalNode(object):
|
||||
try:
|
||||
for node in to_run_exec:
|
||||
mod = Module([node], [])
|
||||
code = compile(mod, '<stdin>', 'exec')
|
||||
code = compile(mod, "<stdin>", "exec")
|
||||
exec(code, global_dict)
|
||||
|
||||
for node in to_run_single:
|
||||
mod = ast.Interactive([node])
|
||||
code = compile(mod, '<stdin>', 'single')
|
||||
code = compile(mod, "<stdin>", "single")
|
||||
exec(code, global_dict)
|
||||
except Exception:
|
||||
# We don't need to log the exception because we're just executing user
|
||||
@ -115,10 +115,10 @@ def parse_code_into_nodes(code):
|
||||
except SyntaxError:
|
||||
normal = []
|
||||
chunks = []
|
||||
for i, line in enumerate(code.rstrip().split('\n')):
|
||||
if line.startswith('%'):
|
||||
for i, line in enumerate(code.rstrip().split("\n")):
|
||||
if line.startswith("%"):
|
||||
if normal:
|
||||
chunks.append('\n'.join(normal))
|
||||
chunks.append("\n".join(normal))
|
||||
normal = []
|
||||
|
||||
chunks.append(line)
|
||||
@ -126,7 +126,7 @@ def parse_code_into_nodes(code):
|
||||
normal.append(line)
|
||||
|
||||
if normal:
|
||||
chunks.append('\n'.join(normal))
|
||||
chunks.append("\n".join(normal))
|
||||
|
||||
# Convert the chunks into AST nodes. Let exceptions propagate.
|
||||
for chunk in chunks:
|
||||
@ -141,46 +141,55 @@ def parse_code_into_nodes(code):
|
||||
|
||||
def execute_reply(status, content):
|
||||
msg = {
|
||||
'msg_type': 'execute_reply',
|
||||
'content': dict(
|
||||
"msg_type": "execute_reply",
|
||||
"content": dict(
|
||||
content,
|
||||
status=status,
|
||||
)
|
||||
),
|
||||
}
|
||||
return json.dumps(msg)
|
||||
|
||||
|
||||
def execute_reply_ok(data):
|
||||
return execute_reply("ok", {
|
||||
"data": data,
|
||||
})
|
||||
return execute_reply(
|
||||
"ok",
|
||||
{
|
||||
"data": data,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
def execute_reply_error(exc_type, exc_value, tb):
|
||||
formatted_tb = traceback.format_exception(exc_type, exc_value, tb, chain=False)
|
||||
for i in range(len(formatted_tb)):
|
||||
if TOP_FRAME_REGEX.match(formatted_tb[i]):
|
||||
formatted_tb = formatted_tb[:1] + formatted_tb[i + 1:]
|
||||
formatted_tb = formatted_tb[:1] + formatted_tb[i + 1 :]
|
||||
break
|
||||
|
||||
return execute_reply('error', {
|
||||
'ename': str(exc_type.__name__),
|
||||
'evalue': str(exc_value),
|
||||
'traceback': formatted_tb,
|
||||
})
|
||||
return execute_reply(
|
||||
"error",
|
||||
{
|
||||
"ename": str(exc_type.__name__),
|
||||
"evalue": str(exc_value),
|
||||
"traceback": formatted_tb,
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
def execute_reply_internal_error(message, exc_info=None):
|
||||
return execute_reply('error', {
|
||||
'ename': 'InternalError',
|
||||
'evalue': message,
|
||||
'traceback': [],
|
||||
})
|
||||
return execute_reply(
|
||||
"error",
|
||||
{
|
||||
"ename": "InternalError",
|
||||
"evalue": message,
|
||||
"traceback": [],
|
||||
},
|
||||
)
|
||||
|
||||
|
||||
def execute_request(content):
|
||||
try:
|
||||
code = content['code']
|
||||
code = content["code"]
|
||||
except KeyError:
|
||||
return execute_reply_internal_error(
|
||||
'Malformed message: content object missing "code"', sys.exc_info()
|
||||
@ -208,7 +217,7 @@ def execute_request(content):
|
||||
|
||||
clearOutputs()
|
||||
|
||||
output = result.pop('text/plain', '')
|
||||
output = result.pop("text/plain", "")
|
||||
|
||||
if stdout:
|
||||
output += stdout
|
||||
@ -220,14 +229,14 @@ def execute_request(content):
|
||||
|
||||
# Only add the output if it exists, or if there are no other mimetypes in the result.
|
||||
if output or not result:
|
||||
result['text/plain'] = output.rstrip()
|
||||
result["text/plain"] = output.rstrip()
|
||||
|
||||
return execute_reply_ok(result)
|
||||
|
||||
|
||||
# get or create spark session
|
||||
spark_session = kyuubi_util.get_spark_session()
|
||||
global_dict['spark'] = spark_session
|
||||
global_dict["spark"] = spark_session
|
||||
|
||||
|
||||
def main():
|
||||
@ -248,9 +257,9 @@ def main():
|
||||
while True:
|
||||
line = sys_stdin.readline()
|
||||
|
||||
if line == '':
|
||||
if line == "":
|
||||
break
|
||||
elif line == '\n':
|
||||
elif line == "\n":
|
||||
continue
|
||||
|
||||
try:
|
||||
@ -258,7 +267,7 @@ def main():
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
if content['cmd'] == 'exit_worker':
|
||||
if content["cmd"] == "exit_worker":
|
||||
break
|
||||
|
||||
result = execute_request(content)
|
||||
@ -272,5 +281,5 @@ def main():
|
||||
sys.stderr = sys_stderr
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
if __name__ == "__main__":
|
||||
sys.exit(main())
|
||||
|
||||
@ -34,18 +34,16 @@ def connect_to_exist_gateway() -> "JavaGateway":
|
||||
if os.environ.get("PYSPARK_PIN_THREAD", "true").lower() == "true":
|
||||
gateway = ClientServer(
|
||||
java_parameters=JavaParameters(
|
||||
port=gateway_port,
|
||||
auth_token=gateway_secret,
|
||||
auto_convert=True),
|
||||
python_parameters=PythonParameters(
|
||||
port=0,
|
||||
eager_load=False))
|
||||
port=gateway_port, auth_token=gateway_secret, auto_convert=True
|
||||
),
|
||||
python_parameters=PythonParameters(port=0, eager_load=False),
|
||||
)
|
||||
else:
|
||||
gateway = JavaGateway(
|
||||
gateway_parameters=GatewayParameters(
|
||||
port=gateway_port,
|
||||
auth_token=gateway_secret,
|
||||
auto_convert=True))
|
||||
port=gateway_port, auth_token=gateway_secret, auto_convert=True
|
||||
)
|
||||
)
|
||||
# gateway.proc = proc
|
||||
|
||||
# Import the classes used by PySpark
|
||||
@ -67,12 +65,14 @@ def _get_exist_spark_context(self, jconf):
|
||||
"""
|
||||
Initialize SparkContext in function to allow subclass specific initialization
|
||||
"""
|
||||
return self._jvm.JavaSparkContext(self._jvm.org.apache.spark.SparkContext.getOrCreate(jconf))
|
||||
return self._jvm.JavaSparkContext(
|
||||
self._jvm.org.apache.spark.SparkContext.getOrCreate(jconf)
|
||||
)
|
||||
|
||||
|
||||
def get_spark_session() -> "SparkSession":
|
||||
SparkContext._initialize_context = _get_exist_spark_context
|
||||
gateway = connect_to_exist_gateway()
|
||||
SparkContext._ensure_initialized(gateway=gateway)
|
||||
spark = SparkSession.builder.master('local').appName('test').getOrCreate()
|
||||
spark = SparkSession.builder.master("local").appName("test").getOrCreate()
|
||||
return spark
|
||||
|
||||
19
pom.xml
19
pom.xml
@ -223,6 +223,10 @@
|
||||
<!-- Package to use when relocating shaded classes. -->
|
||||
<kyuubi.shade.packageName>org.apache.kyuubi.shade</kyuubi.shade.packageName>
|
||||
|
||||
<!-- Needed for Spotless style check-->
|
||||
<spotless.python.includes/>
|
||||
<spotless.python.black.version>22.3.0</spotless.python.black.version>
|
||||
|
||||
<distMgmtReleaseId>apache.releases.https</distMgmtReleaseId>
|
||||
<distMgmtReleaseName>Apache Release Distribution Repository</distMgmtReleaseName>
|
||||
<distMgmtReleaseUrl>https://repository.apache.org/service/local/staging/deploy/maven2</distMgmtReleaseUrl>
|
||||
@ -2035,6 +2039,14 @@
|
||||
<file>${maven.multiModuleProjectDirectory}/.scalafmt.conf</file>
|
||||
</scalafmt>
|
||||
</scala>
|
||||
<python>
|
||||
<includes>
|
||||
<include>${spotless.python.includes}</include>
|
||||
</includes>
|
||||
<black>
|
||||
<version>${spotless.python.black.version}</version>
|
||||
</black>
|
||||
</python>
|
||||
</configuration>
|
||||
<executions>
|
||||
<execution>
|
||||
@ -2300,6 +2312,13 @@
|
||||
</modules>
|
||||
</profile>
|
||||
|
||||
<profile>
|
||||
<id>spotless-python</id>
|
||||
<properties>
|
||||
<spotless.python.includes>src/**/*.py</spotless.python.includes>
|
||||
</properties>
|
||||
</profile>
|
||||
|
||||
<profile>
|
||||
<id>apache-release</id>
|
||||
<build>
|
||||
|
||||
Loading…
Reference in New Issue
Block a user