From 2ae6acda65d4c5217eb59f9c3a2edcce30ad668d Mon Sep 17 00:00:00 2001
From: simon
Date: Fri, 8 Oct 2021 16:01:08 +0800
Subject: [PATCH] [KYUUBI #1039] Kill yarn job when engine initialize timeout
 and yarnApplicationState is ACCEPTED

kill yarn job when engine initialize timeout and yarnApplicationState is ACCEPTED
#1039

### _How was this patch tested?_
- [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible

- [ ] Add screenshots for manual tests if appropriate

- [ ] [Run test](https://kyuubi.readthedocs.io/en/latest/develop_tools/testing.html#running-tests) locally before make a pull request

Closes #1119 from simon824/killyarnjob.

Closes #1039

7face4db [simon] get KYUUBI_HOME
da8c3ec4 [Simon] Merge branch 'apache:master' into killyarnjob
95ae8f59 [simon] add appId
8a6ddcd1 [simon] kill application by script
e0cd2af8 [simon] kill application by script
d15d38c5 [simon] kill application by script
1fd1373b [simon] mv sparklauncher to sparkProcessBuilder
5e806426 [simon] fix codestyle
a9cc4505 [simon] fix option No value Exception
9c7ca2f8 [simon] fix option No value Exception
bbfe8e25 [simon] add set sparkHome
04f23c8a [simon] fix codestyle
75599233 [simon] add spark-launcher dep
ef4b2706 [simon] implement by sparkLauncher
59c25b7c [simon] kill yarn application by restful api
49921a48 [simon] fix ut
a31d8f6a [simon] fix ut
1abc6665 [simon] rename killApplication
43a95c1d [simon] Merge branch 'master' into killyarnjob
3d9c12d3 [simon] Merge remote-tracking branch 'upstream/master'
9eaeb16d [simon] fix ut
64ee1b11 [simon] code style
5b905dbd [simon] Merge remote-tracking branch 'upstream/master'
139f3b79 [Simon] Merge branch 'apache:master' into killyarnjob
1a52401d [simon] #1039

Lead-authored-by: simon
Co-authored-by: Simon <3656562@qq.com>
Signed-off-by: ulysses-you
---
Reviewer note: the added lines of bin/stop-application.sh and of
ProcBuilder.killApplication below were revised during review (POSIX test
syntax and quoting in the script; appending to the engine log and not letting
exceptions escape in killApplication). The post-image blob ids on the "index"
lines of those two files therefore refer to the pre-review revision.

 bin/stop-application.sh                       | 32 +++++++++++++++
 .../org/apache/kyuubi/engine/EngineRef.scala  |  3 +-
 .../apache/kyuubi/engine/ProcBuilder.scala    | 39 +++++++++++++++++++
 3 files changed, 73 insertions(+), 1 deletion(-)
 create mode 100755 bin/stop-application.sh

diff --git a/bin/stop-application.sh b/bin/stop-application.sh
new file mode 100755
index 000000000..3fadde416
--- /dev/null
+++ b/bin/stop-application.sh
@@ -0,0 +1,32 @@
+#!/usr/bin/env bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Kills the YARN application whose id is passed as the first argument.
+# Uses POSIX test syntax only, because ProcBuilder launches it through /bin/sh.
+
+if [ "$#" -lt 1 ]; then
+  echo "USAGE: $0 <application_id>"
+  exit 1
+fi
+
+if [ -z "${HADOOP_HOME}" ]; then
+  echo "Error: HADOOP_HOME IS NOT SET! CANNOT PROCEED."
+  exit 1
+fi
+
+"${HADOOP_HOME}/bin/yarn" application -kill "$1"
diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala
index 20bb6dbf2..8abcdaf71 100644
--- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala
+++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/EngineRef.scala
@@ -182,10 +182,11 @@ private[kyuubi] class EngineRef(
           }
         }
         if (started + timeout <= System.currentTimeMillis()) {
+          val killMessage = builder.killApplication()
           process.destroyForcibly()
           MetricsSystem.tracing(_.incCount(MetricRegistry.name(ENGINE_TIMEOUT, appUser)))
           throw KyuubiSQLException(
-            s"Timeout($timeout ms) to launched Spark with $builder",
+            s"Timeout($timeout ms) to launched Spark with $builder. $killMessage",
             builder.getError)
         }
         engineRef = getEngineByRefId(zkClient, engineSpace, engineRefId)
diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/ProcBuilder.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/ProcBuilder.scala
index 6e48170e7..62e325581 100644
--- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/ProcBuilder.scala
+++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/ProcBuilder.scala
@@ -22,6 +22,7 @@ import java.nio.charset.StandardCharsets
 import java.nio.file.{Files, Path}
 
 import scala.collection.JavaConverters._
+import scala.util.matching.Regex
 
 import org.apache.commons.lang3.StringUtils.containsIgnoreCase
 
@@ -148,6 +149,44 @@ trait ProcBuilder {
     proc
   }
 
+  val YARN_APP_NAME_REGEX: Regex = "application_\\d+_\\d+".r
+
+  /**
+   * Kills the YARN application whose id is found in `line` by running
+   * `bin/stop-application.sh` under KYUUBI_HOME.
+   *
+   * @param line text searched for a YARN application id; defaults to `lastRowOfLog`
+   * @return a message describing the outcome of the kill attempt, or an empty string when
+   *         `line` contains no application id
+   */
+  def killApplication(line: String = lastRowOfLog): String =
+    YARN_APP_NAME_REGEX.findFirstIn(line) match {
+      case Some(appId) =>
+        env.get(KyuubiConf.KYUUBI_HOME) match {
+          case Some(kyuubiHome) =>
+            // Never throw from here: the caller still has to destroy the engine process and
+            // raise the timeout error, so any failure is reported through the message.
+            try {
+              val script = s"$kyuubiHome/bin/stop-application.sh"
+              val pb = new ProcessBuilder("/bin/sh", script, appId)
+              pb.environment().putAll(env.asJava)
+              // Append: redirecting to a bare File would truncate the engine log.
+              pb.redirectError(ProcessBuilder.Redirect.appendTo(engineLog))
+              pb.redirectOutput(ProcessBuilder.Redirect.appendTo(engineLog))
+              pb.start().waitFor() match {
+                case 0 => s"Killed Application $appId successfully. "
+                case _ => s"Failed to kill Application $appId, please kill it manually. "
+              }
+            } catch {
+              case scala.util.control.NonFatal(e) =>
+                s"Failed to kill Application $appId due to $e, please kill it manually. "
+            }
+          case None =>
+            s"KYUUBI_HOME is not set! Failed to kill Application $appId, please kill it manually."
+        }
+      case None => ""
+    }
+
   def close(): Unit = {
     if (logCaptureThread != null) {
       logCaptureThread.interrupt()