[KYUUBI #1039] Kill YARN job when engine initialization times out and yarnApplicationState is ACCEPTED
Kill the YARN job when engine initialization times out and yarnApplicationState is ACCEPTED #1039 ### _How was this patch tested?_ - [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [ ] [Run test](https://kyuubi.readthedocs.io/en/latest/develop_tools/testing.html#running-tests) locally before making a pull request Closes #1119 from simon824/killyarnjob. Closes #1039 7face4db [simon] get KYUUBI_HOME da8c3ec4 [Simon] Merge branch 'apache:master' into killyarnjob 95ae8f59 [simon] add appId 8a6ddcd1 [simon] kill application by script e0cd2af8 [simon] kill application by script d15d38c5 [simon] kill application by script 1fd1373b [simon] mv sparklauncher to sparkProcessBuilder 5e806426 [simon] fix codestyle a9cc4505 [simon] fix option No value Exception 9c7ca2f8 [simon] fix option No value Exception bbfe8e25 [simon] add set sparkHome 04f23c8a [simon] fix codestyle 75599233 [simon] add spark-launcher dep ef4b2706 [simon] implement by sparkLauncher 59c25b7c [simon] kill yarn application by restful api 49921a48 [simon] fix ut a31d8f6a [simon] fix ut 1abc6665 [simon] rename killApplication 43a95c1d [simon] Merge branch 'master' into killyarnjob 3d9c12d3 [simon] Merge remote-tracking branch 'upstream/master' 9eaeb16d [simon] fix ut 64ee1b11 [simon] code style 5b905dbd [simon] Merge remote-tracking branch 'upstream/master' 139f3b79 [Simon] Merge branch 'apache:master' into killyarnjob 1a52401d [simon] #1039 Lead-authored-by: simon <zhangshiming@cvte.com> Co-authored-by: Simon <3656562@qq.com> Signed-off-by: ulysses-you <ulyssesyou@apache.org>
This commit is contained in:
parent
6420a7eb04
commit
2ae6acda65
29
bin/stop-application.sh
Executable file
29
bin/stop-application.sh
Executable file
@ -0,0 +1,29 @@
|
||||
#!/usr/bin/env bash
|
||||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
|
||||
# Kills the YARN application whose id is passed as the first argument.
#
# The checks below use the POSIX `[ ]` test instead of the bash-only `[[ ]]`
# so the script behaves the same when it is run through /bin/sh (which is not
# necessarily bash) rather than through its shebang.

# Require the application id argument; -lt compares numerically.
if [ "$#" -lt 1 ]; then
  echo "USAGE: $0 <application_id>"
  exit 1
fi

# The yarn CLI is resolved relative to HADOOP_HOME, so it must be set.
if [ -z "${HADOOP_HOME}" ]; then
  echo "Error: HADOOP_HOME IS NOT SET! CANNOT PROCEED."
  exit 1
fi

# Quote both expansions so paths or ids containing whitespace are not split.
"${HADOOP_HOME}/bin/yarn" application -kill "$1"
|
||||
@ -182,10 +182,11 @@ private[kyuubi] class EngineRef(
|
||||
}
|
||||
}
|
||||
if (started + timeout <= System.currentTimeMillis()) {
|
||||
val killMessage = builder.killApplication()
|
||||
process.destroyForcibly()
|
||||
MetricsSystem.tracing(_.incCount(MetricRegistry.name(ENGINE_TIMEOUT, appUser)))
|
||||
throw KyuubiSQLException(
|
||||
s"Timeout($timeout ms) to launched Spark with $builder",
|
||||
s"Timeout($timeout ms) to launched Spark with $builder. $killMessage",
|
||||
builder.getError)
|
||||
}
|
||||
engineRef = getEngineByRefId(zkClient, engineSpace, engineRefId)
|
||||
|
||||
@ -22,6 +22,7 @@ import java.nio.charset.StandardCharsets
|
||||
import java.nio.file.{Files, Path}
|
||||
|
||||
import scala.collection.JavaConverters._
|
||||
import scala.util.matching.Regex
|
||||
|
||||
import org.apache.commons.lang3.StringUtils.containsIgnoreCase
|
||||
|
||||
@ -148,6 +149,29 @@ trait ProcBuilder {
|
||||
proc
|
||||
}
|
||||
|
||||
/** Matches text of the form `application_<digits>_<digits>`; used to find a YARN application id. */
val YARN_APP_NAME_REGEX: Regex = "application_\\d+_\\d+".r
|
||||
|
||||
/**
 * Tries to kill the YARN application whose id appears in `line`.
 *
 * The first match of [[YARN_APP_NAME_REGEX]] in `line` is taken as the application id.
 * When `KYUUBI_HOME` is present in `env`, `bin/stop-application.sh` under that directory is
 * run through `/bin/sh` with `env` as its environment and its stdout/stderr redirected to
 * `engineLog`. The call blocks until the script exits.
 *
 * @param line text to search for an application id; defaults to `lastRowOfLog`
 * @return an empty string when no application id is found in `line`; otherwise a message
 *         saying whether the kill succeeded, failed (non-zero exit code), or could not be
 *         attempted because `KYUUBI_HOME` is not set
 */
def killApplication(line: String = lastRowOfLog): String =
  YARN_APP_NAME_REGEX.findFirstIn(line).fold("") { yarnAppId =>
    env.get(KyuubiConf.KYUUBI_HOME).fold(
      s"KYUUBI_HOME is not set! Failed to kill Application $yarnAppId, please kill it manually."
    ) { home =>
      val stopScript = s"$home/bin/stop-application.sh"
      val killer = new ProcessBuilder("/bin/sh", stopScript, yarnAppId)
      // Hand the script the same environment the builder uses.
      killer.environment().putAll(env.asJava)
      killer.redirectError(engineLog)
      killer.redirectOutput(engineLog)
      // Block until the script finishes; exit code 0 means the kill succeeded.
      val exitCode = killer.start().waitFor()
      if (exitCode == 0) {
        s"Killed Application $yarnAppId successfully. "
      } else {
        s"Failed to kill Application $yarnAppId, please kill it manually. "
      }
    }
  }
|
||||
|
||||
def close(): Unit = {
|
||||
if (logCaptureThread != null) {
|
||||
logCaptureThread.interrupt()
|
||||
|
||||
Loading…
Reference in New Issue
Block a user