[KYUUBI #1039] Kill YARN job when engine initialization times out and yarnApplicationState is ACCEPTED

Kill the YARN job when engine initialization times out and yarnApplicationState is ACCEPTED.
#1039

### _How was this patch tested?_
- [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible

- [ ] Add screenshots for manual tests if appropriate

- [ ] [Run test](https://kyuubi.readthedocs.io/en/latest/develop_tools/testing.html#running-tests) locally before making a pull request

Closes #1119 from simon824/killyarnjob.

Closes #1039

7face4db [simon] get KYUUBI_HOME
da8c3ec4 [Simon] Merge branch 'apache:master' into killyarnjob
95ae8f59 [simon] add appId
8a6ddcd1 [simon] kill application by script
e0cd2af8 [simon] kill application by script
d15d38c5 [simon] kill application by script
1fd1373b [simon] mv sparklauncher to sparkProcessBuilder
5e806426 [simon] fix codestyle
a9cc4505 [simon] fix option No value Exception
9c7ca2f8 [simon] fix option No value Exception
bbfe8e25 [simon] add set sparkHome
04f23c8a [simon] fix codestyle
75599233 [simon] add spark-launcher dep
ef4b2706 [simon] implement by sparkLauncher
59c25b7c [simon] kill yarn application by restful api
49921a48 [simon] fix ut
a31d8f6a [simon] fix ut
1abc6665 [simon] rename killApplication
43a95c1d [simon] Merge branch 'master' into killyarnjob
3d9c12d3 [simon] Merge remote-tracking branch 'upstream/master'
9eaeb16d [simon] fix ut
64ee1b11 [simon] code style
5b905dbd [simon] Merge remote-tracking branch 'upstream/master'
139f3b79 [Simon] Merge branch 'apache:master' into killyarnjob
1a52401d [simon] #1039

Lead-authored-by: simon <zhangshiming@cvte.com>
Co-authored-by: Simon <3656562@qq.com>
Signed-off-by: ulysses-you <ulyssesyou@apache.org>
This commit is contained in:
simon 2021-10-08 16:01:08 +08:00 committed by ulysses-you
parent 6420a7eb04
commit 2ae6acda65
No known key found for this signature in database
GPG Key ID: 4C500BC62D576766
3 changed files with 55 additions and 1 deletions

29
bin/stop-application.sh Executable file
View File

@ -0,0 +1,29 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Kill the YARN application whose id is given as the first argument.
#
# Usage: stop-application.sh <application_id>
#
# Exits with status 1 when the application id is missing or HADOOP_HOME is
# unset/empty; otherwise exits with the status of `yarn application -kill`.

# Use the POSIX `[ ... ]` test with a numeric comparison so the guards also
# work when the script is run through a plain `sh` instead of bash
# (`[[ $# < 1 ]]` is bash-only and compares strings, not numbers).
if [ "$#" -lt 1 ]; then
  echo "USAGE: $0 <application_id>"
  exit 1
fi

if [ -z "${HADOOP_HOME}" ]; then
  echo "Error: HADOOP_HOME IS NOT SET! CANNOT PROCEED."
  exit 1
fi

# Quote both expansions so paths or arguments containing whitespace are
# passed through as single words.
"${HADOOP_HOME}/bin/yarn" application -kill "$1"

View File

@ -182,10 +182,11 @@ private[kyuubi] class EngineRef(
}
}
if (started + timeout <= System.currentTimeMillis()) {
val killMessage = builder.killApplication()
process.destroyForcibly()
MetricsSystem.tracing(_.incCount(MetricRegistry.name(ENGINE_TIMEOUT, appUser)))
throw KyuubiSQLException(
s"Timeout($timeout ms) to launched Spark with $builder",
s"Timeout($timeout ms) to launched Spark with $builder. $killMessage",
builder.getError)
}
engineRef = getEngineByRefId(zkClient, engineSpace, engineRefId)

View File

@ -22,6 +22,7 @@ import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Path}
import scala.collection.JavaConverters._
import scala.util.matching.Regex
import org.apache.commons.lang3.StringUtils.containsIgnoreCase
@ -148,6 +149,29 @@ trait ProcBuilder {
proc
}
// Matches the literal text "application_" followed by two underscore-separated
// runs of digits; used to pick an application id out of a line of text.
// NOTE(review): presumably this is the YARN application id format
// (application_<clusterTimestamp>_<sequence>) - confirm.
val YARN_APP_NAME_REGEX: Regex = "application_\\d+_\\d+".r
/**
 * Best-effort kill of the application whose id appears in `line`.
 *
 * The first match of [[YARN_APP_NAME_REGEX]] in `line` is taken as the application id and
 * passed to `$KYUUBI_HOME/bin/stop-application.sh`, which is run with this builder's `env`.
 * The script's stdout/stderr are appended to `engineLog`.
 *
 * This method does not throw for non-fatal failures (e.g. the script cannot be started);
 * the failure is reported through the returned message instead, so callers can continue
 * their own cleanup.
 *
 * @param line text to search for an application id; defaults to `lastRowOfLog`
 * @return a human-readable message describing the outcome, or an empty string when `line`
 *         is null or contains no application id
 */
def killApplication(line: String = lastRowOfLog): String = {
  import scala.util.control.NonFatal

  // Option(...) turns a null line into None instead of letting the regex throw.
  Option(line).flatMap(text => YARN_APP_NAME_REGEX.findFirstIn(text)) match {
    case Some(appId) =>
      env.get(KyuubiConf.KYUUBI_HOME) match {
        case Some(kyuubiHome) =>
          try {
            val pb = new ProcessBuilder("/bin/sh", s"$kyuubiHome/bin/stop-application.sh", appId)
            pb.environment().putAll(env.asJava)
            // Append rather than redirect to the bare file: redirecting to a File truncates
            // it, which would wipe the engine log that is needed to diagnose the failure.
            pb.redirectError(ProcessBuilder.Redirect.appendTo(engineLog))
            pb.redirectOutput(ProcessBuilder.Redirect.appendTo(engineLog))
            if (pb.start().waitFor() == 0) {
              s"Killed Application $appId successfully. "
            } else {
              s"Failed to kill Application $appId, please kill it manually. "
            }
          } catch {
            // e.g. IOException when the script cannot be launched; report it in the message
            // instead of propagating so the caller's remaining cleanup still runs.
            case NonFatal(e) =>
              s"Failed to kill Application $appId (${e.getMessage}), please kill it manually. "
          }
        case None =>
          s"KYUUBI_HOME is not set! Failed to kill Application $appId, please kill it manually."
      }
    case None => ""
  }
}
def close(): Unit = {
if (logCaptureThread != null) {
logCaptureThread.interrupt()