From bb9aa660c8b59e12a8e93df02f1e510f1ca79f82 Mon Sep 17 00:00:00 2001 From: zwangsheng <2213335496@qq.com> Date: Fri, 5 Nov 2021 11:26:13 +0800 Subject: [PATCH] [KYUUBI #1330] fix tool cleaner process bug ### _Why are the changes needed?_ When using the tool to clean up residual Spark on K8s cache files, I found that the cleaner could go to sleep without reporting any error. After analysis, it was found that running `needToDeepClean` against the mounted SSD threw an exception that was not caught correctly. Now, a `try`/`catch` is added to ensure the function always completes normally, and deep cleaning is performed by default if a problem occurs. This prevents the disk space from overrunning because a cleaning opportunity was missed. ### _How was this patch tested?_ - [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [x] [Run test](https://kyuubi.readthedocs.io/en/latest/develop_tools/testing.html#running-tests) locally before making a pull request Closes #1330 from zwangsheng/tools/fix-process. 
Closes #1330 6daf483c [zwangsheng] catch nonfatal 89caf897 [zwangsheng] fix tool cleaner process bug b487cf8c [zwangsheng] fix tool cleaner process bug Authored-by: zwangsheng <2213335496@qq.com> Signed-off-by: ulysses-you (cherry picked from commit 60392349a777b6cdd02a6413e4871d3cf19969b7) Signed-off-by: ulysses-you --- .../tools/KubernetesSparkBlockCleaner.scala | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/tools/spark-block-cleaner/src/main/scala/org/apache/kyuubi/tools/KubernetesSparkBlockCleaner.scala b/tools/spark-block-cleaner/src/main/scala/org/apache/kyuubi/tools/KubernetesSparkBlockCleaner.scala index 801559da9..f2c51af15 100644 --- a/tools/spark-block-cleaner/src/main/scala/org/apache/kyuubi/tools/KubernetesSparkBlockCleaner.scala +++ b/tools/spark-block-cleaner/src/main/scala/org/apache/kyuubi/tools/KubernetesSparkBlockCleaner.scala @@ -154,13 +154,20 @@ object KubernetesSparkBlockCleaner extends Logging { import scala.sys.process._ private def needToDeepClean(dir: String): Boolean = { - val used = (s"df $dir" #| s"grep $dir").!! - .split(" ").filter(_.endsWith("%")) { - 0 - }.replace("%", "") - info(s"$dir now used $used% space") + try { + val used = (s"df $dir" #| s"grep $dir").!! + .split(" ").filter(_.endsWith("%")) { + 0 + }.replace("%", "") + info(s"$dir now used $used% space") - used.toInt > (100 - freeSpaceThreshold) + used.toInt > (100 - freeSpaceThreshold) + } catch { + case NonFatal(e) => + error(s"An error occurs when querying the disk $dir capacity, " + + s"return true to make sure the disk space will not overruns: ${e.getMessage}") + true + } } private def doCleanJob(dir: String): Unit = {