From d83674248e788966e208fed4e02ad25f002afd4b Mon Sep 17 00:00:00 2001 From: qiuliang Date: Thu, 9 Sep 2021 11:48:21 +0800 Subject: [PATCH] [KYUUBI #1034] Engine may deadlock when close operationLog ### _Why are the changes needed?_ ### _How was this patch tested?_ - [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [ ] [Run test](https://kyuubi.readthedocs.io/en/latest/develop_tools/testing.html#running-tests) locally before make a pull request Closes #1062 from qiuliang988/1034. Closes #1034 41d510f5 [qiuliang] add comments dbd7c770 [qiuliang] [KYUUBI #1034]Engine may deadlock when close operationLog Authored-by: qiuliang Signed-off-by: Cheng Pan --- .../kyuubi/engine/spark/operation/SparkOperation.scala | 8 +++++++- .../org/apache/kyuubi/operation/log/OperationLog.scala | 9 +++++++-- .../org/apache/kyuubi/operation/KyuubiOperation.scala | 7 +++++++ 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/SparkOperation.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/SparkOperation.scala index 67d1ecd69..cc945f7ff 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/SparkOperation.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/operation/SparkOperation.scala @@ -17,6 +17,7 @@ package org.apache.kyuubi.engine.spark.operation +import java.io.IOException import java.time.ZoneId import org.apache.commons.lang3.StringUtils @@ -122,7 +123,12 @@ abstract class SparkOperation(spark: SparkSession, opType: OperationType, sessio override def close(): Unit = { cleanup(OperationState.CLOSED) - getOperationLog.foreach(_.close()) + try { + getOperationLog.foreach(_.close()) + } catch { + case e: IOException => + error(e.getMessage, e) + } } override def getResultSetSchema: TTableSchema = SchemaHelper.toTTableSchema(resultSchema) diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/log/OperationLog.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/log/OperationLog.scala index 2976f3e0d..1e485532a 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/log/OperationLog.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/operation/log/OperationLog.scala @@ -83,7 +83,7 @@ object OperationLog extends Logging { } } -class OperationLog(path: Path) extends Logging { +class OperationLog(path: Path) { private val writer = Files.newBufferedWriter(path, StandardCharsets.UTF_8) private val reader = Files.newBufferedReader(path, StandardCharsets.UTF_8) @@ -134,7 +134,12 @@ class OperationLog(path: Path) extends Logging { Files.delete(path) } catch { case e: IOException => - error(s"Failed to remove corresponding log file of operation: ${path.toAbsolutePath}", e) + // Printing log here may cause a deadlock. The lock order of OperationLog.write + // is RootLogger -> LogDivertAppender -> OperationLog. If printing log here, the + // lock order is OperationLog -> RootLogger. So the exception is thrown and + // processing at the invocations + throw new IOException( + s"Failed to remove corresponding log file of operation: ${path.toAbsolutePath}", e) } } } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/KyuubiOperation.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/KyuubiOperation.scala index 5ba246441..256815837 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/KyuubiOperation.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/operation/KyuubiOperation.scala @@ -17,6 +17,8 @@ package org.apache.kyuubi.operation +import java.io.IOException + import com.codahale.metrics.MetricRegistry import org.apache.commons.lang3.StringUtils import org.apache.hive.service.rpc.thrift._ @@ -97,6 +99,11 @@ abstract class KyuubiOperation( if (_remoteOpHandle != null && !isClosedOrCanceled) { try { getOperationLog.foreach(_.close()) + } catch { + case e: IOException => + error(e.getMessage, e) + } + try { client.closeOperation(_remoteOpHandle) setState(OperationState.CLOSED) } catch onError("closing")