From 4bbc8aec4f7ab325c7d46187aefc6323f01a71e9 Mon Sep 17 00:00:00 2001 From: Angerszhuuuu Date: Wed, 26 Apr 2023 16:45:44 +0800 Subject: [PATCH] [CELEBORN-555][REFACTOR] Avoid prin noisy blacklist info when record blacklist (#1460) * [CELEBORN-555][REFACTOR] Avoid prin noisy blacklist info when record blacklist --- .../celeborn/client/WorkerStatusTracker.scala | 44 ++++++++++++------- 1 file changed, 28 insertions(+), 16 deletions(-) diff --git a/client/src/main/scala/org/apache/celeborn/client/WorkerStatusTracker.scala b/client/src/main/scala/org/apache/celeborn/client/WorkerStatusTracker.scala index 89e4f8618..239b477a4 100644 --- a/client/src/main/scala/org/apache/celeborn/client/WorkerStatusTracker.scala +++ b/client/src/main/scala/org/apache/celeborn/client/WorkerStatusTracker.scala @@ -99,25 +99,37 @@ class WorkerStatusTracker( case _ => } } - if (!failedWorker.isEmpty) { - recordWorkerFailure(failedWorker) - } + recordWorkerFailure(failedWorker) } def recordWorkerFailure(failures: ShuffleFailedWorkers): Unit = { - val failedWorker = new ShuffleFailedWorkers(failures) - logInfo(s"Report Worker Failure: ${failedWorker.asScala}, current blacklist $blacklist") - failedWorker.asScala.foreach { case (worker, (statusCode, registerTime)) => - if (!blacklist.containsKey(worker)) { - blacklist.put(worker, (statusCode, registerTime)) - } else { - statusCode match { - case StatusCode.WORKER_SHUTDOWN | - StatusCode.NO_AVAILABLE_WORKING_DIR | - StatusCode.RESERVE_SLOTS_FAILED | - StatusCode.UNKNOWN_WORKER => - blacklist.put(worker, (statusCode, blacklist.get(worker)._2)) - case _ => // Not cover + if (!failures.isEmpty) { + val failedWorker = new ShuffleFailedWorkers(failures) + val failedWorkerMsg = failedWorker.asScala.map { case (worker, (status, time)) => + s"${worker.readableAddress()} ${status.name()} $time" + }.mkString("\n") + val blacklistMsg = blacklist.asScala.map { case (worker, (status, time)) => + s"${worker.readableAddress()} ${status.name()} $time" + }.mkString("\n") + logInfo( + s""" + |Reporting Worker Failure: + |$failedWorkerMsg + |Current blacklist: + |$blacklistMsg + """.stripMargin) + failedWorker.asScala.foreach { case (worker, (statusCode, registerTime)) => + if (!blacklist.containsKey(worker)) { + blacklist.put(worker, (statusCode, registerTime)) + } else { + statusCode match { + case StatusCode.WORKER_SHUTDOWN | + StatusCode.NO_AVAILABLE_WORKING_DIR | + StatusCode.RESERVE_SLOTS_FAILED | + StatusCode.UNKNOWN_WORKER => + blacklist.put(worker, (statusCode, blacklist.get(worker)._2)) + case _ => // Not cover + } } } }