[CELEBORN-84][IMPROVEMENT] Blacklist critical reason should avoid being covered by normal reason (#1043)

* [CELEBORN-84][IMPROVEMENT] Blacklist critical reason should avoid being covered by normal reason
This commit is contained in:
Angerszhuuuu 2022-12-05 14:02:33 +08:00 committed by GitHub
parent b262591da8
commit 5eaad136a0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1924,7 +1924,20 @@ class LifecycleManager(appId: String, val conf: CelebornConf) extends RpcEndpoin
: Unit = {
val failedWorker = new ConcurrentHashMap[WorkerInfo, (StatusCode, Long)](failures)
logInfo(s"Report Worker Failure: ${failedWorker.asScala}, current blacklist $blacklist")
blacklist.putAll(failedWorker)
failedWorker.asScala.foreach { case (worker, (statusCode, registerTime)) =>
if (!blacklist.containsKey(worker)) {
blacklist.put(worker, (statusCode, registerTime))
} else {
statusCode match {
case StatusCode.WORKER_SHUTDOWN |
StatusCode.NO_AVAILABLE_WORKING_DIR |
StatusCode.RESERVE_SLOTS_FAILED |
StatusCode.UNKNOWN_WORKER =>
blacklist.put(worker, (statusCode, blacklist.get(worker)._2))
case _ => // Not cover
}
}
}
}
def checkQuota(): Boolean = {