From dc5f3fb96b608eb6eff4aec30f69f2c16ae5b1b2 Mon Sep 17 00:00:00 2001 From: jiang13021 Date: Mon, 21 Oct 2024 21:06:06 +0800 Subject: [PATCH] [CELEBORN-1662] Handle PUSH_DATA_FAIL_PARTITION_NOT_FOUND in getPushDataFailCause ### What changes were proposed in this pull request? Add a branch to the failure-cause if/else chain in getPushDataFailCause, placed after the PUSH_DATA_REPLICA_WORKER_EXCLUDED check and before the connection-failure check, that maps messages starting with PUSH_DATA_FAIL_PARTITION_NOT_FOUND to that status code. ### Why are the changes needed? Currently, the getPushDataFailCause method does not identify or handle the PUSH_DATA_FAIL_PARTITION_NOT_FOUND error type. All other failure causes are explicitly checked and managed, but this specific error type is overlooked. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Manual test Closes #2833 from jiang13021/celeborn-1662. Authored-by: jiang13021 Signed-off-by: SteNicholas --- .../main/java/org/apache/celeborn/client/ShuffleClientImpl.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/client/src/main/java/org/apache/celeborn/client/ShuffleClientImpl.java b/client/src/main/java/org/apache/celeborn/client/ShuffleClientImpl.java index 31677f7b8..b43bd9598 100644 --- a/client/src/main/java/org/apache/celeborn/client/ShuffleClientImpl.java +++ b/client/src/main/java/org/apache/celeborn/client/ShuffleClientImpl.java @@ -1888,6 +1888,8 @@ public class ShuffleClientImpl extends ShuffleClient { cause = StatusCode.PUSH_DATA_PRIMARY_WORKER_EXCLUDED; } else if (message.startsWith(StatusCode.PUSH_DATA_REPLICA_WORKER_EXCLUDED.name())) { cause = StatusCode.PUSH_DATA_REPLICA_WORKER_EXCLUDED; + } else if (message.startsWith(StatusCode.PUSH_DATA_FAIL_PARTITION_NOT_FOUND.name())) { + cause = StatusCode.PUSH_DATA_FAIL_PARTITION_NOT_FOUND; } else if (ExceptionUtils.connectFail(message)) { // Throw when push to primary worker connection causeException. cause = StatusCode.PUSH_DATA_CONNECTION_EXCEPTION_PRIMARY;