From 75891d1a92143d4d25de9680101b06fc43a3a199 Mon Sep 17 00:00:00 2001 From: "Wang, Fei" Date: Thu, 24 Apr 2025 22:40:28 -0700 Subject: [PATCH] [KYUUBI #7034][FOLLOWUP] Decouple the kubernetes pod name and app name ### Why are the changes needed? Followup for #7034 to fix the SparkOnKubernetesTestsSuite. Sorry, I forget that the appInfo name and pod name were deeply bound before, the appInfo name was used as pod name and used to delete pod. In this PR, we add `podName` into applicationInfo to separate app name and pod name. ### How was this patch tested? GA should pass. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #7039 from turboFei/fix_test. Closes #7034 0ff7018d6 [Wang, Fei] revert 18e48c079 [Wang, Fei] comments 19f34bc83 [Wang, Fei] do not get pod name from appName c1d308437 [Wang, Fei] reduce interval for test stability 50fad6bc5 [Wang, Fei] fix ut Authored-by: Wang, Fei Signed-off-by: Wang, Fei --- .../test/spark/SparkOnKubernetesTestsSuite.scala | 2 +- .../kyuubi/engine/ApplicationOperation.scala | 7 +++++-- .../engine/KubernetesApplicationOperation.scala | 15 +++++++++++---- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/spark/SparkOnKubernetesTestsSuite.scala b/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/spark/SparkOnKubernetesTestsSuite.scala index 239454f49..562ee6379 100644 --- a/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/spark/SparkOnKubernetesTestsSuite.scala +++ b/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/spark/SparkOnKubernetesTestsSuite.scala @@ -230,7 +230,7 @@ class KyuubiOperationKubernetesClusterClusterModeSuite appMgrInfo, sessionHandle.identifier.toString) assert(appInfo.state == RUNNING) - assert(appInfo.name.startsWith(driverPodNamePrefix)) + assert(appInfo.podName.exists(_.startsWith(driverPodNamePrefix))) } val killResponse = k8sOperation.killApplicationByTag( diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/ApplicationOperation.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/ApplicationOperation.scala index a08f37a45..d54dff1a6 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/ApplicationOperation.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/ApplicationOperation.scala @@ -113,7 +113,9 @@ case class ApplicationInfo( name: String, state: ApplicationState, url: Option[String] = None, - error: Option[String] = None) { + error: Option[String] = None, + // only used for K8s and still possible to be None for cases like NOT_FOUND state for K8s cases + podName: Option[String] = None) { def toMap: Map[String, String] = { Map( @@ -121,7 +123,8 @@ case class ApplicationInfo( "name" -> name, "state" -> state.toString, "url" -> url.orNull, - "error" -> error.orNull) + "error" -> error.orNull) ++ + podName.map("podName" -> _).toMap } } diff --git a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KubernetesApplicationOperation.scala b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KubernetesApplicationOperation.scala index 4564c1eb5..c53dda0e3 100644 --- a/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KubernetesApplicationOperation.scala +++ b/kyuubi-server/src/main/scala/org/apache/kyuubi/engine/KubernetesApplicationOperation.scala @@ -139,7 +139,7 @@ class KubernetesApplicationOperation extends ApplicationOperation with Logging { case COMPLETED => !ApplicationState.isFailed(notification.getValue) } if (shouldDelete) { - deletePod(kubernetesInfo, removed.name, appLabel) + deletePod(kubernetesInfo, removed.podName.orNull, appLabel) } info(s"Remove terminated application $removed with ${toLabel(appLabel)}") } @@ -218,8 +218,13 @@ class KubernetesApplicationOperation extends ApplicationOperation with Logging { false, s"[$kubernetesInfo] Target application[${toLabel(tag)}] is in ${info.state} state") case _ => + val deleted = info.podName match { + case Some(podName) => !kubernetesClient.pods.withName(podName).delete().isEmpty + case None => + !kubernetesClient.pods.withLabel(LABEL_KYUUBI_UNIQUE_KEY, tag).delete().isEmpty + } ( - !kubernetesClient.pods.withName(info.name).delete().isEmpty, + deleted, s"[$kubernetesInfo] Operation of deleted" + s" application[appId: ${info.id}, ${toLabel(tag)}] is completed") } @@ -410,7 +415,8 @@ class KubernetesApplicationOperation extends ApplicationOperation with Logging { id = getPodAppId(pod), name = getPodAppName(pod), state = appState, - error = appError)) + error = appError, + podName = Some(pod.getMetadata.getName))) }.getOrElse { appInfoStore.put( kyuubiUniqueKey, @@ -418,7 +424,8 @@ class KubernetesApplicationOperation extends ApplicationOperation with Logging { id = getPodAppId(pod), name = getPodAppName(pod), state = appState, - error = appError)) + error = appError, + podName = Some(pod.getMetadata.getName))) } } }