From 2cb34c4d007ac19b359767963225ee422d765618 Mon Sep 17 00:00:00 2001 From: Cheng Pan Date: Mon, 10 Oct 2022 12:04:41 +0800 Subject: [PATCH] [KYUUBI #3597] Engine should prefer ip for registering on K8s cluster mode ### _Why are the changes needed?_ When Kyuubi runs outside of K8s without enhanced DNS infrastructure, Kyuubi cannot access the Pod by using the hostname of the Pod, which blocks users from running Spark on K8s in cluster mode out of the box. Kyuubi provides a configuration `kyuubi.frontend.connection.url.use.hostname`; turning it off could address this issue, but we cannot change the default value globally because of https://github.com/apache/incubator-kyuubi/issues/2266 To improve the user experience, we can detect whether the Driver is running inside a Pod and, if so, change the default value of `kyuubi.frontend.connection.url.use.hostname` to `false`. Close #3578 ### _How was this patch tested?_ - [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [ ] [Run test](https://kyuubi.apache.org/docs/latest/develop_tools/testing.html#running-tests) locally before making a pull request Closes #3597 from pan3793/k8s.
Closes #3597 8b411781 [Cheng Pan] doc 0df15e79 [Cheng Pan] Engine should prefer to use ip for registing on K8s cluster mode Authored-by: Cheng Pan Signed-off-by: Cheng Pan --- docs/deployment/settings.md | 2 +- .../org/apache/kyuubi/engine/spark/SparkSQLEngine.scala | 6 +++++- .../test/deployment/KyuubiOnKubernetesTestsSuite.scala | 3 +-- .../kubernetes/test/spark/SparkOnKubernetesTestsSuite.scala | 3 +-- kyuubi-common/src/main/scala/org/apache/kyuubi/Utils.scala | 2 ++ .../main/scala/org/apache/kyuubi/config/KyuubiConf.scala | 4 +++- 6 files changed, 13 insertions(+), 7 deletions(-) diff --git a/docs/deployment/settings.md b/docs/deployment/settings.md index a845b4472..5905f8689 100644 --- a/docs/deployment/settings.md +++ b/docs/deployment/settings.md @@ -281,7 +281,7 @@ Key | Default | Meaning | Type | Since kyuubi.frontend.backoff.slot.length|PT0.1S|(deprecated) Time to back off during login to the thrift frontend service.|duration|1.0.0 kyuubi.frontend.bind.host|<undefined>|(deprecated) Hostname or IP of the machine on which to run the thrift frontend service via binary protocol.|string|1.0.0 kyuubi.frontend.bind.port|10009|(deprecated) Port of the machine on which to run the thrift frontend service via binary protocol.|int|1.0.0 -kyuubi.frontend.connection.url.use.hostname|true|When true, frontend services prefer hostname, otherwise, ip address|boolean|1.5.0 +kyuubi.frontend.connection.url.use.hostname|true|When true, frontend services prefer hostname, otherwise, ip address. 
Note that, the default value is set to `false` when engine running on Kubernetes to prevent potential network issue.|boolean|1.5.0 kyuubi.frontend.login.timeout|PT20S|(deprecated) Timeout for Thrift clients during login to the thrift frontend service.|duration|1.0.0 kyuubi.frontend.max.message.size|104857600|(deprecated) Maximum message size in bytes a Kyuubi server will accept.|int|1.0.0 kyuubi.frontend.max.worker.threads|999|(deprecated) Maximum number of threads in the of frontend worker thread pool for the thrift frontend service|int|1.0.0 diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/SparkSQLEngine.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/SparkSQLEngine.scala index bd8486689..17b272315 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/SparkSQLEngine.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/SparkSQLEngine.scala @@ -164,9 +164,13 @@ object SparkSQLEngine extends Logging { val defaultCat = if (KyuubiSparkUtil.hiveClassesArePresent) "hive" else "in-memory" _sparkConf.setIfMissing("spark.sql.catalogImplementation", defaultCat) - kyuubiConf.setIfMissing(KyuubiConf.FRONTEND_THRIFT_BINARY_BIND_PORT, 0) + kyuubiConf.setIfMissing(FRONTEND_THRIFT_BINARY_BIND_PORT, 0) kyuubiConf.setIfMissing(HA_ZK_CONN_RETRY_POLICY, RetryPolicies.N_TIME.toString) + if (Utils.isOnK8s) { + kyuubiConf.setIfMissing(FRONTEND_CONNECTION_URL_USE_HOSTNAME, false) + } + // Pass kyuubi config from spark with `spark.kyuubi` val sparkToKyuubiPrefix = "spark.kyuubi." 
_sparkConf.getAllWithPrefix(sparkToKyuubiPrefix).foreach { case (k, v) => diff --git a/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/deployment/KyuubiOnKubernetesTestsSuite.scala b/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/deployment/KyuubiOnKubernetesTestsSuite.scala index 85690a65f..c45c8ff22 100644 --- a/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/deployment/KyuubiOnKubernetesTestsSuite.scala +++ b/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/deployment/KyuubiOnKubernetesTestsSuite.scala @@ -21,7 +21,7 @@ import org.apache.hadoop.conf.Configuration import org.apache.hadoop.net.NetUtils import org.apache.kyuubi.{Utils, WithSimpleDFSService} -import org.apache.kyuubi.config.KyuubiConf.{FRONTEND_CONNECTION_URL_USE_HOSTNAME, FRONTEND_THRIFT_BINARY_BIND_HOST} +import org.apache.kyuubi.config.KyuubiConf.FRONTEND_THRIFT_BINARY_BIND_HOST import org.apache.kyuubi.kubernetes.test.WithKyuubiServerOnKubernetes import org.apache.kyuubi.operation.SparkQueryTests import org.apache.kyuubi.zookeeper.ZookeeperConf.ZK_CLIENT_PORT_ADDRESS @@ -115,7 +115,6 @@ class KyuubiOnKubernetesWithClusterSparkTestsSuite "spark.kubernetes.authenticate.driver.serviceAccountName" -> "spark", "spark.kubernetes.driver.podTemplateFile" -> driverTemplate.getPath, ZK_CLIENT_PORT_ADDRESS.key -> localhostAddress, - FRONTEND_CONNECTION_URL_USE_HOSTNAME.key -> "false", FRONTEND_THRIFT_BINARY_BIND_HOST.key -> localhostAddress) } } diff --git a/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/spark/SparkOnKubernetesTestsSuite.scala b/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/spark/SparkOnKubernetesTestsSuite.scala index 4c82b155f..0b011514d 100644 --- 
a/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/spark/SparkOnKubernetesTestsSuite.scala +++ b/integration-tests/kyuubi-kubernetes-it/src/test/scala/org/apache/kyuubi/kubernetes/test/spark/SparkOnKubernetesTestsSuite.scala @@ -25,7 +25,7 @@ import org.apache.hadoop.net.NetUtils import org.apache.kyuubi.{BatchTestHelper, KyuubiException, Logging, Utils, WithKyuubiServer, WithSimpleDFSService} import org.apache.kyuubi.config.KyuubiConf -import org.apache.kyuubi.config.KyuubiConf.{FRONTEND_CONNECTION_URL_USE_HOSTNAME, FRONTEND_THRIFT_BINARY_BIND_HOST} +import org.apache.kyuubi.config.KyuubiConf.FRONTEND_THRIFT_BINARY_BIND_HOST import org.apache.kyuubi.engine.{ApplicationInfo, ApplicationOperation, KubernetesApplicationOperation} import org.apache.kyuubi.engine.ApplicationState.{FAILED, NOT_FOUND, RUNNING} import org.apache.kyuubi.engine.spark.SparkProcessBuilder @@ -113,7 +113,6 @@ class SparkClusterModeOnKubernetesSuiteBase .set("spark.kubernetes.authenticate.driver.serviceAccountName", "spark") .set("spark.kubernetes.driver.podTemplateFile", driverTemplate.getPath) .set(ZK_CLIENT_PORT_ADDRESS.key, localhostAddress) - .set(FRONTEND_CONNECTION_URL_USE_HOSTNAME.key, "false") .set(FRONTEND_THRIFT_BINARY_BIND_HOST.key, localhostAddress) } } diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/Utils.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/Utils.scala index fa1ba21c5..b79038b2f 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/Utils.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/Utils.scala @@ -329,4 +329,6 @@ object Utils extends Logging { */ def getContextOrKyuubiClassLoader: ClassLoader = Option(Thread.currentThread().getContextClassLoader).getOrElse(getKyuubiClassLoader) + + def isOnK8s: Boolean = Files.exists(Paths.get("/var/run/secrets/kubernetes.io")) } diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala 
b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala index d6ec9f86a..f83866c4a 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala @@ -1328,7 +1328,9 @@ object KyuubiConf { val FRONTEND_CONNECTION_URL_USE_HOSTNAME: ConfigEntry[Boolean] = buildConf("kyuubi.frontend.connection.url.use.hostname") - .doc("When true, frontend services prefer hostname, otherwise, ip address") + .doc("When true, frontend services prefer hostname, otherwise, ip address. Note that, " + + "the default value is set to `false` when engine running on Kubernetes to prevent " + + "potential network issue.") .version("1.5.0") .fallbackConf(ENGINE_CONNECTION_URL_USE_HOSTNAME)