Using ha.zookeeper.acl.enabled to control ZK acl

2020-11-09 17:34:55 +08:00 · 2020-11-09 17:34:55 +08:00 · f8dc5a9ca3
commit f8dc5a9ca3
parent c8bdb5649d
11 changed files with 155 additions and 25 deletions
--- a/docs/deployment/index.rst
+++ b/docs/deployment/index.rst
@ -9,10 +9,12 @@ Deploying Kyuubi
    :numbered: 3

    settings
+    on_yarn
    architecture
    authentication
    authorization
    deploy
    high_availability_guide
    metrics
+    logging
    trouble_shooting
--- a/docs/deployment/logging.md
+++ b/docs/deployment/logging.md
@ -0,0 +1,14 @@
+<div align=center>
+
+![](../imgs/kyuubi_logo_simple.png)
+
+</div>
+
+# Logging
+
+## Server Logging
+
+## Process Logging
+
+## Operation Logging
+
--- a/docs/deployment/on_yarn.md
+++ b/docs/deployment/on_yarn.md
@ -0,0 +1,14 @@
+<div align=center>
+
+![](../imgs/kyuubi_logo_simple.png)
+
+</div>
+
+# Running Kyuubi on Yarn
+
+## Requirements
+
+Before deploying Kyuubi's SQL engines on Yarn, you should be familiar with the following concepts.
+
+- [Apache Hadoop Yarn](https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/YARN.html)
+- [Running Spark on YARN](http://spark.apache.org/docs/latest/running-on-yarn.html)
--- a/docs/deployment/settings.md
+++ b/docs/deployment/settings.md
@ -134,6 +134,7 @@ kyuubi\.frontend<br>\.worker\.keepalive\.time|<div style='width: 80pt;word-wrap:

 Key | Default | Meaning | Since
 --- | --- | --- | ---
+kyuubi\.ha\.zookeeper<br>\.acl\.enabled|<div style='width: 80pt;word-wrap: break-word;white-space: normal'>false</div>|<div style='width: 200pt;word-wrap: break-word;white-space: normal'>Set to true if the zookeeper ensemble is kerberized</div>|<div style='width: 20pt'>1.0.0</div>
 kyuubi\.ha\.zookeeper<br>\.connection\.base\.retry<br>\.wait|<div style='width: 80pt;word-wrap: break-word;white-space: normal'>1000</div>|<div style='width: 200pt;word-wrap: break-word;white-space: normal'>Initial amount of time to wait between retries to the zookeeper ensemble</div>|<div style='width: 20pt'>1.0.0</div>
 kyuubi\.ha\.zookeeper<br>\.connection\.max<br>\.retries|<div style='width: 80pt;word-wrap: break-word;white-space: normal'>3</div>|<div style='width: 200pt;word-wrap: break-word;white-space: normal'>Max retry times for connecting to the zookeeper ensemble</div>|<div style='width: 20pt'>1.0.0</div>
 kyuubi\.ha\.zookeeper<br>\.connection\.max\.retry<br>\.wait|<div style='width: 80pt;word-wrap: break-word;white-space: normal'>30000</div>|<div style='width: 200pt;word-wrap: break-word;white-space: normal'>Max amount of time to wait between retries for BONDED_EXPONENTIAL_BACKOFF policy can reach, or max time until elapsed for UNTIL_ELAPSED policy to connect the zookeeper ensemble</div>|<div style='width: 20pt'>1.0.0</div>
--- a/docs/deployment/trouble_shooting.md
+++ b/docs/deployment/trouble_shooting.md
@ -47,7 +47,7 @@ Build flags:

 To fix this problem you should export `JAVA_HOME` w/ a compatible one in `conf/kyuubi-env.sh`

-```shell script
+```bash
 echo "export JAVA_HOME=/path/to/jdk1.8.0_251" >> conf/kyuubi-env.sh
 ```

@ -67,4 +67,90 @@ Exception in thread "main" org.apache.spark.SparkException: When running with ma
 	at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
 ```

-When Kyuubi gets the `spark.master=yarn`, `HADOOP_CONF_DIR` should also be exported in `$KYUUBI_HOME/conf/kyuubi-env.sh`
+When Kyuubi is configured with `spark.master=yarn`, `HADOOP_CONF_DIR` must also be exported in `$KYUUBI_HOME/conf/kyuubi-env.sh`.
+
+To fix this problem, export `HADOOP_CONF_DIR` in `conf/kyuubi-env.sh`, pointing it to the folder that contains the Hadoop client settings.
+
+```bash
+echo "export HADOOP_CONF_DIR=/path/to/hadoop/conf" >> conf/kyuubi-env.sh
+```
+
+
+### javax.security.sasl.SaslException: GSS initiate failed [Caused by GSSException: No valid credentials provided (Mechanism level: Failed to find any Kerberos tgt)];
+
+
+### org.apache.hadoop.security.AccessControlException: Permission denied: user=hzyanqin, access=WRITE, inode="/user":hdfs:hdfs:drwxr-xr-x
+
+```java
+org.apache.hadoop.security.AccessControlException: Permission denied: user=hzyanqin, access=WRITE, inode="/user":hdfs:hdfs:drwxr-xr-x
+	at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.check(FSPermissionChecker.java:350)
+	at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:251)
+	at org.apache.ranger.authorization.hadoop.RangerHdfsAuthorizer$RangerAccessControlEnforcer.checkPermission(RangerHdfsAuthorizer.java:306)
+	at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:189)
+	at org.apache.hadoop.hdfs.server.namenode.FSDirectory.checkPermission(FSDirectory.java:1767)
+	at org.apache.hadoop.hdfs.server.namenode.FSDirectory.checkPermission(FSDirectory.java:1751)
+	at org.apache.hadoop.hdfs.server.namenode.FSDirectory.checkAncestorAccess(FSDirectory.java:1710)
+	at org.apache.hadoop.hdfs.server.namenode.FSDirMkdirOp.mkdirs(FSDirMkdirOp.java:60)
+	at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.mkdirs(FSNamesystem.java:3062)
+	at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.mkdirs(NameNodeRpcServer.java:1156)
+	at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.mkdirs(ClientNamenodeProtocolServerSideTranslatorPB.java:652)
+	at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
+	at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:503)
+	at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:989)
+	at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:871)
+	at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:817)
+	at java.security.AccessController.doPrivileged(Native Method)
+	at javax.security.auth.Subject.doAs(Subject.java:422)
+	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1893)
+	at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2606)
+
+	at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
+	at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
+	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
+	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
+	at org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:106)
+	at org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:73)
+	at org.apache.hadoop.hdfs.DFSClient.primitiveMkdir(DFSClient.java:3007)
+	at org.apache.hadoop.hdfs.DFSClient.mkdirs(DFSClient.java:2975)
+	at org.apache.hadoop.hdfs.DistributedFileSystem$21.doCall(DistributedFileSystem.java:1047)
+	at org.apache.hadoop.hdfs.DistributedFileSystem$21.doCall(DistributedFileSystem.java:1043)
+	at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
+	at org.apache.hadoop.hdfs.DistributedFileSystem.mkdirsInternal(DistributedFileSystem.java:1061)
+	at org.apache.hadoop.hdfs.DistributedFileSystem.mkdirs(DistributedFileSystem.java:1036)
+	at org.apache.hadoop.fs.FileSystem.mkdirs(FileSystem.java:1881)
+	at org.apache.hadoop.fs.FileSystem.mkdirs(FileSystem.java:600)
+	at org.apache.spark.deploy.yarn.Client.prepareLocalResources(Client.scala:441)
+	at org.apache.spark.deploy.yarn.Client.createContainerLaunchContext(Client.scala:876)
+	at org.apache.spark.deploy.yarn.Client.submitApplication(Client.scala:196)
+	at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:60)
+	at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:201)
+	at org.apache.spark.SparkContext.<init>(SparkContext.scala:555)
+	at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2574)
+	at org.apache.spark.sql.SparkSession$Builder.$anonfun$getOrCreate$2(SparkSession.scala:934)
+	at scala.Option.getOrElse(Option.scala:189)
+	at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:928)
+	at org.apache.kyuubi.engine.spark.SparkSQLEngine$.createSpark(SparkSQLEngine.scala:72)
+	at org.apache.kyuubi.engine.spark.SparkSQLEngine$.main(SparkSQLEngine.scala:101)
+	at org.apache.kyuubi.engine.spark.SparkSQLEngine.main(SparkSQLEngine.scala)
+	at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
+	at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
+	at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
+	at java.lang.reflect.Method.invoke(Method.java:498)
+	at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
+	at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:928)
+	at org.apache.spark.deploy.SparkSubmit$$anon$1.run(SparkSubmit.scala:165)
+	at org.apache.spark.deploy.SparkSubmit$$anon$1.run(SparkSubmit.scala:163)
+	at java.security.AccessController.doPrivileged(Native Method)
+	at javax.security.auth.Subject.doAs(Subject.java:422)
+	at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1746)
+	at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:163)
+	at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)
+	at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)
+	at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1007)
+	at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1016)
+	at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
+```
+
+The user does not have permission to create their Hadoop home directory, which is `/user/hzyanqin` in the case above.
+
+To fix this problem, create this directory first and grant `hzyanqin` the necessary ACL permissions on it.
--- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/KyuubiSparkUtil.scala
+++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/KyuubiSparkUtil.scala
@ -24,14 +24,18 @@ import org.apache.spark.sql.SparkSession
 object KyuubiSparkUtil {

  def diagnostics(spark: SparkSession): String = {
+    val sc = spark.sparkContext
+    val webUrl = sc.getConf.getOption(
+      "spark.org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter.param.PROXY_URI_BASES")
+      .orElse(sc.uiWebUrl).getOrElse("")
    s"""
-       |           Spark application name: ${spark.sparkContext.appName}
-       |                 application ID:  ${spark.sparkContext.applicationId}
-       |                 application web UI: ${spark.sparkContext.uiWebUrl.getOrElse("")}
-       |                 master: ${spark.sparkContext.master}
-       |                 deploy mode: ${spark.sparkContext.deployMode}
-       |                 version: ${spark.sparkContext.version}
-       |           Start time: ${Instant.ofEpochMilli(spark.sparkContext.startTime)}
-       |           User: ${spark.sparkContext.sparkUser}""".stripMargin
+       |           Spark application name: ${sc.appName}
+       |                 application ID:  ${sc.applicationId}
+       |                 application web UI: $webUrl
+       |                 master: ${sc.master}
+       |                 deploy mode: ${sc.deployMode}
+       |                 version: ${sc.version}
+       |           Start time: ${Instant.ofEpochMilli(sc.startTime)}
+       |           User: ${sc.sparkUser}""".stripMargin
  }
 }
--- a/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/HighAvailabilityConf.scala
+++ b/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/HighAvailabilityConf.scala
@ -17,6 +17,8 @@

 package org.apache.kyuubi.ha

+import org.apache.hadoop.security.UserGroupInformation
+
 import org.apache.kyuubi.config.{ConfigBuilder, ConfigEntry, KyuubiConf}
 import org.apache.kyuubi.ha.client.RetryPolicies

@ -37,6 +39,13 @@ object HighAvailabilityConf {
    .stringConf
    .createWithDefault("kyuubi")

+  val HA_ZK_ACL_ENABLED: ConfigEntry[Boolean] =
+    buildConf("ha.zookeeper.acl.enabled")
+      .doc("Set to true if the zookeeper ensemble is kerberized")
+      .version("1.0.0")
+      .booleanConf
+      .createWithDefault(UserGroupInformation.isSecurityEnabled)
+
  val HA_ZK_CONN_MAX_RETRIES: ConfigEntry[Int] =
    buildConf("ha.zookeeper.connection.max.retries")
      .doc("Max retry times for connecting to the zookeeper ensemble")
--- a/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/ServiceDiscovery.scala
+++ b/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/ServiceDiscovery.scala
@ -176,8 +176,6 @@ class ServiceDiscovery private (
 object ServiceDiscovery {
  import RetryPolicies._

-  private final val DEFAULT_ACL_PROVIDER = new ZooKeeperACLProvider()
-
  private final lazy val connectionChecker =
    ThreadUtils.newDaemonSingleThreadScheduledExecutor("zk-connection-checker")

@ -203,7 +201,7 @@ object ServiceDiscovery {
      .connectString(connectionStr)
      .sessionTimeoutMs(sessionTimeout)
      .connectionTimeoutMs(connectionTimeout)
-      .aclProvider(DEFAULT_ACL_PROVIDER)
+      .aclProvider(new ZooKeeperACLProvider(conf))
      .retryPolicy(retryPolicy)
      .build()
  }
--- a/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/ZooKeeperACLProvider.scala
+++ b/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/ZooKeeperACLProvider.scala
@ -18,11 +18,13 @@
 package org.apache.kyuubi.ha.client

 import org.apache.curator.framework.api.ACLProvider
-import org.apache.hadoop.security.UserGroupInformation
 import org.apache.zookeeper.ZooDefs
 import org.apache.zookeeper.data.ACL

-class ZooKeeperACLProvider extends ACLProvider {
+import org.apache.kyuubi.config.KyuubiConf
+import org.apache.kyuubi.ha.HighAvailabilityConf
+
+class ZooKeeperACLProvider(conf: KyuubiConf) extends ACLProvider {

  /**
   * Return the ACL list to use by default.
@ -31,7 +33,7 @@ class ZooKeeperACLProvider extends ACLProvider {
   */
  override lazy val getDefaultAcl: java.util.List[ACL] = {
    val nodeAcls = new java.util.ArrayList[ACL]
-    if (UserGroupInformation.isSecurityEnabled) {
+    if (conf.get(HighAvailabilityConf.HA_ZK_ACL_ENABLED)) {
      // Read all to the world
      nodeAcls.addAll(ZooDefs.Ids.READ_ACL_UNSAFE)
      // Create/Delete/Write/Admin to the authenticated user
--- a/kyuubi-ha/src/test/scala/org/apache/kyuubi/ha/client/ServiceDiscoverySuite.scala
+++ b/kyuubi-ha/src/test/scala/org/apache/kyuubi/ha/client/ServiceDiscoverySuite.scala
@ -113,18 +113,17 @@ class ServiceDiscoverySuite extends KerberizedTestHelper {
    }
  }

-  ignore("acl for zookeeper") {
-    val provider = new ZooKeeperACLProvider()
+  test("acl for zookeeper") {
+    val provider = new ZooKeeperACLProvider(conf)
    val acl = provider.getDefaultAcl
    assert(acl.size() === 1)
    assert(acl === ZooDefs.Ids.OPEN_ACL_UNSAFE)

-    tryWithSecurityEnabled {
-      val acl1 = new ZooKeeperACLProvider().getDefaultAcl
-      assert(acl1.size() === 2)
-      val expected = ZooDefs.Ids.READ_ACL_UNSAFE
-      expected.addAll(ZooDefs.Ids.CREATOR_ALL_ACL)
-      assert(acl1 === expected)
-    }
+    val conf1 = conf.clone.set(HA_ZK_ACL_ENABLED, true)
+    val acl1 = new ZooKeeperACLProvider(conf1).getDefaultAcl
+    assert(acl1.size() === 2)
+    val expected = ZooDefs.Ids.READ_ACL_UNSAFE
+    expected.addAll(ZooDefs.Ids.CREATOR_ALL_ACL)
+    assert(acl1 === expected)
  }
 }
--- a/kyuubi-main/src/main/scala/org/apache/kyuubi/server/KyuubiServer.scala
+++ b/kyuubi-main/src/main/scala/org/apache/kyuubi/server/KyuubiServer.scala
@ -36,6 +36,7 @@ object KyuubiServer extends Logging {
      zkServer.start()
      sys.addShutdownHook(zkServer.stop())
      conf.set(HA_ZK_QUORUM, zkServer.getConnectString)
+      conf.set(HA_ZK_ACL_ENABLED, false)
    }

    val server = new KyuubiServer()