Using ha.zookeeper.acl.enabled to control ZK acl

This commit is contained in:
Kent Yao 2020-11-09 17:34:55 +08:00
parent c8bdb5649d
commit f8dc5a9ca3
11 changed files with 155 additions and 25 deletions

View File

@ -9,10 +9,12 @@ Deploying Kyuubi
:numbered: 3
settings
on_yarn
architecture
authentication
authorization
deploy
high_availability_guide
metrics
logging
trouble_shooting

View File

@ -0,0 +1,14 @@
<div align=center>
![](../imgs/kyuubi_logo_simple.png)
</div>
# Logging
## Server Logging
## Process Logging
## Operation Logging

View File

@ -0,0 +1,14 @@
<div align=center>
![](../imgs/kyuubi_logo_simple.png)
</div>
# Running Kyuubi on Yarn
## Requirements
Before deploying Kyuubi's SQL engines on YARN, you should be familiar with the following concepts.
- [Apache Hadoop Yarn](https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/YARN.html)
- [Running Spark on YARN](http://spark.apache.org/docs/latest/running-on-yarn.html)

View File

@ -134,6 +134,7 @@ kyuubi\.frontend<br>\.worker\.keepalive\.time|<div style='width: 80pt;word-wrap:
Key | Default | Meaning | Since
--- | --- | --- | ---
kyuubi\.ha\.zookeeper<br>\.acl\.enabled|<div style='width: 80pt;word-wrap: break-word;white-space: normal'>false</div>|<div style='width: 200pt;word-wrap: break-word;white-space: normal'>Set to true if the zookeeper ensemble is kerberized</div>|<div style='width: 20pt'>1.0.0</div>
kyuubi\.ha\.zookeeper<br>\.connection\.base\.retry<br>\.wait|<div style='width: 80pt;word-wrap: break-word;white-space: normal'>1000</div>|<div style='width: 200pt;word-wrap: break-word;white-space: normal'>Initial amount of time to wait between retries to the zookeeper ensemble</div>|<div style='width: 20pt'>1.0.0</div>
kyuubi\.ha\.zookeeper<br>\.connection\.max<br>\.retries|<div style='width: 80pt;word-wrap: break-word;white-space: normal'>3</div>|<div style='width: 200pt;word-wrap: break-word;white-space: normal'>Max retry times for connecting to the zookeeper ensemble</div>|<div style='width: 20pt'>1.0.0</div>
kyuubi\.ha\.zookeeper<br>\.connection\.max\.retry<br>\.wait|<div style='width: 80pt;word-wrap: break-word;white-space: normal'>30000</div>|<div style='width: 200pt;word-wrap: break-word;white-space: normal'>Max amount of time to wait between retries for BONDED_EXPONENTIAL_BACKOFF policy can reach, or max time until elapsed for UNTIL_ELAPSED policy to connect the zookeeper ensemble</div>|<div style='width: 20pt'>1.0.0</div>

View File

@ -47,7 +47,7 @@ Build flags:
To fix this problem, export a `JAVA_HOME` that points to a compatible JDK in `conf/kyuubi-env.sh`
```shell script
```bash
echo "export JAVA_HOME=/path/to/jdk1.8.0_251" >> conf/kyuubi-env.sh
```
@ -67,4 +67,90 @@ Exception in thread "main" org.apache.spark.SparkException: When running with ma
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
```
When Kyuubi gets the `spark.master=yarn`, `HADOOP_CONF_DIR` should also be exported in `$KYUUBI_HOME/conf/kyuubi-env.sh`
When Kyuubi gets the `spark.master=yarn`, `HADOOP_CONF_DIR` should also be exported in `$KYUUBI_HOME/conf/kyuubi-env.sh`.
To fix this problem, set `HADOOP_CONF_DIR` in `conf/kyuubi-env.sh` to the folder that contains the Hadoop client settings.
```bash
echo "export HADOOP_CONF_DIR=/path/to/hadoop/conf" >> conf/kyuubi-env.sh
```
### javax.security.sasl.SaslException: GSS initiate failed [Caused by GSSException: No valid credentials provided (Mechanism level: Failed to find any Kerberos tgt)];
### org.apache.hadoop.security.AccessControlException: Permission denied: user=hzyanqin, access=WRITE, inode="/user":hdfs:hdfs:drwxr-xr-x
```java
org.apache.hadoop.security.AccessControlException: Permission denied: user=hzyanqin, access=WRITE, inode="/user":hdfs:hdfs:drwxr-xr-x
at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.check(FSPermissionChecker.java:350)
at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:251)
at org.apache.ranger.authorization.hadoop.RangerHdfsAuthorizer$RangerAccessControlEnforcer.checkPermission(RangerHdfsAuthorizer.java:306)
at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:189)
at org.apache.hadoop.hdfs.server.namenode.FSDirectory.checkPermission(FSDirectory.java:1767)
at org.apache.hadoop.hdfs.server.namenode.FSDirectory.checkPermission(FSDirectory.java:1751)
at org.apache.hadoop.hdfs.server.namenode.FSDirectory.checkAncestorAccess(FSDirectory.java:1710)
at org.apache.hadoop.hdfs.server.namenode.FSDirMkdirOp.mkdirs(FSDirMkdirOp.java:60)
at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.mkdirs(FSNamesystem.java:3062)
at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.mkdirs(NameNodeRpcServer.java:1156)
at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.mkdirs(ClientNamenodeProtocolServerSideTranslatorPB.java:652)
at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java)
at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:503)
at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:989)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:871)
at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:817)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1893)
at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2606)
at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62)
at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
at org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:106)
at org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:73)
at org.apache.hadoop.hdfs.DFSClient.primitiveMkdir(DFSClient.java:3007)
at org.apache.hadoop.hdfs.DFSClient.mkdirs(DFSClient.java:2975)
at org.apache.hadoop.hdfs.DistributedFileSystem$21.doCall(DistributedFileSystem.java:1047)
at org.apache.hadoop.hdfs.DistributedFileSystem$21.doCall(DistributedFileSystem.java:1043)
at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
at org.apache.hadoop.hdfs.DistributedFileSystem.mkdirsInternal(DistributedFileSystem.java:1061)
at org.apache.hadoop.hdfs.DistributedFileSystem.mkdirs(DistributedFileSystem.java:1036)
at org.apache.hadoop.fs.FileSystem.mkdirs(FileSystem.java:1881)
at org.apache.hadoop.fs.FileSystem.mkdirs(FileSystem.java:600)
at org.apache.spark.deploy.yarn.Client.prepareLocalResources(Client.scala:441)
at org.apache.spark.deploy.yarn.Client.createContainerLaunchContext(Client.scala:876)
at org.apache.spark.deploy.yarn.Client.submitApplication(Client.scala:196)
at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:60)
at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:201)
at org.apache.spark.SparkContext.<init>(SparkContext.scala:555)
at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2574)
at org.apache.spark.sql.SparkSession$Builder.$anonfun$getOrCreate$2(SparkSession.scala:934)
at scala.Option.getOrElse(Option.scala:189)
at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:928)
at org.apache.kyuubi.engine.spark.SparkSQLEngine$.createSpark(SparkSQLEngine.scala:72)
at org.apache.kyuubi.engine.spark.SparkSQLEngine$.main(SparkSQLEngine.scala:101)
at org.apache.kyuubi.engine.spark.SparkSQLEngine.main(SparkSQLEngine.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:928)
at org.apache.spark.deploy.SparkSubmit$$anon$1.run(SparkSubmit.scala:165)
at org.apache.spark.deploy.SparkSubmit$$anon$1.run(SparkSubmit.scala:163)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1746)
at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:163)
at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203)
at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90)
at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1007)
at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1016)
at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
```
The user does not have permission to create their Hadoop home directory, which is `/user/hzyanqin` in the case above.
To fix this problem, create this directory first and grant `hzyanqin` the necessary permissions on it.

View File

@ -24,14 +24,18 @@ import org.apache.spark.sql.SparkSession
/** Utility methods for reporting on a Spark application. */
object KyuubiSparkUtil {
/**
 * Builds a multi-line, human-readable summary of the Spark application behind `spark`:
 * application name, application ID, web UI address, master, deploy mode, Spark version,
 * start time and Spark user.
 *
 * @param spark the session whose SparkContext is described
 * @return the summary text, with the margin characters stripped
 */
def diagnostics(spark: SparkSession): String = {
val sc = spark.sparkContext
// Prefer the URL stored in the AmIpFilter PROXY_URI_BASES setting when it is present
// (presumably populated when running behind the YARN web proxy - confirm); otherwise fall
// back to the context's own UI URL, and finally to an empty string.
val webUrl = sc.getConf.getOption(
"spark.org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter.param.PROXY_URI_BASES")
.orElse(sc.uiWebUrl).getOrElse("")
s"""
| Spark application name: ${spark.sparkContext.appName}
| application ID: ${spark.sparkContext.applicationId}
| application web UI: ${spark.sparkContext.uiWebUrl.getOrElse("")}
| master: ${spark.sparkContext.master}
| deploy mode: ${spark.sparkContext.deployMode}
| version: ${spark.sparkContext.version}
| Start time: ${Instant.ofEpochMilli(spark.sparkContext.startTime)}
| User: ${spark.sparkContext.sparkUser}""".stripMargin
| Spark application name: ${sc.appName}
| application ID: ${sc.applicationId}
| application web UI: $webUrl
| master: ${sc.master}
| deploy mode: ${sc.deployMode}
| version: ${sc.version}
| Start time: ${Instant.ofEpochMilli(sc.startTime)}
| User: ${sc.sparkUser}""".stripMargin
}
}

View File

@ -17,6 +17,8 @@
package org.apache.kyuubi.ha
import org.apache.hadoop.security.UserGroupInformation
import org.apache.kyuubi.config.{ConfigBuilder, ConfigEntry, KyuubiConf}
import org.apache.kyuubi.ha.client.RetryPolicies
@ -37,6 +39,13 @@ object HighAvailabilityConf {
.stringConf
.createWithDefault("kyuubi")
// Boolean config entry for `ha.zookeeper.acl.enabled`.
// NOTE(review): the default is not a constant. It is whatever
// UserGroupInformation.isSecurityEnabled returns at the moment this entry is built, so the
// effective default depends on the Hadoop security settings visible at that time.
val HA_ZK_ACL_ENABLED: ConfigEntry[Boolean] =
buildConf("ha.zookeeper.acl.enabled")
.doc("Set to true if the zookeeper ensemble is kerberized")
.version("1.0.0")
.booleanConf
.createWithDefault(UserGroupInformation.isSecurityEnabled)
val HA_ZK_CONN_MAX_RETRIES: ConfigEntry[Int] =
buildConf("ha.zookeeper.connection.max.retries")
.doc("Max retry times for connecting to the zookeeper ensemble")

View File

@ -176,8 +176,6 @@ class ServiceDiscovery private (
object ServiceDiscovery {
import RetryPolicies._
private final val DEFAULT_ACL_PROVIDER = new ZooKeeperACLProvider()
private final lazy val connectionChecker =
ThreadUtils.newDaemonSingleThreadScheduledExecutor("zk-connection-checker")
@ -203,7 +201,7 @@ object ServiceDiscovery {
.connectString(connectionStr)
.sessionTimeoutMs(sessionTimeout)
.connectionTimeoutMs(connectionTimeout)
.aclProvider(DEFAULT_ACL_PROVIDER)
.aclProvider(new ZooKeeperACLProvider(conf))
.retryPolicy(retryPolicy)
.build()
}

View File

@ -18,11 +18,13 @@
package org.apache.kyuubi.ha.client
import org.apache.curator.framework.api.ACLProvider
import org.apache.hadoop.security.UserGroupInformation
import org.apache.zookeeper.ZooDefs
import org.apache.zookeeper.data.ACL
class ZooKeeperACLProvider extends ACLProvider {
import org.apache.kyuubi.config.KyuubiConf
import org.apache.kyuubi.ha.HighAvailabilityConf
class ZooKeeperACLProvider(conf: KyuubiConf) extends ACLProvider {
/**
* Return the ACL list to use by default.
@ -31,7 +33,7 @@ class ZooKeeperACLProvider extends ACLProvider {
*/
override lazy val getDefaultAcl: java.util.List[ACL] = {
val nodeAcls = new java.util.ArrayList[ACL]
if (UserGroupInformation.isSecurityEnabled) {
if (conf.get(HighAvailabilityConf.HA_ZK_ACL_ENABLED)) {
// Read all to the world
nodeAcls.addAll(ZooDefs.Ids.READ_ACL_UNSAFE)
// Create/Delete/Write/Admin to the authenticated user

View File

@ -113,18 +113,17 @@ class ServiceDiscoverySuite extends KerberizedTestHelper {
}
}
ignore("acl for zookeeper") {
val provider = new ZooKeeperACLProvider()
test("acl for zookeeper") {
// A provider built from the suite's conf is expected to hand out the single open ACL.
val provider = new ZooKeeperACLProvider(conf)
val acl = provider.getDefaultAcl
assert(acl.size() === 1)
assert(acl === ZooDefs.Ids.OPEN_ACL_UNSAFE)
tryWithSecurityEnabled {
val acl1 = new ZooKeeperACLProvider().getDefaultAcl
assert(acl1.size() === 2)
// Copy before appending: ZooDefs.Ids.READ_ACL_UNSAFE is a shared, mutable static list, and
// calling addAll on it directly would alter it for every other user in this JVM.
val expected = new java.util.ArrayList[org.apache.zookeeper.data.ACL](ZooDefs.Ids.READ_ACL_UNSAFE)
expected.addAll(ZooDefs.Ids.CREATOR_ALL_ACL)
assert(acl1 === expected)
}
// With the ACL switch turned on in a cloned conf, the provider is expected to return
// world-read plus creator-all.
val conf1 = conf.clone.set(HA_ZK_ACL_ENABLED, true)
val acl1 = new ZooKeeperACLProvider(conf1).getDefaultAcl
assert(acl1.size() === 2)
// Copy before appending: ZooDefs.Ids.READ_ACL_UNSAFE is a shared, mutable static list, and
// calling addAll on it directly would alter it for every other user in this JVM.
val expected = new java.util.ArrayList[org.apache.zookeeper.data.ACL](ZooDefs.Ids.READ_ACL_UNSAFE)
expected.addAll(ZooDefs.Ids.CREATOR_ALL_ACL)
assert(acl1 === expected)
}
}

View File

@ -36,6 +36,7 @@ object KyuubiServer extends Logging {
zkServer.start()
sys.addShutdownHook(zkServer.stop())
conf.set(HA_ZK_QUORUM, zkServer.getConnectString)
conf.set(HA_ZK_ACL_ENABLED, false)
}
val server = new KyuubiServer()