diff --git a/docs/deployment/index.rst b/docs/deployment/index.rst index 4a165f325..687a29e42 100644 --- a/docs/deployment/index.rst +++ b/docs/deployment/index.rst @@ -9,10 +9,12 @@ Deploying Kyuubi :numbered: 3 settings + on_yarn architecture authentication authorization deploy high_availability_guide metrics + logging trouble_shooting diff --git a/docs/deployment/logging.md b/docs/deployment/logging.md new file mode 100644 index 000000000..c661e0b06 --- /dev/null +++ b/docs/deployment/logging.md @@ -0,0 +1,14 @@ +
+ +![](../imgs/kyuubi_logo_simple.png) + +
+ +# Logging + +## Server Logging + +## Process Logging + +## Operation Logging + diff --git a/docs/deployment/on_yarn.md b/docs/deployment/on_yarn.md new file mode 100644 index 000000000..b66872ca8 --- /dev/null +++ b/docs/deployment/on_yarn.md @@ -0,0 +1,14 @@ +
+ +![](../imgs/kyuubi_logo_simple.png) + +
+ +# Running Kyuubi on Yarn + +## Requirements + +When you want to deploy Kyuubi's SQL engines on Yarn, you should be familiar with the following concepts. + +- [Apache Hadoop Yarn](https://hadoop.apache.org/docs/current/hadoop-yarn/hadoop-yarn-site/YARN.html) +- [Running Spark on YARN](http://spark.apache.org/docs/latest/running-on-yarn.html) diff --git a/docs/deployment/settings.md b/docs/deployment/settings.md index 29f1bf38d..f2510db0a 100644 --- a/docs/deployment/settings.md +++ b/docs/deployment/settings.md @@ -134,6 +134,7 @@ kyuubi\.frontend
\.worker\.keepalive\.time|
false
|
Set to true if the zookeeper ensemble is kerberized
|
1.0.0
kyuubi\.ha\.zookeeper
\.connection\.base\.retry
\.wait|
1000
|
Initial amount of time to wait between retries to the zookeeper ensemble
|
1.0.0
kyuubi\.ha\.zookeeper
\.connection\.max
\.retries|
3
|
Max retry times for connecting to the zookeeper ensemble
|
1.0.0
kyuubi\.ha\.zookeeper
\.connection\.max\.retry
\.wait|
30000
|
Max amount of time to wait between retries that the BONDED_EXPONENTIAL_BACKOFF policy can reach, or the max elapsed time for the UNTIL_ELAPSED policy, when connecting to the zookeeper ensemble
|
1.0.0
diff --git a/docs/deployment/trouble_shooting.md b/docs/deployment/trouble_shooting.md index c01e1f741..e901c1004 100644 --- a/docs/deployment/trouble_shooting.md +++ b/docs/deployment/trouble_shooting.md @@ -47,7 +47,7 @@ Build flags: To fix this problem you should export `JAVA_HOME` w/ a compatible one in `conf/kyuubi-env.sh` -```shell script +```bash echo "export JAVA_HOME=/path/to/jdk1.8.0_251" >> conf/kyuubi-env.sh ``` @@ -67,4 +67,90 @@ Exception in thread "main" org.apache.spark.SparkException: When running with ma at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) ``` -When Kyuubi gets the `spark.master=yarn`, `HADOOP_CONF_DIR` should also be exported in `$KYUUBI_HOME/conf/kyuubi-env.sh` +When Kyuubi gets the `spark.master=yarn`, `HADOOP_CONF_DIR` should also be exported in `$KYUUBI_HOME/conf/kyuubi-env.sh`. + +To fix this problem you should export `HADOOP_CONF_DIR` to the folder that contains the hadoop client settings in `conf/kyuubi-env.sh`. + +```bash +echo "export HADOOP_CONF_DIR=/path/to/hadoop/conf" >> conf/kyuubi-env.sh +``` + + +### javax.security.sasl.SaslException: GSS initiate failed [Caused by GSSException: No valid credentials provided (Mechanism level: Failed to find any Kerberos tgt)]; + + +### org.apache.hadoop.security.AccessControlException: Permission denied: user=hzyanqin, access=WRITE, inode="/user":hdfs:hdfs:drwxr-xr-x + +```java +org.apache.hadoop.security.AccessControlException: Permission denied: user=hzyanqin, access=WRITE, inode="/user":hdfs:hdfs:drwxr-xr-x + at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.check(FSPermissionChecker.java:350) + at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:251) + at org.apache.ranger.authorization.hadoop.RangerHdfsAuthorizer$RangerAccessControlEnforcer.checkPermission(RangerHdfsAuthorizer.java:306) + at org.apache.hadoop.hdfs.server.namenode.FSPermissionChecker.checkPermission(FSPermissionChecker.java:189) + at 
org.apache.hadoop.hdfs.server.namenode.FSDirectory.checkPermission(FSDirectory.java:1767) + at org.apache.hadoop.hdfs.server.namenode.FSDirectory.checkPermission(FSDirectory.java:1751) + at org.apache.hadoop.hdfs.server.namenode.FSDirectory.checkAncestorAccess(FSDirectory.java:1710) + at org.apache.hadoop.hdfs.server.namenode.FSDirMkdirOp.mkdirs(FSDirMkdirOp.java:60) + at org.apache.hadoop.hdfs.server.namenode.FSNamesystem.mkdirs(FSNamesystem.java:3062) + at org.apache.hadoop.hdfs.server.namenode.NameNodeRpcServer.mkdirs(NameNodeRpcServer.java:1156) + at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolServerSideTranslatorPB.mkdirs(ClientNamenodeProtocolServerSideTranslatorPB.java:652) + at org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos$ClientNamenodeProtocol$2.callBlockingMethod(ClientNamenodeProtocolProtos.java) + at org.apache.hadoop.ipc.ProtobufRpcEngine$Server$ProtoBufRpcInvoker.call(ProtobufRpcEngine.java:503) + at org.apache.hadoop.ipc.RPC$Server.call(RPC.java:989) + at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:871) + at org.apache.hadoop.ipc.Server$RpcCall.run(Server.java:817) + at java.security.AccessController.doPrivileged(Native Method) + at javax.security.auth.Subject.doAs(Subject.java:422) + at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1893) + at org.apache.hadoop.ipc.Server$Handler.run(Server.java:2606) + + at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) + at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:62) + at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) + at java.lang.reflect.Constructor.newInstance(Constructor.java:423) + at org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:106) + at org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:73) + at 
org.apache.hadoop.hdfs.DFSClient.primitiveMkdir(DFSClient.java:3007) + at org.apache.hadoop.hdfs.DFSClient.mkdirs(DFSClient.java:2975) + at org.apache.hadoop.hdfs.DistributedFileSystem$21.doCall(DistributedFileSystem.java:1047) + at org.apache.hadoop.hdfs.DistributedFileSystem$21.doCall(DistributedFileSystem.java:1043) + at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81) + at org.apache.hadoop.hdfs.DistributedFileSystem.mkdirsInternal(DistributedFileSystem.java:1061) + at org.apache.hadoop.hdfs.DistributedFileSystem.mkdirs(DistributedFileSystem.java:1036) + at org.apache.hadoop.fs.FileSystem.mkdirs(FileSystem.java:1881) + at org.apache.hadoop.fs.FileSystem.mkdirs(FileSystem.java:600) + at org.apache.spark.deploy.yarn.Client.prepareLocalResources(Client.scala:441) + at org.apache.spark.deploy.yarn.Client.createContainerLaunchContext(Client.scala:876) + at org.apache.spark.deploy.yarn.Client.submitApplication(Client.scala:196) + at org.apache.spark.scheduler.cluster.YarnClientSchedulerBackend.start(YarnClientSchedulerBackend.scala:60) + at org.apache.spark.scheduler.TaskSchedulerImpl.start(TaskSchedulerImpl.scala:201) + at org.apache.spark.SparkContext.(SparkContext.scala:555) + at org.apache.spark.SparkContext$.getOrCreate(SparkContext.scala:2574) + at org.apache.spark.sql.SparkSession$Builder.$anonfun$getOrCreate$2(SparkSession.scala:934) + at scala.Option.getOrElse(Option.scala:189) + at org.apache.spark.sql.SparkSession$Builder.getOrCreate(SparkSession.scala:928) + at org.apache.kyuubi.engine.spark.SparkSQLEngine$.createSpark(SparkSQLEngine.scala:72) + at org.apache.kyuubi.engine.spark.SparkSQLEngine$.main(SparkSQLEngine.scala:101) + at org.apache.kyuubi.engine.spark.SparkSQLEngine.main(SparkSQLEngine.scala) + at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) + at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) + at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) + at java.lang.reflect.Method.invoke(Method.java:498) + at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52) + at org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:928) + at org.apache.spark.deploy.SparkSubmit$$anon$1.run(SparkSubmit.scala:165) + at org.apache.spark.deploy.SparkSubmit$$anon$1.run(SparkSubmit.scala:163) + at java.security.AccessController.doPrivileged(Native Method) + at javax.security.auth.Subject.doAs(Subject.java:422) + at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1746) + at org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:163) + at org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:203) + at org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:90) + at org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:1007) + at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:1016) + at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala) +``` + +The user does not have permission to create their Hadoop home directory, which is `/user/hzyanqin` in the case above. + +To fix this problem you need to create this directory first and grant ACL permissions on it to `hzyanqin`. 
diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/KyuubiSparkUtil.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/KyuubiSparkUtil.scala index 2aff3b94b..a7a7621b7 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/KyuubiSparkUtil.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/KyuubiSparkUtil.scala @@ -24,14 +24,18 @@ import org.apache.spark.sql.SparkSession object KyuubiSparkUtil { def diagnostics(spark: SparkSession): String = { + val sc = spark.sparkContext + val webUrl = sc.getConf.getOption( + "spark.org.apache.hadoop.yarn.server.webproxy.amfilter.AmIpFilter.param.PROXY_URI_BASES") + .orElse(sc.uiWebUrl).getOrElse("") s""" - | Spark application name: ${spark.sparkContext.appName} - | application ID: ${spark.sparkContext.applicationId} - | application web UI: ${spark.sparkContext.uiWebUrl.getOrElse("")} - | master: ${spark.sparkContext.master} - | deploy mode: ${spark.sparkContext.deployMode} - | version: ${spark.sparkContext.version} - | Start time: ${Instant.ofEpochMilli(spark.sparkContext.startTime)} - | User: ${spark.sparkContext.sparkUser}""".stripMargin + | Spark application name: ${sc.appName} + | application ID: ${sc.applicationId} + | application web UI: $webUrl + | master: ${sc.master} + | deploy mode: ${sc.deployMode} + | version: ${sc.version} + | Start time: ${Instant.ofEpochMilli(sc.startTime)} + | User: ${sc.sparkUser}""".stripMargin } } diff --git a/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/HighAvailabilityConf.scala b/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/HighAvailabilityConf.scala index 4e7faf84a..88256251e 100644 --- a/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/HighAvailabilityConf.scala +++ b/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/HighAvailabilityConf.scala @@ -17,6 +17,8 @@ package org.apache.kyuubi.ha +import 
org.apache.hadoop.security.UserGroupInformation + import org.apache.kyuubi.config.{ConfigBuilder, ConfigEntry, KyuubiConf} import org.apache.kyuubi.ha.client.RetryPolicies @@ -37,6 +39,13 @@ object HighAvailabilityConf { .stringConf .createWithDefault("kyuubi") + val HA_ZK_ACL_ENABLED: ConfigEntry[Boolean] = + buildConf("ha.zookeeper.acl.enabled") + .doc("Set to true if the zookeeper ensemble is kerberized") + .version("1.0.0") + .booleanConf + .createWithDefault(UserGroupInformation.isSecurityEnabled) + val HA_ZK_CONN_MAX_RETRIES: ConfigEntry[Int] = buildConf("ha.zookeeper.connection.max.retries") .doc("Max retry times for connecting to the zookeeper ensemble") diff --git a/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/ServiceDiscovery.scala b/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/ServiceDiscovery.scala index d0338eb88..17d767374 100644 --- a/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/ServiceDiscovery.scala +++ b/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/ServiceDiscovery.scala @@ -176,8 +176,6 @@ class ServiceDiscovery private ( object ServiceDiscovery { import RetryPolicies._ - private final val DEFAULT_ACL_PROVIDER = new ZooKeeperACLProvider() - private final lazy val connectionChecker = ThreadUtils.newDaemonSingleThreadScheduledExecutor("zk-connection-checker") @@ -203,7 +201,7 @@ object ServiceDiscovery { .connectString(connectionStr) .sessionTimeoutMs(sessionTimeout) .connectionTimeoutMs(connectionTimeout) - .aclProvider(DEFAULT_ACL_PROVIDER) + .aclProvider(new ZooKeeperACLProvider(conf)) .retryPolicy(retryPolicy) .build() } diff --git a/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/ZooKeeperACLProvider.scala b/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/ZooKeeperACLProvider.scala index af7209ddc..582f98206 100644 --- a/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/ZooKeeperACLProvider.scala +++ b/kyuubi-ha/src/main/scala/org/apache/kyuubi/ha/client/ZooKeeperACLProvider.scala @@ -18,11 +18,13 
@@ package org.apache.kyuubi.ha.client import org.apache.curator.framework.api.ACLProvider -import org.apache.hadoop.security.UserGroupInformation import org.apache.zookeeper.ZooDefs import org.apache.zookeeper.data.ACL -class ZooKeeperACLProvider extends ACLProvider { +import org.apache.kyuubi.config.KyuubiConf +import org.apache.kyuubi.ha.HighAvailabilityConf + +class ZooKeeperACLProvider(conf: KyuubiConf) extends ACLProvider { /** * Return the ACL list to use by default. @@ -31,7 +33,7 @@ class ZooKeeperACLProvider extends ACLProvider { */ override lazy val getDefaultAcl: java.util.List[ACL] = { val nodeAcls = new java.util.ArrayList[ACL] - if (UserGroupInformation.isSecurityEnabled) { + if (conf.get(HighAvailabilityConf.HA_ZK_ACL_ENABLED)) { // Read all to the world nodeAcls.addAll(ZooDefs.Ids.READ_ACL_UNSAFE) // Create/Delete/Write/Admin to the authenticated user diff --git a/kyuubi-ha/src/test/scala/org/apache/kyuubi/ha/client/ServiceDiscoverySuite.scala b/kyuubi-ha/src/test/scala/org/apache/kyuubi/ha/client/ServiceDiscoverySuite.scala index a35d7efcf..f0d0fa571 100644 --- a/kyuubi-ha/src/test/scala/org/apache/kyuubi/ha/client/ServiceDiscoverySuite.scala +++ b/kyuubi-ha/src/test/scala/org/apache/kyuubi/ha/client/ServiceDiscoverySuite.scala @@ -113,18 +113,17 @@ class ServiceDiscoverySuite extends KerberizedTestHelper { } } - ignore("acl for zookeeper") { - val provider = new ZooKeeperACLProvider() + test("acl for zookeeper") { + val provider = new ZooKeeperACLProvider(conf) val acl = provider.getDefaultAcl assert(acl.size() === 1) assert(acl === ZooDefs.Ids.OPEN_ACL_UNSAFE) - tryWithSecurityEnabled { - val acl1 = new ZooKeeperACLProvider().getDefaultAcl - assert(acl1.size() === 2) - val expected = ZooDefs.Ids.READ_ACL_UNSAFE - expected.addAll(ZooDefs.Ids.CREATOR_ALL_ACL) - assert(acl1 === expected) - } + val conf1 = conf.clone.set(HA_ZK_ACL_ENABLED, true) + val acl1 = new ZooKeeperACLProvider(conf1).getDefaultAcl + assert(acl1.size() === 2) + val expected = 
ZooDefs.Ids.READ_ACL_UNSAFE + expected.addAll(ZooDefs.Ids.CREATOR_ALL_ACL) + assert(acl1 === expected) } } diff --git a/kyuubi-main/src/main/scala/org/apache/kyuubi/server/KyuubiServer.scala b/kyuubi-main/src/main/scala/org/apache/kyuubi/server/KyuubiServer.scala index 5c01853b9..5435578ff 100644 --- a/kyuubi-main/src/main/scala/org/apache/kyuubi/server/KyuubiServer.scala +++ b/kyuubi-main/src/main/scala/org/apache/kyuubi/server/KyuubiServer.scala @@ -36,6 +36,7 @@ object KyuubiServer extends Logging { zkServer.start() sys.addShutdownHook(zkServer.stop()) conf.set(HA_ZK_QUORUM, zkServer.getConnectString) + conf.set(HA_ZK_ACL_ENABLED, false) } val server = new KyuubiServer()