From b91e2df5cdbadbb149f7ef4e947a1017e4f36772 Mon Sep 17 00:00:00 2001
From: Kent Yao <11215016@zju.edu.cn>
Date: Thu, 20 Dec 2018 19:07:45 +0800
Subject: [PATCH] Merge pull request #134 from yaooqinn/KYUUBI-133

[KYUUBI-133] fix #133 token expiration in HadoopRDD getPartitions
---
 .../main/scala/org/apache/spark/KyuubiSparkUtil.scala | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/kyuubi-server/src/main/scala/org/apache/spark/KyuubiSparkUtil.scala b/kyuubi-server/src/main/scala/org/apache/spark/KyuubiSparkUtil.scala
index 63b55867c..6daa80315 100644
--- a/kyuubi-server/src/main/scala/org/apache/spark/KyuubiSparkUtil.scala
+++ b/kyuubi-server/src/main/scala/org/apache/spark/KyuubiSparkUtil.scala
@@ -99,6 +99,7 @@ object KyuubiSparkUtil extends Logging {
   val HDFS_CLIENT_CACHE_DEFAULT = "true"
   val FILE_CLIENT_CACHE: String = SPARK_HADOOP_PREFIX + "fs.file.impl.disable.cache"
   val FILE_CLIENT_CACHE_DEFAULT = "true"
+  val RDD_PAR_LISTING: String = SPARK_PREFIX + "rdd.parallelListingThreshold"
 
   // Runtime Spark Version
   val SPARK_VERSION: String = org.apache.spark.SPARK_VERSION
@@ -275,6 +276,14 @@ object KyuubiSparkUtil extends Logging {
     if (UserGroupInformation.isSecurityEnabled) {
       conf.setIfMissing(HDFS_CLIENT_CACHE, HDFS_CLIENT_CACHE_DEFAULT)
       conf.setIfMissing(FILE_CLIENT_CACHE, FILE_CLIENT_CACHE_DEFAULT)
+      // If you are using Kyuubi against kerberized HDFS, you will run into HDFS_DELEGATION_TOKEN
+      // expiration in certain SQL queries. This exception is usually thrown in
+      // HadoopRDD.getPartitions, where the JobConf has no Credentials because it is generated by
+      // Configuration, and the UGI.getCurrentUser contains only the oldest tokens, which are
+      // destined to expire. The reason seems to be that UnionRDD's sub-RDDs are listed in
+      // parallel on a ForkJoinPool, which runs in a different calling context. Turning off
+      // parallel listing seems to be a solution to this issue.
+      conf.setIfMissing(RDD_PAR_LISTING, Int.MaxValue.toString)
     }
   }