From 9b1fa5e0c56bfa8bf2dc9fdb0e8f78d5b112c2e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E5=AE=87=E7=BF=94?= Date: Mon, 13 Sep 2021 20:10:37 +0800 Subject: [PATCH] [KYUUBI #1094] [BUGFIX #1068]Support upload file to HDFS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit #1068 ### _Why are the changes needed?_ ### _How was this patch tested?_ - [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [ ] [Run test](https://kyuubi.readthedocs.io/en/latest/develop_tools/testing.html#running-tests) locally before make a pull request Closes #1094 from zhang1002/branch-1.3_support-upload-file-to-hdfs. Closes #1094 e96794ed [张宇翔] Support upload file to hdfs ff237744 [张宇翔] Support the eventLog upload to hdfs Authored-by: 张宇翔 Signed-off-by: ulysses-you --- docs/deployment/settings.md | 2 +- .../engine/spark/events/JsonEventLogger.scala | 3 +-- .../events/EventLoggingServiceSuite.scala | 23 +++++++++++-------- .../org/apache/kyuubi/config/KyuubiConf.scala | 6 +++-- 4 files changed, 20 insertions(+), 14 deletions(-) diff --git a/docs/deployment/settings.md b/docs/deployment/settings.md index 150b2161d..1ce18f6f5 100644 --- a/docs/deployment/settings.md +++ b/docs/deployment/settings.md @@ -163,7 +163,7 @@ kyuubi\.engine
\.deregister\.exception
\.classes|
|
A comma separated list of exception messages. If there is any exception thrown, whose message or stacktrace matches the specified message list, the engine would deregister itself.
|
seq
|
1.2.0
kyuubi\.engine
\.deregister\.exception
\.ttl|
PT30M
|
Time to live (TTL) for the exception patterns specified in kyuubi.engine.deregister.exception.classes and kyuubi.engine.deregister.exception.messages to deregister engines. Once the total error count hits kyuubi.engine.deregister.job.max.failures within the TTL, the engine will deregister itself and wait to self-terminate. Otherwise, we assume that the engine has recovered from temporary failures.
|
duration
|
1.2.0
kyuubi\.engine
\.deregister\.job\.max
\.failures|
4
|
Number of job failures before deregistering the engine.
|
int
|
1.2.0
-kyuubi\.engine\.event
\.json\.log\.path|
/tmp/kyuubi/events
|
The location of all the engine events go for the builtin JSON logger
|
string
|
1.3.0
+kyuubi\.engine\.event
\.json\.log\.path|
file:/tmp/kyuubi/events
|
The location of all the engine events go for the builtin JSON logger.
  • Local Path: start with 'file:'
  • HDFS Path: start with 'hdfs:'
|
string
|
1.3.0
kyuubi\.engine\.event
\.loggers|
|
A comma-separated list of engine history loggers, where engine/session/operation etc. events go.
  • SPARK: the events will be written to the spark history events
  • JSON: the events will be written to the location of kyuubi.engine.event.json.log.path
  • JDBC: to be done
  • CUSTOM: to be done.
|
seq
|
1.3.0
kyuubi\.engine
\.initialize\.sql|
SHOW DATABASES
|
Semicolon-separated list of SQL statements to be initialized in the newly created engine before queries. This configuration cannot be used in a JDBC URL due to a limitation of the Beeline/JDBC driver.
|
seq
|
1.2.0
kyuubi\.engine\.session
\.initialize\.sql|
SHOW DATABASES
|
Semicolon-separated list of SQL statements to be initialized in the newly created engine session before queries. This configuration cannot be used in a JDBC URL due to a limitation of the Beeline/JDBC driver.
|
seq
|
1.3.0
diff --git a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/events/JsonEventLogger.scala b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/events/JsonEventLogger.scala index 32c2a5949..d022cb5e1 100644 --- a/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/events/JsonEventLogger.scala +++ b/externals/kyuubi-spark-sql-engine/src/main/scala/org/apache/kyuubi/engine/spark/events/JsonEventLogger.scala @@ -19,7 +19,6 @@ package org.apache.kyuubi.engine.spark.events import java.io.{BufferedOutputStream, FileOutputStream, IOException, PrintWriter} import java.net.URI -import java.nio.file.Paths import scala.collection.mutable.HashMap @@ -83,7 +82,7 @@ class JsonEventLogger(logName: String, hadoopConf: Configuration) } override def initialize(conf: KyuubiConf): Unit = synchronized { - logRoot = Paths.get(conf.get(ENGINE_EVENT_JSON_LOG_PATH)).toAbsolutePath.toUri + logRoot = URI.create(conf.get(ENGINE_EVENT_JSON_LOG_PATH)) fs = FileSystem.get(logRoot, hadoopConf) requireLogRootWritable() super.initialize(conf) diff --git a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/events/EventLoggingServiceSuite.scala b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/events/EventLoggingServiceSuite.scala index bb526465e..9123f1181 100644 --- a/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/events/EventLoggingServiceSuite.scala +++ b/externals/kyuubi-spark-sql-engine/src/test/scala/org/apache/kyuubi/engine/spark/events/EventLoggingServiceSuite.scala @@ -17,9 +17,11 @@ package org.apache.kyuubi.engine.spark.events -import java.nio.charset.StandardCharsets -import java.nio.file.{Files, Paths} +import java.io.{BufferedReader, InputStreamReader} +import java.nio.file.Paths +import org.apache.hadoop.conf.Configuration +import org.apache.hadoop.fs.{FileSystem, FSDataInputStream, Path} import 
org.apache.hive.service.rpc.thrift.TExecuteStatementReq import org.scalatest.time.SpanSugar._ @@ -31,24 +33,27 @@ import org.apache.kyuubi.operation.{JDBCTestUtils, OperationHandle} class EventLoggingServiceSuite extends WithSparkSQLEngine with JDBCTestUtils { import EventLoggerType._ - private val logRoot = Utils.createTempDir() + private val logRoot = "file:" + Utils.createTempDir().toString private val currentDate = Utils.getDateFromTimestamp(System.currentTimeMillis()) override def withKyuubiConf: Map[String, String] = Map( KyuubiConf.ENGINE_EVENT_LOGGERS.key -> s"$JSON,$SPARK", - KyuubiConf.ENGINE_EVENT_JSON_LOG_PATH.key -> logRoot.toString, + KyuubiConf.ENGINE_EVENT_JSON_LOG_PATH.key -> logRoot, "spark.eventLog.enabled" -> "true", - "spark.eventLog.dir" -> logRoot.toString + "spark.eventLog.dir" -> logRoot ) override protected def jdbcUrl: String = getJdbcUrl test("round-trip for event logging service") { val engineEventPath = Paths.get( - logRoot.toString, "engine", s"day=$currentDate", KyuubiSparkUtil.engineId + ".json") + logRoot, "engine", s"day=$currentDate", KyuubiSparkUtil.engineId + ".json") val sessionEventPath = Paths.get( - logRoot.toString, "session", s"day=$currentDate", KyuubiSparkUtil.engineId + ".json") - val engineEventReader = Files.newBufferedReader(engineEventPath, StandardCharsets.UTF_8) + logRoot, "session", s"day=$currentDate", KyuubiSparkUtil.engineId + ".json") + // val engineEventReader = Files.newBufferedReader(engineEventPath, StandardCharsets.UTF_8) + val fileSystem: FileSystem = FileSystem.get(new Configuration()) + val fs: FSDataInputStream = fileSystem.open(new Path(engineEventPath.toString)) + val engineEventReader = new BufferedReader(new InputStreamReader(fs)) val readEvent = JsonProtocol.jsonToEvent(engineEventReader.readLine()) assert(readEvent.isInstanceOf[KyuubiEvent]) @@ -92,7 +97,7 @@ class EventLoggingServiceSuite extends WithSparkSQLEngine with JDBCTestUtils { test("statementEvent: generate, dump and query") { val 
statementEventPath = Paths.get( - logRoot.toString, "statement", s"day=$currentDate", engine.engineId + ".json") + logRoot, "statement", s"day=$currentDate", engine.engineId + ".json") val sql = "select timestamp'2021-06-01'" withSessionHandle { (client, handle) => diff --git a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala index 069dd714a..b2445cfce 100644 --- a/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala +++ b/kyuubi-common/src/main/scala/org/apache/kyuubi/config/KyuubiConf.scala @@ -684,10 +684,12 @@ object KyuubiConf { val ENGINE_EVENT_JSON_LOG_PATH: ConfigEntry[String] = buildConf("engine.event.json.log.path") - .doc("The location of all the engine events go for the builtin JSON logger") + .doc("The location of all the engine events go for the builtin JSON logger.
    " + + "
  • Local Path: start with 'file:'
  • " + + "
  • HDFS Path: start with 'hdfs:'
") .version("1.3.0") .stringConf - .createWithDefault("/tmp/kyuubi/events") + .createWithDefault("file:/tmp/kyuubi/events") val ENGINE_EVENT_LOGGERS: ConfigEntry[Seq[String]] = buildConf("engine.event.loggers")