[KYUUBI #703] support hudi-0.9.0
### _Why are the changes needed?_ Hudi added Spark SQL support in version 0.9.0. This PR adds the Hudi 0.9.0 dependency and unit tests for it (#703). ### _How was this patch tested?_ - [x] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [x] [Run test](https://kyuubi.readthedocs.io/en/latest/develop_tools/testing.html#running-tests) locally before making a pull request Closes #994 from simon824/master. Closes #703 a04b2e42 [simon] exclude jdk.tools dep 50e1ab01 [simon] fix jdk11 dep f33ebdf1 [simon] fix hudi dep conflicts ff2585c2 [simon] fix yarn dep conflicts 32dd1ea8 [simon] fix scala version conflicts b8a37401 [simon] add spark.sql.catalogImplementation 82f5422f [simon] fix spark3 dependency 2a6c497d [simon] fix spark3 dependency 4dbec8be [Simon] Merge branch 'apache:master' into master 3f180157 [simon] fix maven 73e48d0a [simon] add spark3 support maven dependency 3def658b [simon] fix missing spark-sql-engine hudi maven dependency 524132d3 [simon] bugfix e98998a6 [simon] fix pom error 59fc6669 [Simon] Merge branch 'apache:master' into master d9e17ebf [simon] fix conflict 2466ece3 [simon] add sparkSQL test on hudi-0.9.0 Lead-authored-by: simon <zhangshiming@cvte.com> Co-authored-by: Simon <3656562@qq.com> Signed-off-by: Kent Yao <yao@apache.org>
This commit is contained in:
parent
32111a3014
commit
098e660a3d
36
externals/kyuubi-spark-sql-engine/pom.xml
vendored
36
externals/kyuubi-spark-sql-engine/pom.xml
vendored
@ -139,6 +139,42 @@
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-avro_${scala.binary.version}</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-spark-common_${scala.binary.version}</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-spark3_${scala.binary.version}</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hbase</groupId>
|
||||
<artifactId>hbase-client</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-avro</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-spark_${scala.binary.version}</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>io.delta</groupId>
|
||||
<artifactId>delta-core_${scala.binary.version}</artifactId>
|
||||
|
||||
@ -0,0 +1,36 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.kyuubi.engine.spark.operation
|
||||
|
||||
import org.apache.kyuubi.engine.spark.WithSparkSQLEngine
|
||||
import org.apache.kyuubi.operation.BasicHudiJDBCTests
|
||||
import org.apache.kyuubi.tags.HudiTest
|
||||
|
||||
/**
 * Runs the tests declared in `BasicHudiJDBCTests` on top of `WithSparkSQLEngine`,
 * passing the Hudi-specific `extraConfigs` to the engine as its Kyuubi configuration.
 */
@HudiTest
class SparkHudiOperationSuite extends WithSparkSQLEngine with BasicHudiJDBCTests {

  /** Configuration handed to the engine: exactly the Hudi settings from the mixin. */
  override def withKyuubiConf: Map[String, String] = extraConfigs

  /** JDBC URL used by the shared tests; delegates to `getJdbcUrl`. */
  override protected def jdbcUrl: String = getJdbcUrl

  /**
   * Tears the suite down, then clears the JVM system property named by every key in
   * `extraConfigs`.
   */
  override def afterAll(): Unit = {
    super.afterAll()
    // NOTE(review): presumably WithSparkSQLEngine publishes these settings as system
    // properties, so they are removed here to avoid leaking into other suites - confirm.
    extraConfigs.keys.foreach { key =>
      System.clearProperty(key)
    }
  }
}
|
||||
@ -0,0 +1,30 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.kyuubi.tags;
|
||||
|
||||
import org.scalatest.TagAnnotation;
|
||||
|
||||
import java.lang.annotation.ElementType;
|
||||
import java.lang.annotation.Retention;
|
||||
import java.lang.annotation.RetentionPolicy;
|
||||
import java.lang.annotation.Target;
|
||||
|
||||
@TagAnnotation
|
||||
@Retention(RetentionPolicy.RUNTIME)
|
||||
@Target({ElementType.METHOD, ElementType.TYPE})
|
||||
public @interface HudiTest {}
|
||||
@ -0,0 +1,46 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.kyuubi
|
||||
|
||||
import java.nio.file.Path
|
||||
|
||||
/**
 * Test mixin that supplies the Hudi-specific settings required by `DataLakeSuiteMixin`:
 * the table format, the catalog name, a warehouse location, the jars to ship and the
 * Spark configuration entries that enable Hudi's SQL extension.
 */
trait HudiSuiteMixin extends DataLakeSuiteMixin {

  /** Table format used in `USING` clauses by the tests. */
  override protected def format: String = "hudi"

  /** Name of the catalog the tests run against. */
  override protected def catalog: String = "spark_catalog"

  /**
   * Warehouse location for the tests.
   *
   * Declared as a `def`, so every access calls `Utils.createTempDir()` again.
   * NOTE(review): presumably that yields a new directory per call - confirm that callers
   * do not expect a stable path.
   */
  override protected def warehouse: Path = Utils.createTempDir()

  /**
   * Comma-separated list of the entries of `java.class.path` whose path contains "jar".
   *
   * The classpath is split on the platform path separator rather than a hard-coded ":",
   * and the entries are joined with `mkString`, so a classpath without any matching
   * entry yields an empty string instead of failing while trimming a trailing comma.
   *
   * @return the matching classpath entries joined by ",", or "" when there are none
   */
  override protected def extraJars: String = {
    System.getProperty("java.class.path")
      .split(java.io.File.pathSeparator)
      .filter(_.contains("jar"))
      .mkString(",")
  }

  /**
   * Spark settings applied by the Hudi suites: the catalog implementation and default
   * catalog, Hudi's session extension, the Kryo serializer and the jars from `extraJars`.
   */
  override protected def extraConfigs = Map(
    "spark.sql.catalogImplementation" -> "in-memory",
    "spark.sql.defaultCatalog" -> catalog,
    "spark.sql.extensions" -> "org.apache.spark.sql.hudi.HoodieSparkSessionExtension",
    "spark.serializer" -> "org.apache.spark.serializer.KryoSerializer",
    "spark.jars" -> extraJars)
}
|
||||
@ -0,0 +1,102 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.kyuubi.operation
|
||||
|
||||
import org.apache.kyuubi.HudiSuiteMixin
|
||||
import org.apache.kyuubi.operation.meta.ResultSetSchemaConstant._
|
||||
|
||||
|
||||
/**
 * JDBC metadata tests shared by the Hudi suites. They exercise `getCatalogs`,
 * `getSchemas` and `getTables` through the connection obtained from the helpers of
 * `JDBCTestUtils`, using the format and catalog supplied by `HudiSuiteMixin`.
 */
trait BasicHudiJDBCTests extends JDBCTestUtils with HudiSuiteMixin {

  test("get catalogs") {
    withJdbcStatement() { statement =>
      val metaData = statement.getConnection.getMetaData
      val catalogs = metaData.getCatalogs
      // Check the cursor advance explicitly so that an empty result fails as a test
      // assertion instead of an error raised by the following getString call.
      assert(catalogs.next())
      assert(catalogs.getString(TABLE_CAT) === catalog)
      assert(!catalogs.next())
    }
  }

  test("get schemas") {
    val dbs = Seq("db1", "db2", "db33", "db44")
    val dbDflts = Seq("default", "global_temp")

    withDatabases(dbs: _*) { statement =>
      dbs.foreach(db => statement.execute(s"CREATE DATABASE IF NOT EXISTS $db"))
      val metaData = statement.getConnection.getMetaData

      // Patterns that are expected to match every schema, including the defaults.
      Seq("", "*", "%", null, ".*", "_*", "_%", ".%") foreach { pattern =>
        checkGetSchemas(metaData.getSchemas(catalog, pattern), dbs ++ dbDflts, catalog)
      }

      // Prefix patterns: every database created above, none of the defaults.
      Seq("db%", "db.*") foreach { pattern =>
        checkGetSchemas(metaData.getSchemas(catalog, pattern), dbs, catalog)
      }

      // Single-character wildcards: only the names that are exactly "db" plus one char.
      Seq("db_", "db.") foreach { pattern =>
        checkGetSchemas(metaData.getSchemas(catalog, pattern), dbs.take(2), catalog)
      }

      checkGetSchemas(metaData.getSchemas(catalog, "db1"), Seq("db1"), catalog)
      checkGetSchemas(metaData.getSchemas(catalog, "db_not_exist"), Seq.empty, catalog)
    }
  }

  test("get tables") {
    val table = "table_1_test"
    val schema = "default"
    val tableType = "TABLE"

    withJdbcStatement(table) { statement =>
      statement.execute(
        s"""
           | create table $table (
           | id int,
           | name string,
           | price double,
           | ts long
           | ) using $format
           | options (
           | primaryKey = 'id',
           | preCombineField = 'ts'
           | )
""".stripMargin)

      val metaData = statement.getConnection.getMetaData

      // No filters at all: the table created above must be the only row.
      val rs1 = metaData.getTables(null, null, null, null)
      assert(rs1.next())
      val catalogName = rs1.getString(TABLE_CAT)
      assert(catalogName === catalog || catalogName === null)
      assert(rs1.getString(TABLE_SCHEM) === schema)
      assert(rs1.getString(TABLE_NAME) === table)
      assert(rs1.getString(TABLE_TYPE) === tableType)
      assert(!rs1.next())

      // Name pattern plus table-type filter still finds exactly that table.
      val rs2 = metaData.getTables(null, null, "table%", Array(tableType))
      assert(rs2.next())
      assert(rs2.getString(TABLE_NAME) === table)
      assert(!rs2.next())

      // Filtering on views must return nothing.
      val rs3 = metaData.getTables(null, schema, "*", Array("VIEW"))
      assert(!rs3.next())
    }
  }
}
|
||||
@ -148,6 +148,42 @@
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-avro_${scala.binary.version}</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-spark3_${scala.binary.version}</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hbase</groupId>
|
||||
<artifactId>hbase-client</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-avro</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-spark-common_${scala.binary.version}</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-spark_${scala.binary.version}</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>io.delta</groupId>
|
||||
<artifactId>delta-core_${scala.binary.version}</artifactId>
|
||||
|
||||
@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.kyuubi.operation.datalake
|
||||
|
||||
import org.apache.kyuubi.WithKyuubiServer
|
||||
import org.apache.kyuubi.config.KyuubiConf
|
||||
import org.apache.kyuubi.operation.BasicHudiJDBCTests
|
||||
import org.apache.kyuubi.tags.HudiTest
|
||||
|
||||
/**
 * Runs the tests declared in `BasicHudiJDBCTests` on top of `WithKyuubiServer`, with the
 * Hudi-specific `extraConfigs` copied into the server configuration.
 */
@HudiTest
class HudiOperationSuite extends WithKyuubiServer with BasicHudiJDBCTests {

  /** JDBC URL used by the shared tests; delegates to `getJdbcUrl`. */
  override def jdbcUrl: String = getJdbcUrl

  /**
   * Server configuration: a new `KyuubiConf` with `ENGINE_IDLE_TIMEOUT` set to 20000,
   * followed by every key/value pair from `extraConfigs`.
   */
  override protected val conf: KyuubiConf = {
    val serverConf = KyuubiConf().set(KyuubiConf.ENGINE_IDLE_TIMEOUT, 20000L)
    for ((key, value) <- extraConfigs) {
      serverConf.set(key, value)
    }
    serverConf
  }
}
|
||||
76
pom.xml
76
pom.xml
@ -99,7 +99,9 @@
|
||||
<guava.version>30.1-jre</guava.version>
|
||||
<hadoop.version>3.2.2</hadoop.version>
|
||||
<hadoop.binary.version>3.2</hadoop.binary.version>
|
||||
<hbase.version>1.2.3</hbase.version>
|
||||
<hive.version>2.3.7</hive.version>
|
||||
<hudi.version>0.9.0</hudi.version>
|
||||
<iceberg.name>iceberg-spark3-runtime</iceberg.name>
|
||||
<iceberg.version>0.12.0</iceberg.version>
|
||||
<jackson.version>2.11.4</jackson.version>
|
||||
@ -110,6 +112,7 @@
|
||||
<jetty.version>9.4.41.v20210516</jetty.version>
|
||||
<kubernetes-client.version>5.5.0</kubernetes-client.version>
|
||||
<ldapsdk.version>5.1.4</ldapsdk.version>
|
||||
<parquet.version>1.10.1</parquet.version>
|
||||
<prometheus.version>0.10.0</prometheus.version>
|
||||
<scalatest.version>3.2.9</scalatest.version>
|
||||
<scopt.version>4.0.1</scopt.version>
|
||||
@ -1000,6 +1003,75 @@
|
||||
<version>${iceberg.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Hudi dependency -->
|
||||
<dependency>
|
||||
<groupId>org.apache.parquet</groupId>
|
||||
<artifactId>parquet-avro</artifactId>
|
||||
<version>${parquet.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hbase</groupId>
|
||||
<artifactId>hbase-client</artifactId>
|
||||
<version>${hbase.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>io.netty</groupId>
|
||||
<artifactId>netty-all</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>jdk.tools</groupId>
|
||||
<artifactId>jdk.tools</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.spark</groupId>
|
||||
<artifactId>spark-avro_${scala.binary.version}</artifactId>
|
||||
<version>${spark.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-spark3_${scala.binary.version}</artifactId>
|
||||
<version>${hudi.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-spark-common_2.11</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-spark-common_${scala.binary.version}</artifactId>
|
||||
<version>${hudi.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-yarn-server-common</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-spark_${scala.binary.version}</artifactId>
|
||||
<version>${hudi.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-spark2_2.11</artifactId>
|
||||
</exclusion>
|
||||
<exclusion>
|
||||
<groupId>org.apache.hudi</groupId>
|
||||
<artifactId>hudi-spark-common_2.11</artifactId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>io.delta</groupId>
|
||||
<artifactId>delta-core_${scala.binary.version}</artifactId>
|
||||
@ -1589,7 +1661,7 @@
|
||||
<properties>
|
||||
<spark.version>3.1.2</spark.version>
|
||||
<delta.version>1.0.0</delta.version>
|
||||
<maven.plugin.scalatest.exclude.tags>org.apache.kyuubi.tags.ExtendedSQLTest</maven.plugin.scalatest.exclude.tags>
|
||||
<maven.plugin.scalatest.exclude.tags>org.apache.kyuubi.tags.ExtendedSQLTest,org.apache.kyuubi.tags.HudiTest</maven.plugin.scalatest.exclude.tags>
|
||||
</properties>
|
||||
</profile>
|
||||
|
||||
@ -1597,7 +1669,7 @@
|
||||
<id>spark-master</id>
|
||||
<properties>
|
||||
<spark.version>3.2.0-SNAPSHOT</spark.version>
|
||||
<maven.plugin.scalatest.exclude.tags>org.apache.kyuubi.tags.ExtendedSQLTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest</maven.plugin.scalatest.exclude.tags>
|
||||
<maven.plugin.scalatest.exclude.tags>org.apache.kyuubi.tags.ExtendedSQLTest,org.apache.kyuubi.tags.DeltaTest,org.apache.kyuubi.tags.IcebergTest,org.apache.kyuubi.tags.HudiTest</maven.plugin.scalatest.exclude.tags>
|
||||
</properties>
|
||||
</profile>
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user