[KYUUBI #613] [TEST] [DELTA] Add test for deltalake


### _Why are the changes needed?_
Kyuubi already ships data lake tests for Iceberg but has no coverage for Delta Lake. This PR adds Delta Lake suites at both levels: `SparkDeltaOperationSuite` exercises the Spark SQL engine and `DeltaOperationSuite` exercises the Kyuubi server. It also switches the reflection helpers in `SparkCatalogShim` from `getDeclaredMethod` to `getMethod`, so that public methods inherited from superclasses can be resolved.

### _How was this patch tested?_
- [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible

- [ ] Add screenshots for manual tests if appropriate

- [ ] [Run test](https://kyuubi.readthedocs.io/en/latest/tools/testing.html#running-tests) locally before making a pull request

Closes #613 from pan3793/delta.


ccadedb [Cheng Pan] [TEST] [DELTA] Add test for deltalake

Authored-by: Cheng Pan <379377944@qq.com>
Signed-off-by: Kent Yao <yao@apache.org>
Committed: 2021-04-28 20:14:00 +08:00
parent 20911ff73c
commit 34374f3b3e
10 changed files with 361 additions and 12 deletions

View File

@@ -19,7 +19,7 @@ package org.apache.kyuubi.tpcds
 import org.apache.spark.SparkConf
 import org.apache.spark.sql.SparkSession
-import org.slf4j.{Logger, LoggerFactory}
+import org.slf4j.LoggerFactory

 object DataGenerator {
   private val logger =
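Only `LoggerFactory` is referenced here, so the `Logger` type import was dead. For context, a minimal sketch of the usual slf4j pattern in Scala (the object name is illustrative, not from the patch):

```scala
import org.slf4j.LoggerFactory

object LoggingSketch {
  // The Logger type is inferred from getLogger, so importing
  // org.slf4j.Logger is only needed when the type is written out explicitly.
  private val logger = LoggerFactory.getLogger(getClass)

  def main(args: Array[String]): Unit = logger.info("generating TPC-DS data")
}
```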

View File

@@ -128,6 +128,12 @@
             <scope>test</scope>
         </dependency>
+        <dependency>
+            <groupId>io.delta</groupId>
+            <artifactId>delta-core_${scala.binary.version}</artifactId>
+            <scope>test</scope>
+        </dependency>
         <dependency>
             <groupId>org.apache.curator</groupId>
             <artifactId>curator-test</artifactId>

View File

@@ -123,7 +123,7 @@ trait SparkCatalogShim extends Logging {
       methodName: String,
       args: (Class[_], AnyRef)*): Any = {
     val (types, values) = args.unzip
-    val method = obj.getClass.getDeclaredMethod(methodName, types: _*)
+    val method = obj.getClass.getMethod(methodName, types: _*)
     method.setAccessible(true)
     method.invoke(obj, values.toSeq: _*)
   }
@@ -134,7 +134,7 @@ trait SparkCatalogShim extends Logging {
       methodName: String,
       args: (Class[_], AnyRef)*): AnyRef = {
     val (types, values) = args.unzip
-    val method = clazz.getDeclaredMethod(methodName, types: _*)
+    val method = clazz.getMethod(methodName, types: _*)
     method.setAccessible(true)
     method.invoke(obj, values.toSeq: _*)
   }
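The switch from `getDeclaredMethod` to `getMethod` is significant: `getDeclaredMethod` only sees methods declared directly on the runtime class, while `getMethod` also resolves public methods inherited from superclasses. A toy illustration (the class names are made up):

```scala
object ReflectionSketch {
  class Base { def name(): String = "base" }
  class Derived extends Base

  def main(args: Array[String]): Unit = {
    val obj: AnyRef = new Derived
    // getDeclaredMethod only sees methods declared on Derived itself,
    // so this would throw NoSuchMethodException:
    //   obj.getClass.getDeclaredMethod("name")
    // getMethod also resolves public methods inherited from Base:
    val m = obj.getClass.getMethod("name")
    println(m.invoke(obj)) // prints "base"
  }
}
```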

View File

@@ -0,0 +1,35 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kyuubi.engine.spark.operation

import org.apache.kyuubi.engine.spark.WithSparkSQLEngine
import org.apache.kyuubi.operation.BasicDeltaJDBCTests
import org.apache.kyuubi.tags.DataLakeTest

@DataLakeTest
class SparkDeltaOperationSuite extends WithSparkSQLEngine with BasicDeltaJDBCTests {

  override protected def jdbcUrl: String = getJdbcUrl

  override def withKyuubiConf: Map[String, String] = deltaConfigs

  override def afterAll(): Unit = {
    super.afterAll()
    // Unset the Delta-related configs so they do not leak into later suites.
    for ((k, _) <- deltaConfigs) {
      System.clearProperty(k)
    }
  }
}
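The `afterAll` cleanup implies that `withKyuubiConf` entries reach the embedded engine as JVM system properties, which `SparkConf` reads for `spark.*` keys, so they must be cleared afterwards. A minimal sketch of that set-then-clear pattern, with the test harness itself elided:

```scala
object EngineConfSketch {
  // Same keys as deltaConfigs in the suite.
  val deltaConfigs: Map[String, String] = Map(
    "spark.sql.extensions" -> "io.delta.sql.DeltaSparkSessionExtension",
    "spark.sql.catalog.spark_catalog" ->
      "org.apache.spark.sql.delta.catalog.DeltaCatalog")

  def main(args: Array[String]): Unit = {
    // Set before the engine's SparkConf is built: SparkConf(loadDefaults = true)
    // picks up every system property starting with "spark.".
    deltaConfigs.foreach { case (k, v) => System.setProperty(k, v) }
    try {
      // ... launch the embedded engine and run tests here ...
    } finally {
      // Mirror of the suite's afterAll: unset so other suites start clean.
      deltaConfigs.keys.foreach(System.clearProperty)
    }
  }
}
```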

View File

@@ -0,0 +1,263 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kyuubi.operation

import java.nio.file.Path

import org.apache.kyuubi.Utils
import org.apache.kyuubi.operation.meta.ResultSetSchemaConstant._

trait BasicDeltaJDBCTests extends JDBCTestUtils {
  // Pick the delta-core jar off the test classpath so it can be passed via spark.jars.
  protected val deltaJar: String = {
    System.getProperty("java.class.path")
      .split(":")
      .filter(_.contains("delta-core")).head
  }

  protected val warehouse: Path = Utils.createTempDir()

  protected val deltaConfigs = Map(
    "spark.sql.extensions" -> "io.delta.sql.DeltaSparkSessionExtension",
    "spark.sql.catalog.spark_catalog" -> "org.apache.spark.sql.delta.catalog.DeltaCatalog",
    "spark.jars" -> deltaJar)
test("get catalogs") {
withJdbcStatement() { statement =>
val metaData = statement.getConnection.getMetaData
val catalogs = metaData.getCatalogs
catalogs.next()
assert(catalogs.getString(TABLE_CAT) === "spark_catalog")
assert(!catalogs.next())
}
}
test("get schemas") {
val dbs = Seq("db1", "db2", "db33", "db44")
val dbDflts = Seq("default", "global_temp")
val catalog = "spark_catalog"
withDatabases(dbs: _*) { statement =>
dbs.foreach(db => statement.execute(s"CREATE DATABASE IF NOT EXISTS $db"))
val metaData = statement.getConnection.getMetaData
Seq("", "*", "%", null, ".*", "_*", "_%", ".%") foreach { pattern =>
checkGetSchemas(metaData.getSchemas(catalog, pattern), dbs ++ dbDflts, catalog)
}
Seq("db%", "db.*") foreach { pattern =>
checkGetSchemas(metaData.getSchemas(catalog, pattern), dbs, catalog)
}
Seq("db_", "db.") foreach { pattern =>
checkGetSchemas(metaData.getSchemas(catalog, pattern), dbs.take(2), catalog)
}
checkGetSchemas(metaData.getSchemas(catalog, "db1"), Seq("db1"), catalog)
checkGetSchemas(metaData.getSchemas(catalog, "db_not_exist"), Seq.empty, catalog)
}
}
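The same metadata call works from any JDBC client; a minimal sketch, assuming a running engine (the URL is a placeholder, the suites obtain theirs from `getJdbcUrl`):

```scala
import java.sql.DriverManager

object GetSchemasSketch {
  def main(args: Array[String]): Unit = {
    // Placeholder URL; requires the Hive JDBC driver on the classpath.
    val conn = DriverManager.getConnection("jdbc:hive2://localhost:10009/")
    try {
      // "db%" uses SQL LIKE wildcards; the suite shows regex-style
      // patterns such as "db.*" are accepted as well.
      val schemas = conn.getMetaData.getSchemas("spark_catalog", "db%")
      while (schemas.next()) {
        println(s"${schemas.getString("TABLE_CATALOG")}.${schemas.getString("TABLE_SCHEM")}")
      }
    } finally {
      conn.close()
    }
  }
}
```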
test("get tables") {
val table = "table_1_test"
val schema = "default"
val tableType = "TABLE"
withJdbcStatement(table) { statement =>
statement.execute(
s"CREATE TABLE IF NOT EXISTS $table(key int) USING delta COMMENT '$table'")
val metaData = statement.getConnection.getMetaData
val rs1 = metaData.getTables(null, null, null, null)
assert(rs1.next())
val catalogName = rs1.getString(TABLE_CAT)
assert(catalogName === "spark_catalog" || catalogName === null)
assert(rs1.getString(TABLE_SCHEM) === schema)
assert(rs1.getString(TABLE_NAME) == table)
assert(rs1.getString(TABLE_TYPE) == tableType)
assert(rs1.getString(REMARKS) === table)
assert(!rs1.next())
val rs2 = metaData.getTables(null, null, "table%", Array("TABLE"))
assert(rs2.next())
assert(rs2.getString(TABLE_NAME) == table)
assert(!rs2.next())
val rs3 = metaData.getTables(null, "default", "*", Array("VIEW"))
assert(!rs3.next())
}
}
test("get type info") {
withJdbcStatement() { statement =>
val typeInfo = statement.getConnection.getMetaData.getTypeInfo
typeInfo.next()
assert(typeInfo.getString(TYPE_NAME) === "VOID")
assert(typeInfo.getInt(DATA_TYPE) === java.sql.Types.NULL)
assert(typeInfo.getInt(PRECISION) === 0)
assert(typeInfo.getShort(NULLABLE) === 1)
assert(!typeInfo.getBoolean(CASE_SENSITIVE))
assert(typeInfo.getShort(SEARCHABLE) === 3)
assert(typeInfo.getInt(NUM_PREC_RADIX) === 0)
typeInfo.next()
assert(typeInfo.getString(TYPE_NAME) === "BOOLEAN")
assert(typeInfo.getInt(DATA_TYPE) === java.sql.Types.BOOLEAN)
assert(typeInfo.getInt(PRECISION) === 0)
assert(typeInfo.getShort(NULLABLE) === 1)
assert(!typeInfo.getBoolean(CASE_SENSITIVE))
assert(typeInfo.getShort(SEARCHABLE) === 3)
assert(typeInfo.getInt(NUM_PREC_RADIX) === 0)
typeInfo.next()
assert(typeInfo.getString(TYPE_NAME) === "TINYINT")
assert(typeInfo.getInt(DATA_TYPE) === java.sql.Types.TINYINT)
assert(typeInfo.getInt(PRECISION) === 3)
assert(typeInfo.getShort(NULLABLE) === 1)
assert(!typeInfo.getBoolean(CASE_SENSITIVE))
assert(typeInfo.getShort(SEARCHABLE) === 3)
assert(typeInfo.getInt(NUM_PREC_RADIX) === 10)
typeInfo.next()
assert(typeInfo.getString(TYPE_NAME) === "SMALLINT")
assert(typeInfo.getInt(DATA_TYPE) === java.sql.Types.SMALLINT)
assert(typeInfo.getInt(PRECISION) === 5)
assert(typeInfo.getShort(NULLABLE) === 1)
assert(!typeInfo.getBoolean(CASE_SENSITIVE))
assert(typeInfo.getShort(SEARCHABLE) === 3)
assert(typeInfo.getInt(NUM_PREC_RADIX) === 10)
typeInfo.next()
assert(typeInfo.getString(TYPE_NAME) === "INTEGER")
assert(typeInfo.getInt(DATA_TYPE) === java.sql.Types.INTEGER)
assert(typeInfo.getInt(PRECISION) === 10)
assert(typeInfo.getShort(NULLABLE) === 1)
assert(!typeInfo.getBoolean(CASE_SENSITIVE))
assert(typeInfo.getShort(SEARCHABLE) === 3)
assert(typeInfo.getInt(NUM_PREC_RADIX) === 10)
typeInfo.next()
assert(typeInfo.getString(TYPE_NAME) === "BIGINT")
assert(typeInfo.getInt(DATA_TYPE) === java.sql.Types.BIGINT)
assert(typeInfo.getInt(PRECISION) === 19)
assert(typeInfo.getShort(NULLABLE) === 1)
assert(!typeInfo.getBoolean(CASE_SENSITIVE))
assert(typeInfo.getShort(SEARCHABLE) === 3)
assert(typeInfo.getInt(NUM_PREC_RADIX) === 10)
typeInfo.next()
assert(typeInfo.getString(TYPE_NAME) === "FLOAT")
assert(typeInfo.getInt(DATA_TYPE) === java.sql.Types.FLOAT)
assert(typeInfo.getInt(PRECISION) === 7)
assert(typeInfo.getShort(NULLABLE) === 1)
assert(!typeInfo.getBoolean(CASE_SENSITIVE))
assert(typeInfo.getShort(SEARCHABLE) === 3)
assert(typeInfo.getInt(NUM_PREC_RADIX) === 10)
typeInfo.next()
assert(typeInfo.getString(TYPE_NAME) === "DOUBLE")
assert(typeInfo.getInt(DATA_TYPE) === java.sql.Types.DOUBLE)
assert(typeInfo.getInt(PRECISION) === 15)
assert(typeInfo.getShort(NULLABLE) === 1)
assert(!typeInfo.getBoolean(CASE_SENSITIVE))
assert(typeInfo.getShort(SEARCHABLE) === 3)
assert(typeInfo.getInt(NUM_PREC_RADIX) === 10)
typeInfo.next()
assert(typeInfo.getString(TYPE_NAME) === "STRING")
assert(typeInfo.getInt(DATA_TYPE) === java.sql.Types.VARCHAR)
assert(typeInfo.getInt(PRECISION) === 0)
assert(typeInfo.getShort(NULLABLE) === 1)
assert(typeInfo.getBoolean(CASE_SENSITIVE))
assert(typeInfo.getShort(SEARCHABLE) === 3)
assert(typeInfo.getInt(NUM_PREC_RADIX) === 0)
typeInfo.next()
assert(typeInfo.getString(TYPE_NAME) === "BINARY")
assert(typeInfo.getInt(DATA_TYPE) === java.sql.Types.BINARY)
assert(typeInfo.getInt(PRECISION) === 0)
assert(typeInfo.getShort(NULLABLE) === 1)
assert(!typeInfo.getBoolean(CASE_SENSITIVE))
assert(typeInfo.getShort(SEARCHABLE) === 3)
assert(typeInfo.getInt(NUM_PREC_RADIX) === 0)
typeInfo.next()
assert(typeInfo.getString(TYPE_NAME) === "DECIMAL")
assert(typeInfo.getInt(DATA_TYPE) === java.sql.Types.DECIMAL)
assert(typeInfo.getInt(PRECISION) === 38)
assert(typeInfo.getShort(NULLABLE) === 1)
assert(!typeInfo.getBoolean(CASE_SENSITIVE))
assert(typeInfo.getShort(SEARCHABLE) === 3)
assert(typeInfo.getInt(NUM_PREC_RADIX) === 10)
typeInfo.next()
assert(typeInfo.getString(TYPE_NAME) === "DATE")
assert(typeInfo.getInt(DATA_TYPE) === java.sql.Types.DATE)
assert(typeInfo.getInt(PRECISION) === 0)
assert(typeInfo.getShort(NULLABLE) === 1)
assert(!typeInfo.getBoolean(CASE_SENSITIVE))
assert(typeInfo.getShort(SEARCHABLE) === 3)
assert(typeInfo.getInt(NUM_PREC_RADIX) === 0)
typeInfo.next()
assert(typeInfo.getString(TYPE_NAME) === "TIMESTAMP")
assert(typeInfo.getInt(DATA_TYPE) === java.sql.Types.TIMESTAMP)
assert(typeInfo.getInt(PRECISION) === 0)
assert(typeInfo.getShort(NULLABLE) === 1)
assert(!typeInfo.getBoolean(CASE_SENSITIVE))
assert(typeInfo.getShort(SEARCHABLE) === 3)
assert(typeInfo.getInt(NUM_PREC_RADIX) === 0)
typeInfo.next()
assert(typeInfo.getString(TYPE_NAME) === "ARRAY")
assert(typeInfo.getInt(DATA_TYPE) === java.sql.Types.ARRAY)
assert(typeInfo.getInt(PRECISION) === 0)
assert(typeInfo.getShort(NULLABLE) === 1)
assert(!typeInfo.getBoolean(CASE_SENSITIVE))
assert(typeInfo.getShort(SEARCHABLE) === 0)
assert(typeInfo.getInt(NUM_PREC_RADIX) === 0)
typeInfo.next()
assert(typeInfo.getString(TYPE_NAME) === "MAP")
assert(typeInfo.getInt(DATA_TYPE) === java.sql.Types.JAVA_OBJECT)
assert(typeInfo.getInt(PRECISION) === 0)
assert(typeInfo.getShort(NULLABLE) === 1)
assert(!typeInfo.getBoolean(CASE_SENSITIVE))
assert(typeInfo.getShort(SEARCHABLE) === 0)
assert(typeInfo.getInt(NUM_PREC_RADIX) === 0)
typeInfo.next()
assert(typeInfo.getString(TYPE_NAME) === "STRUCT")
assert(typeInfo.getInt(DATA_TYPE) === java.sql.Types.STRUCT)
assert(typeInfo.getInt(PRECISION) === 0)
assert(typeInfo.getShort(NULLABLE) === 1)
assert(!typeInfo.getBoolean(CASE_SENSITIVE))
assert(typeInfo.getShort(SEARCHABLE) === 0)
assert(typeInfo.getInt(NUM_PREC_RADIX) === 0)
typeInfo.next()
assert(typeInfo.getString(TYPE_NAME) === "INTERVAL")
assert(typeInfo.getInt(DATA_TYPE) === java.sql.Types.OTHER)
assert(typeInfo.getInt(PRECISION) === 0)
assert(typeInfo.getShort(NULLABLE) === 1)
assert(!typeInfo.getBoolean(CASE_SENSITIVE))
assert(typeInfo.getShort(SEARCHABLE) === 0)
assert(typeInfo.getInt(NUM_PREC_RADIX) === 0)
}
}
}
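The three `deltaConfigs` entries mirror Delta Lake's documented Spark setup. As a rough standalone equivalent, a sketch assuming delta-core is already on the driver classpath (the app and table names are illustrative):

```scala
import org.apache.spark.sql.SparkSession

object DeltaSessionSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      .appName("delta-sketch")
      // Adds Delta's SQL extensions (parser rules for `USING delta`, etc.).
      .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension")
      // Routes the built-in spark_catalog through Delta's catalog implementation.
      .config("spark.sql.catalog.spark_catalog",
        "org.apache.spark.sql.delta.catalog.DeltaCatalog")
      .getOrCreate()

    spark.sql("CREATE TABLE IF NOT EXISTS demo(key INT) USING delta")
    spark.sql("SELECT COUNT(*) FROM demo").show()
    spark.stop()
  }
}
```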

View File

@@ -26,7 +26,7 @@ trait BasicIcebergJDBCTests extends JDBCTestUtils {
   protected def catalog: String = "hadoop_prod"

-  protected val iceberg: String = {
+  protected val icebergJar: String = {
     System.getProperty("java.class.path")
       .split(":")
       .filter(_.contains("iceberg-spark")).head
@@ -42,7 +42,7 @@ trait BasicIcebergJDBCTests extends JDBCTestUtils {
     s"spark.sql.catalog.$catalog" -> "org.apache.iceberg.spark.SparkCatalog",
     s"spark.sql.catalog.$catalog.type" -> "hadoop",
     s"spark.sql.catalog.$catalog.warehouse" -> warehouse.toString,
-    "spark.jars" -> iceberg)
+    "spark.jars" -> icebergJar)

   test("get catalogs") {
     withJdbcStatement() { statement =>
@@ -128,16 +128,16 @@ trait BasicIcebergJDBCTests extends JDBCTestUtils {
       dbs.foreach(db => statement.execute(s"CREATE NAMESPACE IF NOT EXISTS $db"))
       val metaData = statement.getConnection.getMetaData
-      Seq("hadoop_prod").foreach { catalog =>
+      Seq(catalog).foreach { cg =>
         dbs.foreach { db =>
           try {
             statement.execute(
-              s"CREATE TABLE IF NOT EXISTS $catalog.$db.tbl(c STRING) USING iceberg")
+              s"CREATE TABLE IF NOT EXISTS $cg.$db.tbl(c STRING) USING iceberg")
-            val rs1 = metaData.getTables(catalog, db, "%", null)
+            val rs1 = metaData.getTables(cg, db, "%", null)
             while (rs1.next()) {
               val catalogName = rs1.getString(TABLE_CAT)
-              assert(catalogName === catalog)
+              assert(catalogName === cg)
               assert(rs1.getString(TABLE_SCHEM) === db)
               assert(rs1.getString(TABLE_NAME) === "tbl")
               assert(rs1.getString(TABLE_TYPE) == "TABLE")
@@ -145,7 +145,7 @@ trait BasicIcebergJDBCTests extends JDBCTestUtils {
             }
             assert(!rs1.next())
           } finally {
-            statement.execute(s"DROP TABLE IF EXISTS $catalog.$db.tbl")
+            statement.execute(s"DROP TABLE IF EXISTS $cg.$db.tbl")
           }
         }
       }

View File

@@ -144,8 +144,7 @@ trait JDBCTestUtils extends KyuubiFunSuite {
     }
   }

-  def checkGetSchemas(
-      rs: ResultSet, dbNames: Seq[String], catalogName: String = ""): Unit = {
+  def checkGetSchemas(rs: ResultSet, dbNames: Seq[String], catalogName: String = ""): Unit = {
     var count = 0
     while(rs.next()) {
       count += 1

View File

@@ -138,6 +138,12 @@
             <artifactId>${iceberg.name}</artifactId>
             <scope>test</scope>
         </dependency>
+        <dependency>
+            <groupId>io.delta</groupId>
+            <artifactId>delta-core_${scala.binary.version}</artifactId>
+            <scope>test</scope>
+        </dependency>
     </dependencies>

     <build>

View File

@@ -0,0 +1,33 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kyuubi.operation.datalake

import org.apache.kyuubi.config.KyuubiConf
import org.apache.kyuubi.operation.{BasicDeltaJDBCTests, WithKyuubiServer}
import org.apache.kyuubi.tags.DataLakeTest

@DataLakeTest
class DeltaOperationSuite extends WithKyuubiServer with BasicDeltaJDBCTests {

  override protected val conf: KyuubiConf = {
    val kyuubiConf = KyuubiConf().set(KyuubiConf.ENGINE_IDLE_TIMEOUT, 20000L)
    // Forward the Delta-related Spark configs to the engine via KyuubiConf.
    deltaConfigs.foreach { case (k, v) => kyuubiConf.set(k, v) }
    kyuubiConf
  }

  override def jdbcUrl: String = getJdbcUrl
}
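End to end, this server-side suite boils down to a plain JDBC client issuing Delta SQL through Kyuubi. A hedged sketch with placeholder connection details (the suite itself obtains its URL from `getJdbcUrl`):

```scala
import java.sql.DriverManager

object DeltaOverJdbcSketch {
  def main(args: Array[String]): Unit = {
    // Placeholder URL and credentials; requires the Hive JDBC driver on the classpath.
    val conn = DriverManager.getConnection("jdbc:hive2://localhost:10009/", "user", "")
    val stmt = conn.createStatement()
    try {
      stmt.execute("CREATE TABLE IF NOT EXISTS demo_tbl(key INT) USING delta")
      val rs = stmt.executeQuery("SELECT COUNT(*) FROM demo_tbl")
      while (rs.next()) println(s"rows: ${rs.getLong(1)}")
    } finally {
      stmt.execute("DROP TABLE IF EXISTS demo_tbl")
      conn.close()
    }
  }
}
```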

View File

@@ -67,6 +67,7 @@
         <commons-collections.version>3.2.2</commons-collections.version>
         <commons-lang3.version>3.10</commons-lang3.version>
         <curator.version>2.12.0</curator.version>
+        <delta.version>0.8.0</delta.version>
         <guava.version>30.1-jre</guava.version>
         <hadoop.version>3.2.2</hadoop.version>
         <hadoop.binary.version>2.7</hadoop.binary.version>
@@ -1043,6 +1044,12 @@
             <artifactId>${iceberg.name}</artifactId>
             <version>${iceberg.version}</version>
         </dependency>
+        <dependency>
+            <groupId>io.delta</groupId>
+            <artifactId>delta-core_${scala.binary.version}</artifactId>
+            <version>${delta.version}</version>
+        </dependency>
     </dependencies>
 </dependencyManagement>