From 9ce0aa10fee3a436c39580886e99ffd3ae5d674a Mon Sep 17 00:00:00 2001
From: Cheng Pan <379377944@qq.com>
Date: Sun, 30 May 2021 01:34:00 +0800
Subject: [PATCH] [KYUUBI #616] [TEST] [TPCDS] [DELTA] Migrate TPCDS test to Delta

### _Why are the changes needed?_

### _How was this patch tested?_
- [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible

- [ ] Add screenshots for manual tests if appropriate

- [x] [Run test](https://kyuubi.readthedocs.io/en/latest/tools/testing.html#running-tests) locally before making a pull request

Finally, Delta 1.0.0 is out, with Spark 3.1.1 support.

- In Spark 3.0, a column of CHAR type is not allowed in non-Hive-Serde tables, and CREATE/ALTER TABLE commands will fail if CHAR type is detected. Please use STRING type instead. In Spark version 2.4 and below, CHAR type is treated as STRING type and the length parameter is simply ignored.
- Since Spark 3.1, CHAR/CHARACTER and VARCHAR types are supported in the table schema. Table scan/insertion will respect the char/varchar semantics. If char/varchar is used in places other than table schema, an exception will be thrown (CAST is an exception that simply treats char/varchar as string like before). To restore the behavior before Spark 3.1, which treats them as STRING types and ignores a length parameter, e.g. `CHAR(4)`, you can set `spark.sql.legacy.charVarcharAsString` to `true`.

Closes #616 from pan3793/tpcds.

Closes #616 ed934178 [Cheng Pan] column order d772b43c [Cheng Pan] Fix TPCDS 71b70be7 [Cheng Pan] ci 9518b601 [Cheng Pan] [TEST] [TPCDS] [DELTA] Migrate TPCDS test to Delta Authored-by: Cheng Pan <379377944@qq.com> Signed-off-by: Cheng Pan <379377944@qq.com> --- .github/workflows/master.yml | 4 +-- .travis.yml | 6 ++--- .../operation/tpcds/DDLTPCDSSuite.scala | 26 ++++++++++--------- .../tpcds/OutputSchemaTPCDSSuite.scala | 23 +++++++++++----- .../kyuubi/operation/tpcds/TPCDSHelper.scala | 4 +-- pom.xml | 5 +++- 6 files changed, 41 insertions(+), 27 deletions(-) diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml index 4849de1fd..fe03c0e8a 100644 --- a/.github/workflows/master.yml +++ b/.github/workflows/master.yml @@ -123,5 +123,5 @@ jobs: ${{ runner.os }}-maven-io- - name: Run TPC-DS Tests run: | - mvn clean install --no-transfer-progress -Pspark-3.0 -DskipTests -pl :kyuubi-spark-sql-engine,:kyuubi-common,:kyuubi-ha,:kyuubi-zookeeper - mvn test --no-transfer-progress -Pspark-3.0 -Dtest=none -DwildcardSuites=*TPCDSSuite + mvn clean install --no-transfer-progress -Pspark-3.1 -DskipTests -pl :kyuubi-spark-sql-engine,:kyuubi-common,:kyuubi-ha,:kyuubi-zookeeper + mvn test --no-transfer-progress -Pspark-3.1 -Dtest=none -DwildcardSuites=org.apache.kyuubi.operation.tpcds -Dmaven.plugin.scalatest.exclude.tags='' diff --git a/.travis.yml b/.travis.yml index 71d328b84..497a0e0a9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -45,9 +45,9 @@ matrix: env: - PROFILE="-Pspark-master" - JOB="nightly-build" - - name: Test Kyuubi w/ -Pspark-3.0 TPCDS Tests + - name: Test Kyuubi w/ -Pspark-3.1 TPCDS Tests env: - - PROFILE="-Pspark-3.0" + - PROFILE="-Pspark-3.1 -Dmaven.plugin.scalatest.exclude.tags=''" - JOB="tpcds-tests" install: @@ -65,7 +65,7 @@ script: - | if [[ "$JOB" == "tpcds-tests" ]]; then mvn clean install --no-transfer-progress $PROFILE -DskipTests -pl :kyuubi-spark-sql-engine,:kyuubi-common,:kyuubi-ha,:kyuubi-zookeeper - mvn test --no-transfer-progress 
$PROFILE -Dtest=none -DwildcardSuites=*TPCDSSuite + mvn test --no-transfer-progress $PROFILE -Dtest=none -DwildcardSuites=org.apache.kyuubi.operation.tpcds fi after_success: diff --git a/kyuubi-main/src/test/scala/org/apache/kyuubi/operation/tpcds/DDLTPCDSSuite.scala b/kyuubi-main/src/test/scala/org/apache/kyuubi/operation/tpcds/DDLTPCDSSuite.scala index 28f66fc84..bbc27a710 100644 --- a/kyuubi-main/src/test/scala/org/apache/kyuubi/operation/tpcds/DDLTPCDSSuite.scala +++ b/kyuubi-main/src/test/scala/org/apache/kyuubi/operation/tpcds/DDLTPCDSSuite.scala @@ -17,19 +17,26 @@ package org.apache.kyuubi.operation.tpcds -import org.apache.kyuubi.WithKyuubiServer +import org.apache.kyuubi.{DeltaSuiteMixin, WithKyuubiServer} import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.operation.JDBCTestUtils -import org.apache.kyuubi.tags.ExtendedSQLTest +import org.apache.kyuubi.tags.{DeltaTest, ExtendedSQLTest} +@DeltaTest @ExtendedSQLTest -class DDLTPCDSSuite extends WithKyuubiServer with JDBCTestUtils with TPCDSHelper { - override protected val conf: KyuubiConf = KyuubiConf() +class DDLTPCDSSuite extends WithKyuubiServer + with JDBCTestUtils + with TPCDSHelper + with DeltaSuiteMixin { + + override protected val conf: KyuubiConf = { + val kyuubiConf = KyuubiConf().set(KyuubiConf.ENGINE_IDLE_TIMEOUT, 20000L) + extraConfigs.foreach { case (k, v) => kyuubiConf.set(k, v) } + kyuubiConf + } override protected def jdbcUrl: String = getJdbcUrl - override def format: String = "hive OPTIONS(fileFormat='parquet')" - override def database: String = this.getClass.getSimpleName override def beforeAll(): Unit = { @@ -53,12 +60,7 @@ class DDLTPCDSSuite extends WithKyuubiServer with JDBCTestUtils with TPCDSHelper val resultSet = statement.executeQuery(s"SELECT * FROM ${tableDef.table.qualifiedName}") assert(!resultSet.next()) val meta = resultSet.getMetaData - val fields = if (tableDef.fields.head.isPartitionKey) { - tableDef.fields.tail :+ tableDef.fields.head - } else { - 
tableDef.fields - } - fields.zipWithIndex.foreach { case (f, i) => + tableDef.fields.zipWithIndex.foreach { case (f, i) => assert(meta.getColumnName(i + 1) === f.name) } } diff --git a/kyuubi-main/src/test/scala/org/apache/kyuubi/operation/tpcds/OutputSchemaTPCDSSuite.scala b/kyuubi-main/src/test/scala/org/apache/kyuubi/operation/tpcds/OutputSchemaTPCDSSuite.scala index 5066c56ac..7c5f9d200 100644 --- a/kyuubi-main/src/test/scala/org/apache/kyuubi/operation/tpcds/OutputSchemaTPCDSSuite.scala +++ b/kyuubi-main/src/test/scala/org/apache/kyuubi/operation/tpcds/OutputSchemaTPCDSSuite.scala @@ -20,30 +20,39 @@ package org.apache.kyuubi.operation.tpcds import java.nio.charset.Charset import java.nio.file.{Files, Path, Paths} -import org.apache.kyuubi.WithKyuubiServer +import org.apache.kyuubi.{DeltaSuiteMixin, WithKyuubiServer} import org.apache.kyuubi.config.KyuubiConf import org.apache.kyuubi.operation.JDBCTestUtils -import org.apache.kyuubi.tags.ExtendedSQLTest +import org.apache.kyuubi.tags.{DeltaTest, ExtendedSQLTest} // scalastyle:off line.size.limit /** * To run this test suite: * {{{ - * build/mvn test -Dtest=none -DwildcardSuites=org.apache.kyuubi.operation.tpcds.OutputSchemaTPCDSSuite + * build/mvn test -Pspark-3.1 -Dtest=none -DwildcardSuites=org.apache.kyuubi.operation.tpcds.OutputSchemaTPCDSSuite -Dmaven.plugin.scalatest.exclude.tags='' * }}} * * To re-generate golden files for this suite: * {{{ - * KYUUBI_UPDATE=1 build/mvn test -Dtest=none -DwildcardSuites=org.apache.kyuubi.operation.tpcds.OutputSchemaTPCDSSuite + * KYUUBI_UPDATE=1 build/mvn test -Pspark-3.1 -Dtest=none -DwildcardSuites=org.apache.kyuubi.operation.tpcds.OutputSchemaTPCDSSuite -Dmaven.plugin.scalatest.exclude.tags='' * }}} */ // scalastyle:on line.size.limit +@DeltaTest @ExtendedSQLTest -class OutputSchemaTPCDSSuite extends WithKyuubiServer with JDBCTestUtils with TPCDSHelper { - override protected val conf: KyuubiConf = KyuubiConf() +class OutputSchemaTPCDSSuite extends WithKyuubiServer + 
with JDBCTestUtils + with TPCDSHelper + with DeltaSuiteMixin { + + override protected val conf: KyuubiConf = { + val kyuubiConf = KyuubiConf().set(KyuubiConf.ENGINE_IDLE_TIMEOUT, 20000L) + extraConfigs.foreach { case (k, v) => kyuubiConf.set(k, v) } + kyuubiConf + } + override protected def jdbcUrl: String = getJdbcUrl override def database: String = this.getClass.getSimpleName - override def format: String = "hive OPTIONS(fileFormat='parquet')" private val queryNameReg = """([a-z]+)(\d+)""".r("head", "index") override def beforeAll(): Unit = { diff --git a/kyuubi-main/src/test/scala/org/apache/kyuubi/operation/tpcds/TPCDSHelper.scala b/kyuubi-main/src/test/scala/org/apache/kyuubi/operation/tpcds/TPCDSHelper.scala index 1d86f30a3..6404dfea8 100644 --- a/kyuubi-main/src/test/scala/org/apache/kyuubi/operation/tpcds/TPCDSHelper.scala +++ b/kyuubi-main/src/test/scala/org/apache/kyuubi/operation/tpcds/TPCDSHelper.scala @@ -19,8 +19,8 @@ package org.apache.kyuubi.operation.tpcds trait TPCDSHelper { - protected def format: String = "parquet" - protected def database: String = "default" + protected def format: String + protected def database: String case class TableIdent(name: String) { def qualifiedName: String = database + "." + name diff --git a/pom.xml b/pom.xml index 54f5e43bd..47efc14c8 100644 --- a/pom.xml +++ b/pom.xml @@ -1567,7 +1567,7 @@ 3.0.2 0.8.0 - + org.apache.kyuubi.tags.ExtendedSQLTest @@ -1590,6 +1590,9 @@ spark-hadoop-2.7 + + 2.7 +