[KYUUBI #616] [TEST] [TPCDS] [DELTA] Migrate TPCDS test to Delta

<!--
Thanks for sending a pull request!

Here are some tips for you:
  1. If this is your first time, please read our contributor guidelines: https://kyuubi.readthedocs.io/en/latest/community/contributions.html
  2. If the PR is related to an issue in https://github.com/NetEase/kyuubi/issues, add '[KYUUBI #XXXX]' in your PR title, e.g., '[KYUUBI #XXXX] Your PR title ...'.
  3. If the PR is unfinished, add '[WIP]' in your PR title, e.g., '[WIP][KYUUBI #XXXX] Your PR title ...'.
-->

### _Why are the changes needed?_
<!--
Please clarify why the changes are needed. For instance,
  1. If you add a feature, you can talk about the use case of it.
  2. If you fix a bug, you can clarify why it is a bug.
-->

### _How was this patch tested?_
- [ ] Add some test cases that check the changes thoroughly, including negative and positive cases where possible

- [ ] Add screenshots for manual tests if appropriate

- [x] [Run test](https://kyuubi.readthedocs.io/en/latest/tools/testing.html#running-tests) locally before making a pull request

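Delta 1.0.0 has finally been released with Spark 3.1.1 support, so the TPC-DS tests can be migrated to Delta and run under the `spark-3.1` profile. The relevant Spark behavior around CHAR/VARCHAR columns is: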

- In Spark 3.0, column of CHAR type is not allowed in non-Hive-Serde tables, and CREATE/ALTER TABLE commands will fail if CHAR type is detected. Please use STRING type instead. In Spark version 2.4 and below, CHAR type is treated as STRING type and the length parameter is simply ignored.

- Since Spark 3.1, CHAR/CHARACTER and VARCHAR types are supported in the table schema. Table scan/insertion will respect the char/varchar semantic. If char/varchar is used in places other than table schema, an exception will be thrown (CAST is an exception that simply treats char/varchar as string like before). To restore the behavior before Spark 3.1, which treats them as STRING types and ignores a length parameter, e.g. `CHAR(4)`, you can set `spark.sql.legacy.charVarcharAsString` to `true`.

Closes #616 from pan3793/tpcds.

Closes #616

ed934178 [Cheng Pan] column order
d772b43c [Cheng Pan] Fix TPCDS
71b70be7 [Cheng Pan] ci
9518b601 [Cheng Pan] [TEST] [TPCDS] [DELTA] Migrate TPCDS test to Delta

Authored-by: Cheng Pan <379377944@qq.com>
Signed-off-by: Cheng Pan <379377944@qq.com>
This commit is contained in:
Cheng Pan 2021-05-30 01:34:00 +08:00
parent 24e8f7b639
commit 9ce0aa10fe
6 changed files with 41 additions and 27 deletions

View File

@ -123,5 +123,5 @@ jobs:
${{ runner.os }}-maven-io-
- name: Run TPC-DS Tests
run: |
mvn clean install --no-transfer-progress -Pspark-3.0 -DskipTests -pl :kyuubi-spark-sql-engine,:kyuubi-common,:kyuubi-ha,:kyuubi-zookeeper
mvn test --no-transfer-progress -Pspark-3.0 -Dtest=none -DwildcardSuites=*TPCDSSuite
mvn clean install --no-transfer-progress -Pspark-3.1 -DskipTests -pl :kyuubi-spark-sql-engine,:kyuubi-common,:kyuubi-ha,:kyuubi-zookeeper
mvn test --no-transfer-progress -Pspark-3.1 -Dtest=none -DwildcardSuites=org.apache.kyuubi.operation.tpcds -Dmaven.plugin.scalatest.exclude.tags=''

View File

@ -45,9 +45,9 @@ matrix:
env:
- PROFILE="-Pspark-master"
- JOB="nightly-build"
- name: Test Kyuubi w/ -Pspark-3.0 TPCDS Tests
- name: Test Kyuubi w/ -Pspark-3.1 TPCDS Tests
env:
- PROFILE="-Pspark-3.0"
- PROFILE="-Pspark-3.1 -Dmaven.plugin.scalatest.exclude.tags=''"
- JOB="tpcds-tests"
install:
@ -65,7 +65,7 @@ script:
- |
if [[ "$JOB" == "tpcds-tests" ]]; then
mvn clean install --no-transfer-progress $PROFILE -DskipTests -pl :kyuubi-spark-sql-engine,:kyuubi-common,:kyuubi-ha,:kyuubi-zookeeper
mvn test --no-transfer-progress $PROFILE -Dtest=none -DwildcardSuites=*TPCDSSuite
mvn test --no-transfer-progress $PROFILE -Dtest=none -DwildcardSuites=org.apache.kyuubi.operation.tpcds
fi
after_success:

View File

@ -17,19 +17,26 @@
package org.apache.kyuubi.operation.tpcds
import org.apache.kyuubi.WithKyuubiServer
import org.apache.kyuubi.{DeltaSuiteMixin, WithKyuubiServer}
import org.apache.kyuubi.config.KyuubiConf
import org.apache.kyuubi.operation.JDBCTestUtils
import org.apache.kyuubi.tags.ExtendedSQLTest
import org.apache.kyuubi.tags.{DeltaTest, ExtendedSQLTest}
@DeltaTest
@ExtendedSQLTest
class DDLTPCDSSuite extends WithKyuubiServer with JDBCTestUtils with TPCDSHelper {
override protected val conf: KyuubiConf = KyuubiConf()
class DDLTPCDSSuite extends WithKyuubiServer
with JDBCTestUtils
with TPCDSHelper
with DeltaSuiteMixin {
override protected val conf: KyuubiConf = {
val kyuubiConf = KyuubiConf().set(KyuubiConf.ENGINE_IDLE_TIMEOUT, 20000L)
extraConfigs.foreach { case (k, v) => kyuubiConf.set(k, v) }
kyuubiConf
}
override protected def jdbcUrl: String = getJdbcUrl
override def format: String = "hive OPTIONS(fileFormat='parquet')"
override def database: String = this.getClass.getSimpleName
override def beforeAll(): Unit = {
@ -53,12 +60,7 @@ class DDLTPCDSSuite extends WithKyuubiServer with JDBCTestUtils with TPCDSHelper
val resultSet = statement.executeQuery(s"SELECT * FROM ${tableDef.table.qualifiedName}")
assert(!resultSet.next())
val meta = resultSet.getMetaData
val fields = if (tableDef.fields.head.isPartitionKey) {
tableDef.fields.tail :+ tableDef.fields.head
} else {
tableDef.fields
}
fields.zipWithIndex.foreach { case (f, i) =>
tableDef.fields.zipWithIndex.foreach { case (f, i) =>
assert(meta.getColumnName(i + 1) === f.name)
}
}

View File

@ -20,30 +20,39 @@ package org.apache.kyuubi.operation.tpcds
import java.nio.charset.Charset
import java.nio.file.{Files, Path, Paths}
import org.apache.kyuubi.WithKyuubiServer
import org.apache.kyuubi.{DeltaSuiteMixin, WithKyuubiServer}
import org.apache.kyuubi.config.KyuubiConf
import org.apache.kyuubi.operation.JDBCTestUtils
import org.apache.kyuubi.tags.ExtendedSQLTest
import org.apache.kyuubi.tags.{DeltaTest, ExtendedSQLTest}
// scalastyle:off line.size.limit
/**
* To run this test suite:
* {{{
* build/mvn test -Dtest=none -DwildcardSuites=org.apache.kyuubi.operation.tpcds.OutputSchemaTPCDSSuite
* build/mvn test -Pspark-3.1 -Dtest=none -DwildcardSuites=org.apache.kyuubi.operation.tpcds.OutputSchemaTPCDSSuite -Dmaven.plugin.scalatest.exclude.tags=''
* }}}
*
* To re-generate golden files for this suite:
* {{{
* KYUUBI_UPDATE=1 build/mvn test -Dtest=none -DwildcardSuites=org.apache.kyuubi.operation.tpcds.OutputSchemaTPCDSSuite
* KYUUBI_UPDATE=1 build/mvn test -Pspark-3.1 -Dtest=none -DwildcardSuites=org.apache.kyuubi.operation.tpcds.OutputSchemaTPCDSSuite -Dmaven.plugin.scalatest.exclude.tags=''
* }}}
*/
// scalastyle:on line.size.limit
@DeltaTest
@ExtendedSQLTest
class OutputSchemaTPCDSSuite extends WithKyuubiServer with JDBCTestUtils with TPCDSHelper {
override protected val conf: KyuubiConf = KyuubiConf()
class OutputSchemaTPCDSSuite extends WithKyuubiServer
with JDBCTestUtils
with TPCDSHelper
with DeltaSuiteMixin {
override protected val conf: KyuubiConf = {
val kyuubiConf = KyuubiConf().set(KyuubiConf.ENGINE_IDLE_TIMEOUT, 20000L)
extraConfigs.foreach { case (k, v) => kyuubiConf.set(k, v) }
kyuubiConf
}
override protected def jdbcUrl: String = getJdbcUrl
override def database: String = this.getClass.getSimpleName
override def format: String = "hive OPTIONS(fileFormat='parquet')"
private val queryNameReg = """([a-z]+)(\d+)""".r("head", "index")
override def beforeAll(): Unit = {

View File

@ -19,8 +19,8 @@ package org.apache.kyuubi.operation.tpcds
trait TPCDSHelper {
protected def format: String = "parquet"
protected def database: String = "default"
protected def format: String
protected def database: String
case class TableIdent(name: String) {
def qualifiedName: String = database + "." + name

View File

@ -1567,7 +1567,7 @@
<properties>
<spark.version>3.0.2</spark.version>
<delta.version>0.8.0</delta.version>
<maven.plugin.scalatest.exclude.tags></maven.plugin.scalatest.exclude.tags>
<maven.plugin.scalatest.exclude.tags>org.apache.kyuubi.tags.ExtendedSQLTest</maven.plugin.scalatest.exclude.tags>
</properties>
</profile>
@ -1590,6 +1590,9 @@
<profile>
<id>spark-hadoop-2.7</id>
<properties>
<hadoop.binary.version>2.7</hadoop.binary.version>
</properties>
</profile>
<profile>