[KYUUBI #566] Add TPCDS query output schema check (#566)

### _Why are the changes needed?_
<!--
Please clarify why the changes are needed. For instance,
  1. If you add a feature, you can talk about the use case of it.
  2. If you fix a bug, you can clarify why it is a bug.
-->
Use TPCDS query to check if we have some behavior change.

Note that 
1. this PR only add `q1`.
2. this PR just to check the output schema. The output data check can be added in future.
3. this new added test file structure looks like
```
tpcds-1_4
  - q1
    - q1.sql
    - q1.output.schema 
    - q1.output.data (if we support this in future)
  - q2
  ......
```

### _How was this patch tested?_
Add new test
This commit is contained in:
ulysses 2021-04-22 21:52:57 +08:00 committed by GitHub
parent 913d3f19d4
commit fd677f14eb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 158 additions and 2 deletions

View File

@ -16,7 +16,7 @@
#
# Set everything to be logged to the file target/unit-tests.log
log4j.rootLogger=DEBUG, CA, FA
log4j.rootLogger=INFO, CA, FA
#Console Appender
log4j.appender.CA=org.apache.log4j.ConsoleAppender
@ -32,7 +32,7 @@ log4j.appender.FA.layout=org.apache.log4j.PatternLayout
log4j.appender.FA.layout.ConversionPattern=%d{HH:mm:ss.SSS} %t %p %c{2}: %m%n
# Set the logger level of File Appender to WARN
log4j.appender.FA.Threshold = DEBUG
log4j.appender.FA.Threshold = INFO
# SPARK-34128Suppress undesirable TTransportException warnings involved in THRIFT-4805
log4j.appender.console.filter.1=org.apache.log4j.varia.StringMatchFilter

View File

@ -0,0 +1 @@
struct<c_customer_id:string>

View File

@ -0,0 +1,36 @@
--
-- Licensed to the Apache Software Foundation (ASF) under one or more
-- contributor license agreements. See the NOTICE file distributed with
-- this work for additional information regarding copyright ownership.
-- The ASF licenses this file to You under the Apache License, Version 2.0
-- (the "License"); you may not use this file except in compliance with
-- the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing, software
-- distributed under the License is distributed on an "AS IS" BASIS,
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-- See the License for the specific language governing permissions and
-- limitations under the License.
--
WITH customer_total_return AS
( SELECT
sr_customer_sk AS ctr_customer_sk,
sr_store_sk AS ctr_store_sk,
sum(sr_return_amt) AS ctr_total_return
FROM store_returns, date_dim
WHERE sr_returned_date_sk = d_date_sk AND d_year = 2000
GROUP BY sr_customer_sk, sr_store_sk)
SELECT c_customer_id
FROM customer_total_return ctr1, store, customer
WHERE ctr1.ctr_total_return >
(SELECT avg(ctr_total_return) * 1.2
FROM customer_total_return ctr2
WHERE ctr1.ctr_store_sk = ctr2.ctr_store_sk)
AND s_store_sk = ctr1.ctr_store_sk
AND s_state = 'TN'
AND ctr1.ctr_customer_sk = c_customer_sk
ORDER BY c_customer_id
LIMIT 100

View File

@ -0,0 +1,118 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kyuubi.operation.tpcds
import java.nio.charset.Charset
import java.nio.file.{Files, Path, Paths}
import org.apache.kyuubi.config.KyuubiConf
import org.apache.kyuubi.operation.{JDBCTestUtils, WithKyuubiServer}
// scalastyle:off line.size.limit
/**
* To run this test suite:
* {{{
* build/mvn -Dtest=none -DwildcardSuites=org.apache.kyuubi.operation.tpcds.TPCDSOutputSchemaSuite test
* }}}
*
* To re-generate golden files for this suite:
* {{{
* KYUUBI_UPDATE=1 build/mvn -Dtest=none -DwildcardSuites=org.apache.kyuubi.operation.tpcds.TPCDSOutputSchemaSuite test
* }}}
*/
// scalastyle:on line.size.limit
class TPCDSOutputSchemaSuite extends WithKyuubiServer with JDBCTestUtils with TPCDSHelper {
override protected val conf: KyuubiConf = KyuubiConf()
override protected def jdbcUrl: String = getJdbcUrl
override def database: String = this.getClass.getSimpleName
override def format: String = "hive OPTIONS(fileFormat='parquet')"
override def beforeAll(): Unit = {
super.beforeAll()
withJdbcStatement() { statement =>
statement.execute(s"CREATE DATABASE IF NOT EXISTS $database")
tables.foreach { table =>
statement.execute(table.create)
}
}
}
override def afterAll(): Unit = {
withJdbcStatement() { statement =>
statement.execute(s"DROP DATABASE IF EXISTS $database CASCADE")
}
super.afterAll()
}
private val regenerateGoldenFiles = sys.env.get("KYUUBI_UPDATE").contains("1")
protected val baseResourcePath: Path = {
java.nio.file.Paths.get("src", "test", "resources")
}
private def fileToString(file: Path): String = {
new String(Files.readAllBytes(file), Charset.forName("UTF-8"))
}
private def runQuery(query: String, goldenFile: Path): Unit = {
withJdbcStatement() { statement =>
statement.execute(s"USE $database")
val result = statement.executeQuery(query)
try {
val columnTypes = (1 to result.getMetaData.getColumnCount).map { i =>
s"${result.getMetaData.getColumnName(i)}:${result.getMetaData.getColumnTypeName(i)}"
}.mkString("struct<", ",", ">\n")
if (regenerateGoldenFiles) {
Files.write(goldenFile, columnTypes.getBytes())
}
val expected = fileToString(goldenFile)
assert(columnTypes === expected)
} finally {
result.close()
}
}
}
private def runQueries(name: String): Unit = {
val queriesRoot = Thread.currentThread().getContextClassLoader.getResource(name)
val queries = Files.list(Paths.get(queriesRoot.toURI))
import scala.collection.JavaConverters._
val validQueries = queries.iterator().asScala.filter { query =>
Files.list(query).iterator().asScala.exists { q =>
q.getFileName.toString == s"${query.getFileName}.sql"
}
}
validQueries.foreach { q =>
test(q.getFileName.toString) {
val queryFile = Paths.get(q.toString, s"${q.getFileName}.sql")
val schemaFile = Paths.get(
baseResourcePath.toFile.getAbsolutePath,
name,
q.getFileName.toString,
s"${q.getFileName}.output.schema"
)
val queryString = fileToString(queryFile)
runQuery(queryString, schemaFile)
}
}
}
runQueries("tpcds-1_4")
}

View File

@ -1366,6 +1366,7 @@
<exclude>docs/**</exclude>
<exclude>build/scala-*/**</exclude>
<exclude>**/**/operation_logs/**/**</exclude>
<exclude>**/*.output.schema</exclude>
</excludes>
</configuration>
</plugin>