[KYUUBI #5275] [DOC] Improve and fix comparison and regeneration for golden files

### _Why are the changes needed?_

- Extract common assertion method for verifying file contents
- Ensure integrity of the file by comparing the line count
- Correct the script name for Spark engine KDF doc generation from `gen_kdf.sh` to `gen_spark_kdf_docs.sh`
- Add `gen_hive_kdf_docs.sh` script for Hive engine KDF doc generation
- Fix incorrect hints for Ranger spec file generation
- Show the line number of the incorrect file content
- Stream the file content line by line with buffered reads
- Regeneration hints:

<img width="656" alt="image" src="https://github.com/apache/kyuubi/assets/1935105/d1a7cb70-8b63-4fe9-ae27-80dadbe84799">

### _How was this patch tested?_
- [ ] Add some test cases that check the changes thoroughly including negative and positive cases if possible

- [ ] Add screenshots for manual tests if appropriate

- [x] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before making a pull request

### _Was this patch authored or co-authored using generative AI tooling?_

No.

Closes #5275 from bowenliang123/doc-regen-hint.

Closes #5275

9af97ab86 [Bowen Liang] implicit source position
07020c74d [liangbowen] assertFileContent

Lead-authored-by: liangbowen <liangbowen@gf.com.cn>
Co-authored-by: Bowen Liang <liangbowen@gf.com.cn>
Signed-off-by: Bowen Liang <liangbowen@gf.com.cn>
This commit is contained in:
liangbowen 2023-09-13 17:41:27 +08:00 committed by Bowen Liang
parent 724ae93989
commit d15322d568
13 changed files with 165 additions and 90 deletions

26
dev/gen/gen_hive_kdf_docs.sh Executable file
View File

@ -0,0 +1,26 @@
#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Golden result file:
# docs/extensions/engines/hive/functions.md
#
# Runs KyuubiDefinedFunctionSuite of the Hive SQL engine module through Maven.
# KYUUBI_UPDATE defaults to 1 when it is unset or empty, which makes the suite
# regenerate the golden file; pass KYUUBI_UPDATE=0 to only verify it, e.g.
#   KYUUBI_UPDATE=0 dev/gen/gen_hive_kdf_docs.sh
# Note: `build/mvn` is a relative path, so it is resolved against the current
# working directory.
KYUUBI_UPDATE="${KYUUBI_UPDATE:-1}" \
build/mvn clean test \
-pl externals/kyuubi-hive-sql-engine -am \
-Pflink-provided,spark-provided,hive-provided \
-DwildcardSuites=org.apache.kyuubi.engine.hive.udf.KyuubiDefinedFunctionSuite

View File

@ -17,7 +17,7 @@
#
# Golden result file:
# docs/sql/functions.md
# docs/extensions/engines/spark/functions.md
KYUUBI_UPDATE="${KYUUBI_UPDATE:-1}" \
build/mvn clean test \

View File

@ -26,7 +26,6 @@ import com.fasterxml.jackson.databind.{JsonNode, ObjectMapper}
import com.fasterxml.jackson.databind.json.JsonMapper
import com.fasterxml.jackson.databind.node.ObjectNode
import com.fasterxml.jackson.module.scala.DefaultScalaModule
import org.apache.commons.io.FileUtils
import org.apache.ranger.plugin.model.RangerPolicy
import org.scalatest.funsuite.AnyFunSuite
@ -37,6 +36,7 @@ import org.apache.kyuubi.plugin.spark.authz.gen.KRangerPolicyItemAccess.allowTyp
import org.apache.kyuubi.plugin.spark.authz.gen.KRangerPolicyResource._
import org.apache.kyuubi.plugin.spark.authz.gen.RangerAccessType._
import org.apache.kyuubi.plugin.spark.authz.gen.RangerClassConversions._
import org.apache.kyuubi.util.AssertionUtils._
/**
* Generates the policy file to test/main/resources dir.
@ -77,12 +77,11 @@ class PolicyJsonFileGenerator extends AnyFunSuite {
StandardOpenOption.CREATE,
StandardOpenOption.TRUNCATE_EXISTING)
} else {
val existedFileContent =
FileUtils.readFileToString(policyFilePath.toFile, StandardCharsets.UTF_8)
withClue("Please regenerate the ranger policy file by running"
+ " `dev/gen/gen_ranger_policy_json.sh`") {
assert(generatedStr.equals(existedFileContent))
}
assertFileContent(
policyFilePath,
Seq(generatedStr),
"dev/gen/gen_ranger_policy_json.sh",
splitFirstExpectedLine = true)
}
}

View File

@ -20,11 +20,11 @@ package org.apache.kyuubi.plugin.spark.authz.gen
import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Paths, StandardOpenOption}
import org.apache.commons.io.FileUtils
//scalastyle:off
import org.scalatest.funsuite.AnyFunSuite
import org.apache.kyuubi.plugin.spark.authz.serde.{mapper, CommandSpec}
import org.apache.kyuubi.util.AssertionUtils._
/**
* Generates the default command specs to src/main/resources dir.
@ -39,7 +39,6 @@ import org.apache.kyuubi.plugin.spark.authz.serde.{mapper, CommandSpec}
* dev/gen/gen_ranger_spec_json.sh
* }}}
*/
class JsonSpecFileGenerator extends AnyFunSuite {
// scalastyle:on
test("check spec json files") {
@ -70,12 +69,11 @@ class JsonSpecFileGenerator extends AnyFunSuite {
StandardOpenOption.CREATE,
StandardOpenOption.TRUNCATE_EXISTING)
} else {
val existedFileContent =
FileUtils.readFileToString(filePath.toFile, StandardCharsets.UTF_8)
withClue(s"Check $filename failed. Please regenerate the ranger policy file by running"
+ " `dev/gen/gen_ranger_spec_json.sh`.") {
assert(generatedStr.equals(existedFileContent))
}
assertFileContent(
filePath,
Seq(generatedStr),
"dev/gen/gen_ranger_spec_json.sh",
splitFirstExpectedLine = true)
}
}
}

View File

@ -28,7 +28,6 @@ import org.apache.kyuubi.{KyuubiFunSuite, Utils}
import org.apache.kyuubi.spark.connector.common.GoldenFileUtils._
import org.apache.kyuubi.spark.connector.common.LocalSparkSession.withSparkSession
// scalastyle:off line.size.limit
/**
* To run this test suite:
* {{{
@ -40,8 +39,6 @@ import org.apache.kyuubi.spark.connector.common.LocalSparkSession.withSparkSessi
* dev/gen/gen_tpcds_queries.sh
* }}}
*/
// scalastyle:on line.size.limit
@Slow
class TPCDSQuerySuite extends KyuubiFunSuite {

View File

@ -28,7 +28,6 @@ import org.apache.kyuubi.{KyuubiFunSuite, Utils}
import org.apache.kyuubi.spark.connector.common.GoldenFileUtils._
import org.apache.kyuubi.spark.connector.common.LocalSparkSession.withSparkSession
// scalastyle:off line.size.limit
/**
* To run this test suite:
* {{{
@ -40,8 +39,6 @@ import org.apache.kyuubi.spark.connector.common.LocalSparkSession.withSparkSessi
* dev/gen/gen_tpch_queries.sh
* }}}
*/
// scalastyle:on line.size.limit
@Slow
class TPCHQuerySuite extends KyuubiFunSuite {

View File

@ -19,24 +19,23 @@ package org.apache.kyuubi.engine.hive.udf
import java.nio.file.Paths
import org.apache.kyuubi.{KyuubiFunSuite, MarkdownBuilder, MarkdownUtils, Utils}
import org.apache.kyuubi.{KyuubiFunSuite, MarkdownBuilder, Utils}
import org.apache.kyuubi.util.GoldenFileUtils._
// scalastyle:off line.size.limit
/**
* End-to-end test cases for the Kyuubi defined functions (KDF) doc file
* The golden result file is "docs/sql/functions.md".
* The golden result file is "docs/extensions/engines/hive/functions.md".
*
* To run the entire test suite:
* {{{
* build/mvn clean test -pl externals/kyuubi-hive-sql-engine -am -Pflink-provided,spark-provided,hive-provided -DwildcardSuites=org.apache.kyuubi.engine.hive.udf.KyuubiDefinedFunctionSuite
* KYUUBI_UPDATE=0 dev/gen/gen_hive_kdf_docs.sh
* }}}
*
* To re-generate golden files for entire suite, run:
* {{{
* KYUUBI_UPDATE=1 build/mvn clean test -pl externals/kyuubi-hive-sql-engine -am -Pflink-provided,spark-provided,hive-provided -DwildcardSuites=org.apache.kyuubi.engine.hive.udf.KyuubiDefinedFunctionSuite
* dev/gen/gen_hive_kdf_docs.sh
* }}}
*/
// scalastyle:on line.size.limit
class KyuubiDefinedFunctionSuite extends KyuubiFunSuite {
private val kyuubiHome: String = Utils.getCodeSourceLocation(getClass)
@ -60,10 +59,6 @@ class KyuubiDefinedFunctionSuite extends KyuubiFunSuite {
builder += s"${func.name} | ${func.description} | ${func.returnType} | ${func.since}"
}
MarkdownUtils.verifyOutput(
markdown,
builder,
getClass.getCanonicalName,
"externals/kyuubi-hive-sql-engine")
verifyOrRegenerateGoldenFile(markdown, builder.toMarkdown, "dev/gen/gen_hive_kdf_docs.sh")
}
}

View File

@ -19,24 +19,23 @@ package org.apache.kyuubi.engine.spark.udf
import java.nio.file.Paths
import org.apache.kyuubi.{KyuubiFunSuite, MarkdownBuilder, MarkdownUtils, Utils}
import org.apache.kyuubi.{KyuubiFunSuite, MarkdownBuilder, Utils}
import org.apache.kyuubi.util.GoldenFileUtils._
// scalastyle:off line.size.limit
/**
* End-to-end test cases for the Kyuubi defined functions (KDF) doc file
* The golden result file is "docs/sql/functions.md".
* The golden result file is "docs/extensions/engines/spark/functions.md".
*
* To run the entire test suite:
* {{{
* KYUUBI_UPDATE=0 dev/gen/gen_kdf.sh
* KYUUBI_UPDATE=0 dev/gen/gen_spark_kdf_docs.sh
* }}}
*
* To re-generate golden files for entire suite, run:
* {{{
* dev/gen/gen_kdf.sh
* dev/gen/gen_spark_kdf_docs.sh
* }}}
*/
// scalastyle:on line.size.limit
class KyuubiDefinedFunctionSuite extends KyuubiFunSuite {
private val kyuubiHome: String = Utils.getCodeSourceLocation(getClass)
@ -60,10 +59,6 @@ class KyuubiDefinedFunctionSuite extends KyuubiFunSuite {
builder += s"${func.name} | ${func.description} | ${func.returnType} | ${func.since}"
}
MarkdownUtils.verifyOutput(
markdown,
builder,
getClass.getCanonicalName,
"externals/kyuubi-spark-sql-engine")
verifyOrRegenerateGoldenFile(markdown, builder.toMarkdown, "dev/gen/gen_spark_kdf_docs.sh")
}
}

View File

@ -17,10 +17,6 @@
package org.apache.kyuubi
import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Path, StandardOpenOption}
import scala.collection.JavaConverters._
import scala.collection.mutable.ListBuffer
import com.vladsch.flexmark.formatter.Formatter
@ -28,36 +24,6 @@ import com.vladsch.flexmark.parser.{Parser, ParserEmulationProfile, PegdownExten
import com.vladsch.flexmark.profile.pegdown.PegdownOptionsAdapter
import com.vladsch.flexmark.util.data.{MutableDataHolder, MutableDataSet}
import com.vladsch.flexmark.util.sequence.SequenceUtils.EOL
import org.scalatest.Assertions.{assertResult, withClue}
object MarkdownUtils {

  /**
   * Verifies a markdown file against freshly generated content, or rewrites the file.
   *
   * When the `KYUUBI_UPDATE` environment variable equals "1", the file is overwritten
   * (created if absent, truncated otherwise) with the generated lines. Otherwise the
   * file is compared with the generated lines one by one, and finally the number of
   * lines is compared as well, so that missing or extra trailing lines are reported
   * instead of being silently ignored.
   *
   * @param markdown  path of the markdown file to verify or rewrite
   * @param newOutput builder holding the freshly generated content
   * @param agent     suite name printed in the regeneration hint (`-DwildcardSuites`)
   * @param module    Maven module printed in the regeneration hint (`-pl`)
   */
  def verifyOutput(
      markdown: Path,
      newOutput: MarkdownBuilder,
      agent: String,
      module: String): Unit = {
    val formatted = newOutput.toMarkdown
    if (System.getenv("KYUUBI_UPDATE") == "1") {
      Files.write(
        markdown,
        formatted.asJava,
        StandardOpenOption.CREATE,
        StandardOpenOption.TRUNCATE_EXISTING)
    } else {
      // Builds the failure hint; `location` is either "<file>:<line>" or just "<file>".
      def outdatedClue(location: String): String =
        s""" The markdown file ($location) is out of date.
           | Please update doc with KYUUBI_UPDATE=1 build/mvn clean test
           | -pl $module -am -Pflink-provided,spark-provided,hive-provided
           | -Dtest=none -DwildcardSuites=$agent \n""".stripMargin

      val linesInFile = Files.readAllLines(markdown, StandardCharsets.UTF_8).asScala
      linesInFile.iterator.zipWithIndex.zip(formatted.iterator).foreach {
        case ((lineInFile, lineIndex), formattedLine) =>
          withClue(outdatedClue(s"$markdown:${lineIndex + 1}")) {
            assertResult(formattedLine)(lineInFile)
          }
      }
      // `zip` stops at the shorter side, so the line counts must be compared explicitly
      // to detect a file that is a strict prefix or extension of the expected content.
      withClue(outdatedClue(s"$markdown")) {
        assertResult(formatted.size)(linesInFile.size)
      }
    }
  }
}
class MarkdownBuilder {
private val buffer = new ListBuffer[String]

View File

@ -21,14 +21,14 @@ import java.nio.file.Paths
import scala.collection.JavaConverters._
import org.apache.kyuubi.{KyuubiFunSuite, MarkdownBuilder, MarkdownUtils, Utils}
import org.apache.kyuubi.{KyuubiFunSuite, MarkdownBuilder, Utils}
import org.apache.kyuubi.ctl.CtlConf
import org.apache.kyuubi.ha.HighAvailabilityConf
import org.apache.kyuubi.metrics.MetricsConf
import org.apache.kyuubi.server.metadata.jdbc.JDBCMetadataStoreConf
import org.apache.kyuubi.util.GoldenFileUtils._
import org.apache.kyuubi.zookeeper.ZookeeperConf
// scalastyle:off line.size.limit
/**
* End-to-end test cases for configuration doc file
* The golden result file is "docs/configuration/settings.md".
@ -43,7 +43,6 @@ import org.apache.kyuubi.zookeeper.ZookeeperConf
* dev/gen/gen_all_config_docs.sh
* }}}
*/
// scalastyle:on line.size.limit
class AllKyuubiConfiguration extends KyuubiFunSuite {
private val kyuubiHome: String = Utils.getCodeSourceLocation(getClass).split("kyuubi-server")(0)
private val markdown = Paths.get(kyuubiHome, "docs", "configuration", "settings.md")
@ -229,6 +228,6 @@ class AllKyuubiConfiguration extends KyuubiFunSuite {
| executor and obey the Spark AQE behavior of Kyuubi system default. On the other hand,
| for those users who do not have custom configurations will use system defaults."""
MarkdownUtils.verifyOutput(markdown, builder, getClass.getCanonicalName, "kyuubi-server")
verifyOrRegenerateGoldenFile(markdown, builder.toMarkdown, "dev/gen/gen_all_config_docs.sh")
}
}

View File

@ -26,8 +26,9 @@ import org.apache.kyuubi.{DeltaSuiteMixin, WithKyuubiServer}
import org.apache.kyuubi.config.KyuubiConf
import org.apache.kyuubi.server.mysql.MySQLJDBCTestHelper
import org.apache.kyuubi.tags.DeltaTest
import org.apache.kyuubi.util.AssertionUtils._
import org.apache.kyuubi.util.GoldenFileUtils._
// scalastyle:off line.size.limit
/**
* To run this test suite:
* {{{
@ -39,7 +40,6 @@ import org.apache.kyuubi.tags.DeltaTest
* dev/gen/gen_tpcds_output_schema.sh
* }}}
*/
// scalastyle:on line.size.limit
@Slow
@DeltaTest
class OutputSchemaTPCDSSuite extends WithKyuubiServer
@ -74,7 +74,6 @@ class OutputSchemaTPCDSSuite extends WithKyuubiServer
super.afterAll()
}
private val regenerateGoldenFiles = sys.env.get("KYUUBI_UPDATE").contains("1")
protected val baseResourcePath: Path = Paths.get("src", "test", "resources")
private def fileToString(file: Path): String = {
@ -89,12 +88,15 @@ class OutputSchemaTPCDSSuite extends WithKyuubiServer
val columnTypes = (1 to result.getMetaData.getColumnCount).map { i =>
s"${result.getMetaData.getColumnName(i)}:${result.getMetaData.getColumnTypeName(i)}"
}.mkString("struct<", ",", ">\n")
if (regenerateGoldenFiles) {
if (isRegenerateGoldenFiles) {
Files.write(goldenFile, columnTypes.getBytes())
} else {
assertFileContent(
goldenFile,
Seq(columnTypes),
"dev/gen/gen_tpcds_output_schema.sh",
splitFirstExpectedLine = true)
}
val expected = fileToString(goldenFile)
assert(columnTypes === expected)
} finally {
result.close()
}

View File

@ -16,11 +16,15 @@
*/
package org.apache.kyuubi.util
import java.nio.charset.StandardCharsets
import java.nio.file.Path
import java.util.Locale
import scala.collection.Traversable
import scala.io.Source
import scala.reflect.ClassTag
import org.scalactic.source
import org.scalactic.{source, Prettifier}
import org.scalatest.Assertions._
object AssertionUtils {
@ -54,6 +58,52 @@ object AssertionUtils {
}
}
/**
 * Assert the file content is equal to the expected lines.
 * If not, throws assertion error with the given regeneration hint.
 *
 * The file is read line by line and compared with the expected lines in order;
 * the first differing line fails the assertion with its line number in the clue.
 * After that, the total number of lines on both sides is compared, so a file with
 * fewer or more lines than expected is also reported.
 *
 * @param path source file path
 * @param expectedLines expected lines
 * @param regenScript regeneration script, mentioned in the failure hint when it is
 *                    neither null nor empty
 * @param splitFirstExpectedLine whether to split the first expected line
 *                               into multiple lines by EOL; when true, only the first
 *                               element of `expectedLines` is used, and an empty
 *                               `expectedLines` is treated as no expected lines
 */
def assertFileContent(
    path: Path,
    expectedLines: Traversable[String],
    regenScript: String,
    splitFirstExpectedLine: Boolean = false)(implicit
    prettifier: Prettifier,
    pos: source.Position): Unit = {
  val fileSource = Source.fromFile(path.toUri, StandardCharsets.UTF_8.name())
  try {
    // Built once: the same iterator is used for the comparison and for counting the
    // leftover expected lines afterwards.
    val expectedLinesIter: Iterator[String] =
      if (splitFirstExpectedLine) {
        expectedLines.headOption.fold(Iterator.empty: Iterator[String]) { firstLine =>
          Source.fromString(firstLine).getLines()
        }
      } else {
        expectedLines.toIterator
      }
    val fileLinesIter = fileSource.getLines()
    val regenerationHint = s"The file ($path) is out of date. " + {
      if (regenScript != null && regenScript.nonEmpty) {
        s" Please regenerate it by running `${regenScript.stripMargin}`. "
      } else ""
    }

    // Walk both iterators in lock-step. An explicit loop (rather than `zip`) keeps the
    // unconsumed remainder of each side available for the line count check below.
    var comparedLineCount = 0
    while (fileLinesIter.hasNext && expectedLinesIter.hasNext) {
      comparedLineCount += 1
      val lineInFile = fileLinesIter.next()
      val expectedLine = expectedLinesIter.next()
      withClue(s"Line $comparedLineCount is not expected. $regenerationHint") {
        assertResult(expectedLine)(lineInFile)(prettifier, pos)
      }
    }

    // At most one of the iterators still has elements here; adding the leftovers gives
    // the real totals, so both a truncated file and a file with extra lines fail.
    val fileLineCount = comparedLineCount + fileLinesIter.size
    val expectedLineCount = comparedLineCount + expectedLinesIter.size
    withClue(s"Line number is not expected. $regenerationHint") {
      assertResult(expectedLineCount)(fileLineCount)(prettifier, pos)
    }
  } finally {
    fileSource.close()
  }
}
/**
* Asserts that the given function throws an exception of the given type
* and with the exception message equals to expected string

View File

@ -0,0 +1,51 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kyuubi.util
import java.nio.file.{Files, Path, StandardOpenOption}
import scala.collection.JavaConverters._
import org.apache.kyuubi.util.AssertionUtils._
object GoldenFileUtils {

  /**
   * Whether golden files should be regenerated instead of verified,
   * i.e. whether the `KYUUBI_UPDATE` environment variable is set to exactly "1".
   */
  def isRegenerateGoldenFiles: Boolean = sys.env.get("KYUUBI_UPDATE").exists(_ == "1")

  /**
   * Regenerates the golden file when `KYUUBI_UPDATE` is "1"; otherwise verifies that
   * the golden file content matches the given lines.
   *
   * @param path the golden file location
   * @param lines the lines to write on regeneration, or to expect on verification
   * @param regenScript the regeneration script, shown as a hint when verification fails
   */
  def verifyOrRegenerateGoldenFile(
      path: Path,
      lines: Iterable[String],
      regenScript: String): Unit = {
    if (!isRegenerateGoldenFiles) {
      // Verification mode: compare the existing file with the expected lines.
      assertFileContent(path, lines, regenScript)
    } else {
      // Regeneration mode: create the file if needed and replace its whole content.
      val javaLines = lines.asJava
      Files.write(
        path,
        javaLines,
        StandardOpenOption.CREATE,
        StandardOpenOption.TRUNCATE_EXISTING)
    }
  }
}