Rebase for PR 87: Add -m for custom master, use SBT_HOME if set (#169)

* Add -m for custom master
* Add ability to use own sbt jar, update readme to include -m option
* Add stddev percentage showing
This commit is contained in:
Nico Poggi 2018-09-17 15:18:16 +02:00 committed by GitHub
parent d9a41a1204
commit 3c1c9e9070
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 31 additions and 10 deletions

View File

@ -18,6 +18,8 @@ Usage: spark-sql-perf [options]
-b <value> | --benchmark <value>
the name of the benchmark to run
-m <value> | --master <value>
the master url to use
-f <value> | --filter <value>
a filter on the name of the queries to run
-i <value> | --iterations <value>

View File

@ -26,6 +26,13 @@ else
declare java_cmd=java
fi
# Allow a pre-installed sbt launcher to be used instead of downloading one.
# SBT_HOME must point at the launcher jar itself, e.g.
#   export SBT_HOME=/usr/share/sbt-launcher-packaging/bin/sbt-launch.jar
# Use -f (regular file exists) rather than -x: sbt-launch.jar is normally
# not marked executable, so -x would silently ignore a valid SBT_HOME.
if test -f "$SBT_HOME"; then
  echo -e "Using $SBT_HOME as default SBT_HOME - should be the jar name!"
  sbt_jar=${SBT_HOME}
fi
# Print all arguments to stderr, keeping stdout clean for command output.
echoerr () {
  >&2 echo "$@"
}
@ -165,7 +172,9 @@ process_args () {
}
run() {
# no jar? download it.
# first check SBT_HOME is present so we use what's already available
sbt_jar=$SBT_HOME
# if there's no jar let's download it.
[[ -f "$sbt_jar" ]] || acquire_sbt_jar "$sbt_version" || {
# still no jar? uh-oh.
echo "Download failed. Obtain the sbt-launch.jar manually and place it at $sbt_jar"

View File

@ -17,14 +17,14 @@
package com.databricks.spark.sql.perf
import java.net.InetAddress
import java.io.File
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.functions._
import org.apache.spark.{SparkContext, SparkConf}
import scala.util.Try
case class RunConfig(
master: String = "local[*]",
benchmarkName: String = null,
filter: Option[String] = None,
iterations: Int = 3,
@ -37,6 +37,9 @@ object RunBenchmark {
def main(args: Array[String]): Unit = {
val parser = new scopt.OptionParser[RunConfig]("spark-sql-perf") {
head("spark-sql-perf", "0.2.0")
opt[String]('m', "master")
.action { (x, c) => c.copy(master = x) }
.text("the Spark master to use, default to local[*]")
opt[String]('b', "benchmark")
.action { (x, c) => c.copy(benchmarkName = x) }
.text("the name of the benchmark to run")
@ -64,14 +67,16 @@ object RunBenchmark {
def run(config: RunConfig): Unit = {
val conf = new SparkConf()
.setMaster("local[*]")
.setMaster(config.master)
.setAppName(getClass.getName)
val sc = SparkContext.getOrCreate(conf)
val sqlContext = SQLContext.getOrCreate(sc)
import sqlContext.implicits._
sqlContext.setConf("spark.sql.perf.results", new java.io.File("performance").toURI.toString)
sqlContext.setConf("spark.sql.perf.results",
new File("performance").toURI.toString)
val benchmark = Try {
Class.forName(config.benchmarkName)
.newInstance()
@ -102,7 +107,8 @@ object RunBenchmark {
experiment.waitForFinish(1000 * 60 * 30)
sqlContext.setConf("spark.sql.shuffle.partitions", "1")
experiment.getCurrentRuns()
val toShow = experiment.getCurrentRuns()
.withColumn("result", explode($"results"))
.select("result.*")
.groupBy("name")
@ -110,9 +116,13 @@ object RunBenchmark {
min($"executionTime") as 'minTimeMs,
max($"executionTime") as 'maxTimeMs,
avg($"executionTime") as 'avgTimeMs,
stddev($"executionTime") as 'stdDev)
stddev($"executionTime") as 'stdDev,
(stddev($"executionTime") / avg($"executionTime") * 100) as 'stdDevPercent)
.orderBy("name")
.show(truncate = false)
println("Showing at most 100 query results now")
toShow.show(100)
println(s"""Results: sqlContext.read.json("${experiment.resultPath}")""")
config.baseline.foreach { baseTimestamp =>