Rebase for PR 87: Add -m for custom master, use SBT_HOME if set (#169)

* Add -m for custom master
* Add ability to use own sbt jar, update readme to include -m option
* Add stddev percentage showing
This commit is contained in:
Nico Poggi 2018-09-17 15:18:16 +02:00 committed by GitHub
parent d9a41a1204
commit 3c1c9e9070
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 31 additions and 10 deletions

View File

@ -18,6 +18,8 @@ Usage: spark-sql-perf [options]
-b <value> | --benchmark <value>
the name of the benchmark to run
-m <value> | --master <value>
the master url to use
-f <value> | --filter <value>
a filter on the name of the queries to run
-i <value> | --iterations <value>

View File

@ -26,6 +26,13 @@ else
declare java_cmd=java
fi
# Allow a pre-installed sbt launcher to be used instead of downloading one.
# SBT_HOME must point at the launcher jar itself, e.g.
#   export SBT_HOME=/usr/share/sbt-launcher-packaging/bin/sbt-launch.jar
# Use -f (regular file exists) rather than -x: sbt-launch.jar is normally
# not marked executable, so -x would silently ignore a valid SBT_HOME.
if test -f "$SBT_HOME"; then
  echo -e "Using $SBT_HOME as default SBT_HOME - should be the jar name!"
  sbt_jar=${SBT_HOME}
fi
# Print all arguments to stderr, keeping stdout clean for command output.
echoerr () {
  >&2 echo "$@"
}
@ -165,7 +172,9 @@ process_args () {
}
run() {
# no jar? download it.
# first check SBT_HOME is present so we use what's already available
sbt_jar=$SBT_HOME
# if there's no jar let's download it.
[[ -f "$sbt_jar" ]] || acquire_sbt_jar "$sbt_version" || {
# still no jar? uh-oh.
echo "Download failed. Obtain the sbt-launch.jar manually and place it at $sbt_jar"

View File

@ -17,14 +17,14 @@
package com.databricks.spark.sql.perf
import java.net.InetAddress
import java.io.File
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.functions._
import org.apache.spark.{SparkContext, SparkConf}
import scala.util.Try
case class RunConfig(
master: String = "local[*]",
benchmarkName: String = null,
filter: Option[String] = None,
iterations: Int = 3,
@ -37,6 +37,9 @@ object RunBenchmark {
def main(args: Array[String]): Unit = {
val parser = new scopt.OptionParser[RunConfig]("spark-sql-perf") {
head("spark-sql-perf", "0.2.0")
opt[String]('m', "master")
.action { (x, c) => c.copy(master = x) }
.text("the Spark master to use, default to local[*]")
opt[String]('b', "benchmark")
.action { (x, c) => c.copy(benchmarkName = x) }
.text("the name of the benchmark to run")
@ -64,14 +67,16 @@ object RunBenchmark {
def run(config: RunConfig): Unit = {
val conf = new SparkConf()
.setMaster("local[*]")
.setMaster(config.master)
.setAppName(getClass.getName)
val sc = SparkContext.getOrCreate(conf)
val sqlContext = SQLContext.getOrCreate(sc)
import sqlContext.implicits._
sqlContext.setConf("spark.sql.perf.results", new java.io.File("performance").toURI.toString)
sqlContext.setConf("spark.sql.perf.results",
new File("performance").toURI.toString)
val benchmark = Try {
Class.forName(config.benchmarkName)
.newInstance()
@ -102,7 +107,8 @@ object RunBenchmark {
experiment.waitForFinish(1000 * 60 * 30)
sqlContext.setConf("spark.sql.shuffle.partitions", "1")
experiment.getCurrentRuns()
val toShow = experiment.getCurrentRuns()
.withColumn("result", explode($"results"))
.select("result.*")
.groupBy("name")
@ -110,9 +116,13 @@ object RunBenchmark {
min($"executionTime") as 'minTimeMs,
max($"executionTime") as 'maxTimeMs,
avg($"executionTime") as 'avgTimeMs,
stddev($"executionTime") as 'stdDev)
stddev($"executionTime") as 'stdDev,
(stddev($"executionTime") / avg($"executionTime") * 100) as 'stdDevPercent)
.orderBy("name")
.show(truncate = false)
println("Showing at most 100 query results now")
toShow.show(100)
println(s"""Results: sqlContext.read.json("${experiment.resultPath}")""")
config.baseline.foreach { baseTimestamp =>