Rebase for PR 87: Add -m for custom master, use SBT_HOME if set (#169)
* Add -m for custom master * Add ability to use own sbt jar, update readme to include -m option * Show stddev percentage in results
This commit is contained in:
parent
d9a41a1204
commit
3c1c9e9070
@ -18,6 +18,8 @@ Usage: spark-sql-perf [options]
|
|||||||
|
|
||||||
-b <value> | --benchmark <value>
|
-b <value> | --benchmark <value>
|
||||||
the name of the benchmark to run
|
the name of the benchmark to run
|
||||||
|
-m <value> | --master <value>
|
||||||
|
the master URL to use
|
||||||
-f <value> | --filter <value>
|
-f <value> | --filter <value>
|
||||||
a filter on the name of the queries to run
|
a filter on the name of the queries to run
|
||||||
-i <value> | --iterations <value>
|
-i <value> | --iterations <value>
|
||||||
|
|||||||
@ -26,6 +26,13 @@ else
|
|||||||
declare java_cmd=java
|
declare java_cmd=java
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if test -x "$SBT_HOME"; then
|
||||||
|
echo -e "Using $SBT_HOME as default SBT_HOME - should be the jar name!"
|
||||||
|
# Could be at /usr/share/sbt-launcher-packaging/bin/sbt-launch.jar
|
||||||
|
# so this would be export SBT_HOME=/usr/share/sbt-launcher-packaging/bin/sbt-launch.jar
|
||||||
|
sbt_jar=${SBT_HOME}
|
||||||
|
fi
|
||||||
|
|
||||||
echoerr () {
|
echoerr () {
|
||||||
echo 1>&2 "$@"
|
echo 1>&2 "$@"
|
||||||
}
|
}
|
||||||
@ -165,7 +172,9 @@ process_args () {
|
|||||||
}
|
}
|
||||||
|
|
||||||
run() {
|
run() {
|
||||||
# no jar? download it.
|
# first check SBT_HOME is present so we use what's already available
|
||||||
|
sbt_jar=$SBT_HOME
|
||||||
|
# if there's no jar let's download it.
|
||||||
[[ -f "$sbt_jar" ]] || acquire_sbt_jar "$sbt_version" || {
|
[[ -f "$sbt_jar" ]] || acquire_sbt_jar "$sbt_version" || {
|
||||||
# still no jar? uh-oh.
|
# still no jar? uh-oh.
|
||||||
echo "Download failed. Obtain the sbt-launch.jar manually and place it at $sbt_jar"
|
echo "Download failed. Obtain the sbt-launch.jar manually and place it at $sbt_jar"
|
||||||
|
|||||||
@ -17,14 +17,14 @@
|
|||||||
package com.databricks.spark.sql.perf
|
package com.databricks.spark.sql.perf
|
||||||
|
|
||||||
import java.net.InetAddress
|
import java.net.InetAddress
|
||||||
|
import java.io.File
|
||||||
import org.apache.spark.sql.SQLContext
|
import org.apache.spark.sql.SQLContext
|
||||||
import org.apache.spark.sql.functions._
|
import org.apache.spark.sql.functions._
|
||||||
import org.apache.spark.{SparkContext, SparkConf}
|
import org.apache.spark.{SparkContext, SparkConf}
|
||||||
|
|
||||||
import scala.util.Try
|
import scala.util.Try
|
||||||
|
|
||||||
case class RunConfig(
|
case class RunConfig(
|
||||||
|
master: String = "local[*]",
|
||||||
benchmarkName: String = null,
|
benchmarkName: String = null,
|
||||||
filter: Option[String] = None,
|
filter: Option[String] = None,
|
||||||
iterations: Int = 3,
|
iterations: Int = 3,
|
||||||
@ -37,6 +37,9 @@ object RunBenchmark {
|
|||||||
def main(args: Array[String]): Unit = {
|
def main(args: Array[String]): Unit = {
|
||||||
val parser = new scopt.OptionParser[RunConfig]("spark-sql-perf") {
|
val parser = new scopt.OptionParser[RunConfig]("spark-sql-perf") {
|
||||||
head("spark-sql-perf", "0.2.0")
|
head("spark-sql-perf", "0.2.0")
|
||||||
|
opt[String]('m', "master")
|
||||||
|
.action { (x, c) => c.copy(master = x) }
|
||||||
|
.text("the Spark master to use, default to local[*]")
|
||||||
opt[String]('b', "benchmark")
|
opt[String]('b', "benchmark")
|
||||||
.action { (x, c) => c.copy(benchmarkName = x) }
|
.action { (x, c) => c.copy(benchmarkName = x) }
|
||||||
.text("the name of the benchmark to run")
|
.text("the name of the benchmark to run")
|
||||||
@ -64,14 +67,16 @@ object RunBenchmark {
|
|||||||
|
|
||||||
def run(config: RunConfig): Unit = {
|
def run(config: RunConfig): Unit = {
|
||||||
val conf = new SparkConf()
|
val conf = new SparkConf()
|
||||||
.setMaster("local[*]")
|
.setMaster(config.master)
|
||||||
.setAppName(getClass.getName)
|
.setAppName(getClass.getName)
|
||||||
|
|
||||||
val sc = SparkContext.getOrCreate(conf)
|
val sc = SparkContext.getOrCreate(conf)
|
||||||
val sqlContext = SQLContext.getOrCreate(sc)
|
val sqlContext = SQLContext.getOrCreate(sc)
|
||||||
import sqlContext.implicits._
|
import sqlContext.implicits._
|
||||||
|
|
||||||
sqlContext.setConf("spark.sql.perf.results", new java.io.File("performance").toURI.toString)
|
sqlContext.setConf("spark.sql.perf.results",
|
||||||
|
new File("performance").toURI.toString)
|
||||||
|
|
||||||
val benchmark = Try {
|
val benchmark = Try {
|
||||||
Class.forName(config.benchmarkName)
|
Class.forName(config.benchmarkName)
|
||||||
.newInstance()
|
.newInstance()
|
||||||
@ -102,7 +107,8 @@ object RunBenchmark {
|
|||||||
experiment.waitForFinish(1000 * 60 * 30)
|
experiment.waitForFinish(1000 * 60 * 30)
|
||||||
|
|
||||||
sqlContext.setConf("spark.sql.shuffle.partitions", "1")
|
sqlContext.setConf("spark.sql.shuffle.partitions", "1")
|
||||||
experiment.getCurrentRuns()
|
|
||||||
|
val toShow = experiment.getCurrentRuns()
|
||||||
.withColumn("result", explode($"results"))
|
.withColumn("result", explode($"results"))
|
||||||
.select("result.*")
|
.select("result.*")
|
||||||
.groupBy("name")
|
.groupBy("name")
|
||||||
@ -110,9 +116,13 @@ object RunBenchmark {
|
|||||||
min($"executionTime") as 'minTimeMs,
|
min($"executionTime") as 'minTimeMs,
|
||||||
max($"executionTime") as 'maxTimeMs,
|
max($"executionTime") as 'maxTimeMs,
|
||||||
avg($"executionTime") as 'avgTimeMs,
|
avg($"executionTime") as 'avgTimeMs,
|
||||||
stddev($"executionTime") as 'stdDev)
|
stddev($"executionTime") as 'stdDev,
|
||||||
|
(stddev($"executionTime") / avg($"executionTime") * 100) as 'stdDevPercent)
|
||||||
.orderBy("name")
|
.orderBy("name")
|
||||||
.show(truncate = false)
|
|
||||||
|
println("Showing at most 100 query results now")
|
||||||
|
toShow.show(100)
|
||||||
|
|
||||||
println(s"""Results: sqlContext.read.json("${experiment.resultPath}")""")
|
println(s"""Results: sqlContext.read.json("${experiment.resultPath}")""")
|
||||||
|
|
||||||
config.baseline.foreach { baseTimestamp =>
|
config.baseline.foreach { baseTimestamp =>
|
||||||
@ -136,4 +146,4 @@ object RunBenchmark {
|
|||||||
data.show(truncate = false)
|
data.show(truncate = false)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user