Go to file
2015-04-15 20:03:14 -07:00
build Initial port. 2015-04-15 20:03:14 -07:00
project Initial port. 2015-04-15 20:03:14 -07:00
src/main/scala/com/databricks/spark/sql/perf Initial port. 2015-04-15 20:03:14 -07:00
.gitignore Initial port. 2015-04-15 20:03:14 -07:00
build.sbt Initial port. 2015-04-15 20:03:14 -07:00
LICENSE Initial port. 2015-04-15 20:03:14 -07:00
README.md Initial port. 2015-04-15 20:03:14 -07:00

Spark SQL Performance Tests (WIP)

TPC-DS

import com.databricks.spark.sql.perf.tpcds.TPCDS
import org.apache.spark.sql.parquet.Tables

// Tables used for TPC-DS.
val tables = Tables(sqlContext)

// Set up the TPC-DS experiment.
val tpcds =
new TPCDS (
    sqlContext = sqlContext,
    sparkVersion = "1.3.1",
    dataLocation = <the location of data>,
    dsdgenDir = <the location of dsdgen in every worker>,
    resultsLocation = <the location of performance results>,
    tables = tables.tables,
    scaleFactor = "2",
    collectResults = true)
tpcds.setupExperiment()

// Take a look at the size of every table.
tpcds.allStats.show

// Get all of the queries.
import com.databricks.spark.sql.perf.tpcds.Queries
// Just pick a single query as an example.
val oneQuery = Seq(Queries.q7Derived.head)
// Start the experiment.
val runningExp = tpcds.runExperiment(queries = oneQuery, iterations = 1)

// Get the experiment results.
import com.databricks.spark.sql.perf.Results
val results = Results(resultsLocation = <the location of performance results>, sqlContext = sqlContext)
// These are all of the results.
val allResults = results.allResults
allResults.registerTempTable("results")
// These are the results of the single experiment that started at timestamp 1429132621024 (epoch milliseconds; 2015-04-15 14:17:01.024 in UTC-07:00).
allResults.filter("timestamp = 1429132621024")