43 lines
1.3 KiB
Markdown
43 lines
1.3 KiB
Markdown
## Spark SQL Performance Tests (WIP)
|
|
|
|
### TPC-DS
|
|
```
|
|
import com.databricks.spark.sql.perf.tpcds.TPCDS
|
|
import org.apache.spark.sql.parquet.Tables
|
|
|
|
// Tables used for TPC-DS.
|
|
val tables = Tables(sqlContext)
|
|
|
|
// Setup TPC-DS experiment
|
|
val tpcds =
|
|
new TPCDS (
|
|
sqlContext = sqlContext,
|
|
sparkVersion = "1.3.1",
|
|
dataLocation = <the location of data>,
|
|
dsdgenDir = <the location of dsdgen in every worker>,
|
|
resultsLocation = <the location of performance results>,
|
|
tables = tables.tables,
|
|
scaleFactor = "2",
|
|
collectResults = true)
|
|
tpcds.setupExperiment()
|
|
|
|
// Take a look at the size of every table.
|
|
tpcds.allStats.show
|
|
|
|
// Get all of the queries.
|
|
import com.databricks.spark.sql.perf.tpcds.Queries
|
|
// Just pick a single query as an example.
|
|
val oneQuery = Seq(Queries.q7Derived.head)
|
|
// Start the experiment.
|
|
val runningExp = tpcds.runExperiment(queries = oneQuery, iterations = 1)
|
|
|
|
// Get experiments results.
|
|
import com.databricks.spark.sql.perf.Results
|
|
val results = Results(resultsLocation = <the location of performance results>, sqlContext = sqlContext)
|
|
// This is all results.
|
|
val allResults = results.allResults
|
|
allResults.registerTempTable("results")
|
|
// This is the result for a single experiment started at the timestamp represented by 1429132621024 (2015-04-15 14:17:01.024).
|
|
allResults.filter("timestamp = 1429132621024")
|
|
```
|