Add decision tree benchmark (#140)

* Move mllib config file to resources.

* Add DecisionTreeClassification as first benchmark in mllib-large.yaml.

* Read config files as streams to be jar compatible.

* PR feedback #140.
This commit is contained in:
Bago Amirbekian 2018-05-08 21:44:11 -07:00 committed by Xiangrui Meng
parent ed9bbb01a5
commit 9ece11ff20
3 changed files with 23 additions and 0 deletions

View File

@@ -0,0 +1,13 @@
output: /databricks/spark/sql/mllib-perf-ci
timeoutSeconds: 1000 # This limit is for all benchmarks and should be bumped as more are added.
common:
numExamples: 1000000
numTestExamples: 1000000
numFeatures: 4000
numPartitions: 64
randomSeed: [1, 1, 1] # Rerun 3 times to accumulate some info
benchmarks:
- name: classification.DecisionTreeClassification
params:
depth: [5, 10]
numClasses: 4

View File

@@ -1,5 +1,7 @@
package com.databricks.spark.sql.perf.mllib
import scala.io.Source
import scala.language.implicitConversions
import com.typesafe.scalalogging.slf4j.{LazyLogging => Logging}
@@ -34,6 +36,14 @@ object MLLib extends Logging {
e.getCurrentResults()
}
/**
 * Reads a config file from the classpath as a string, so that configs bundled
 * inside the assembly jar can be loaded (plain file paths would not work there).
 *
 * @param resourcePath classpath-relative path of the YAML config resource
 * @return the full text content of the resource
 * @throws IllegalArgumentException if the resource is not on the classpath
 */
private def getConfig(resourcePath: String): String = {
  val stream = getClass.getResourceAsStream(resourcePath)
  // getResourceAsStream returns null (not an exception) when the resource is
  // missing; fail fast with a clear message instead of an opaque NPE below.
  require(stream != null, s"Config resource not found on classpath: $resourcePath")
  val source = Source.fromInputStream(stream)
  try {
    source.mkString
  } finally {
    // BufferedSource.close() also closes the underlying input stream,
    // fixing the resource leak in the original implementation.
    source.close()
  }
}
// YAML benchmark configs bundled as classpath resources (see getConfig), read
// eagerly at object initialization. NOTE(review): if a resource is missing,
// this fails when MLLib is first touched — presumably intentional fail-fast.
val smallConfig: String = getConfig("config/mllib-small.yaml")
val largeConfig: String = getConfig("config/mllib-large.yaml")
/**
* Entry point for running ML tests. Expects a single command-line argument: the path to
* a YAML config file specifying which ML tests to run and their parameters.