Add decision tree benchmark (#140)
* Move mllib config file to resources. * Add DecisionTreeClassification as first benchmark in mllib-large.yaml. * Read config files as streams to be jar compatible. * PR feedback #140.
This commit is contained in:
parent
ed9bbb01a5
commit
9ece11ff20
@ -0,0 +1,13 @@
|
||||
output: /databricks/spark/sql/mllib-perf-ci
|
||||
timeoutSeconds: 1000 # This limit is for all benchmarks and should be bumped as more are added.
|
||||
common:
|
||||
numExamples: 1000000
|
||||
numTestExamples: 1000000
|
||||
numFeatures: 4000
|
||||
numPartitions: 64
|
||||
randomSeed: [1, 1, 1] # Rerun 3 times to accumulate some info
|
||||
benchmarks:
|
||||
- name: classification.DecisionTreeClassification
|
||||
params:
|
||||
depth: [5, 10]
|
||||
numClasses: 4
|
||||
@ -1,5 +1,7 @@
|
||||
package com.databricks.spark.sql.perf.mllib
|
||||
|
||||
|
||||
import scala.io.Source
|
||||
import scala.language.implicitConversions
|
||||
|
||||
import com.typesafe.scalalogging.slf4j.{LazyLogging => Logging}
|
||||
@ -34,6 +36,14 @@ object MLLib extends Logging {
|
||||
e.getCurrentResults()
|
||||
}
|
||||
|
||||
/**
 * Reads a classpath resource fully into a string.
 *
 * The resource is opened as a stream rather than as a file so that it can also be
 * read when the code is packaged inside a jar.
 *
 * @param resourcePath path handed to `Class.getResourceAsStream`
 * @return the complete text of the resource, decoded as UTF-8
 * @throws IllegalArgumentException if no resource exists at `resourcePath`
 */
private def getConfig(resourcePath: String): String = {
  val stream = getClass.getResourceAsStream(resourcePath)
  // getResourceAsStream reports a missing resource by returning null; fail here with a
  // message naming the path instead of an opaque NullPointerException further down.
  require(stream != null, s"Config resource not found on classpath: $resourcePath")
  try {
    // Decode explicitly as UTF-8 so the result does not depend on the platform default charset.
    Source.fromInputStream(stream, "UTF-8").mkString
  } finally {
    // Always release the underlying stream, even if reading fails.
    stream.close()
  }
}
|
||||
|
||||
/**
 * Text of the `config/mllib-small.yaml` classpath resource, loaded through `getConfig`.
 *
 * NOTE(review): the path has no leading '/', so `Class.getResourceAsStream` resolves it
 * relative to this class's package — confirm the YAML file is placed accordingly.
 */
val smallConfig: String = getConfig("config/mllib-small.yaml")
|
||||
/**
 * Text of the `config/mllib-large.yaml` classpath resource, loaded through `getConfig`.
 *
 * NOTE(review): the path has no leading '/', so `Class.getResourceAsStream` resolves it
 * relative to this class's package — confirm the YAML file is placed accordingly.
 */
val largeConfig: String = getConfig("config/mllib-large.yaml")
|
||||
|
||||
/**
|
||||
* Entry point for running ML tests. Expects a single command-line argument: the path to
|
||||
* a YAML config file specifying which ML tests to run and their parameters.
|
||||
|
||||
Loading…
Reference in New Issue
Block a user