From 6a45dc8a2d382d6097ee7e1ad84906652eee4f76 Mon Sep 17 00:00:00 2001
From: ludatabricks <38018689+ludatabricks@users.noreply.github.com>
Date: Tue, 12 Jun 2018 10:32:02 -0700
Subject: [PATCH] [ML-3581] Add benchmarks to mllib-large.yaml for regression (#150)

Benchmark for regression is added to mllib-large.yaml.
DecisionTreeRegression, GLMRegression, LinearRegression, and RandomForestRegression are added.
GBT, AFTSurvivalRegression, and IsotonicRegression are missing in spark-sql-perf.
---
 .../sql/perf/mllib/config/mllib-large.yaml | 24 ++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/src/main/resources/com/databricks/spark/sql/perf/mllib/config/mllib-large.yaml b/src/main/resources/com/databricks/spark/sql/perf/mllib/config/mllib-large.yaml
index 741359d..93e39f4 100644
--- a/src/main/resources/com/databricks/spark/sql/perf/mllib/config/mllib-large.yaml
+++ b/src/main/resources/com/databricks/spark/sql/perf/mllib/config/mllib-large.yaml
@@ -67,4 +67,26 @@ benchmarks:
       numItems: 6000000
       regParam: 0.01
       rank: 10
-      maxIter: 10
\ No newline at end of file
+      maxIter: 10
+  - name: regression.DecisionTreeRegression
+    params:
+      depth: [5, 10]
+  - name: regression.GLMRegression
+    params:
+      numExamples: 500000
+      numTestExamples: 500000
+      numFeatures: 1000
+      link: log
+      family: gaussian
+      tol: 0.0
+      maxIter: 10
+      regParam: 0.1
+  - name: regression.LinearRegression
+    params:
+      regParam: 0.01
+      tol: 0.0
+      maxIter: 20
+  - name: regression.RandomForestRegression
+    params:
+      depth: 10
+      maxIter: 4
\ No newline at end of file