[ML-4069] Improve timing of estimators (#161)

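With these parameter changes, the benchmarks have the following running times (a benchmark configured with a list-valued parameter runs once per value, so its name appears more than once):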
```
recommendation.ALS	72.083s
classification.DecisionTreeClassification	37.125s
classification.DecisionTreeClassification	33.274s
regression.DecisionTreeRegression	31.252s
regression.DecisionTreeRegression	63.35s
fpm.FPGrowth	6.219s
fpm.FPGrowth	5.342s
classification.GBTClassification	46.154s
regression.GBTRegression	45.832s
clustering.GaussianMixture	18.936s
regression.GLMRegression	20.342s
clustering.KMeans	32.473s
clustering.LDA	44.574s
clustering.LDA	24.658s
classification.LinearSVC	39.84s
regression.LinearRegression	43.335s
classification.LogisticRegression	41.637s
classification.LogisticRegression	37.711s
classification.NaiveBayes	23.351s
classification.RandomForestClassification	20.781s
regression.RandomForestRegression	39.971s
feature.Word2Vec	51.892s
```
This commit is contained in:
Joseph Bradley 2018-07-09 17:41:44 -07:00 committed by GitHub
parent 30c50dddbb
commit 107495afe2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -9,53 +9,58 @@ common:
benchmarks: benchmarks:
- name: classification.DecisionTreeClassification - name: classification.DecisionTreeClassification
params: params:
depth: [5, 10] depth: [5, 12]
numClasses: 4 numClasses: 4
- name: classification.GBTClassification - name: classification.GBTClassification
params: params:
numFeatures: 3000 numFeatures: 1500
depth: 5 depth: 4
numClasses: 4 numClasses: 4
maxIter: 10 maxIter: 8
- name: classification.RandomForestClassification - name: classification.RandomForestClassification
params: params:
depth: 10 depth: 10
numFeatures: 1000
numClasses: 4 numClasses: 4
maxIter: 200 # number of trees maxIter: 100
- name: classification.LogisticRegression - name: classification.LogisticRegression
params: params:
numExamples: 700000
numFeatures: 5000
elasticNetParam: [0.0, 0.5]
regParam: 0.01 regParam: 0.01
tol: 0.0 tol: 0.0
maxIter: 20 maxIter: 10
- name: classification.LinearSVC - name: classification.LinearSVC
params: params:
numExamples: 500000
regParam: 0.01 regParam: 0.01
tol: 0 tol: 0
maxIter: 20 maxIter: 10
- name: classification.NaiveBayes - name: classification.NaiveBayes
params: params:
numExamples: 2000000
numFeatures: 5000 numFeatures: 5000
numClasses: 2 numClasses: 10
smoothing: 1.0 smoothing: 1.0
- name: clustering.GaussianMixture - name: clustering.GaussianMixture
params: params:
numExamples: 100000 numFeatures: 30
numTestExamples: 100000 k: 15
numFeatures: 1000 maxIter: 15
k: 10 tol: 0.0
maxIter: 10
tol: 0.01
- name: clustering.KMeans - name: clustering.KMeans
params: params:
k: 50 k: 20
maxIter: 20 maxIter: 20
tol: 1e-3 tol: 1e-3
- name: clustering.LDA - name: clustering.LDA
params: params:
numExamples: 200000
docLength: 100 docLength: 100
vocabSize: 5000 vocabSize: 5000
k: 60 k: 20
maxIter: 20 maxIter: 10
optimizer: optimizer:
- em - em
- online - online
@ -93,45 +98,48 @@ benchmarks:
params: params:
numExamples: 10000 numExamples: 10000
vocabSize: 100 vocabSize: 100
docLength: 1000 docLength: 300
numSynonymsToFind: 3 numSynonymsToFind: 3
- name: fpm.FPGrowth - name: fpm.FPGrowth
params: params:
numExamples: 10000000
numItems: 10000 numItems: 10000
itemSetSize: [4, 10] itemSetSize: [4, 14]
- name: recommendation.ALS - name: recommendation.ALS
params: params:
numExamples: 50000000 numExamples: 20000000
numTestExamples: 50000000 numTestExamples: 50000000
numUsers: 6000000 numUsers: 4000000
numItems: 6000000 numItems: 4000000
regParam: 0.01 regParam: 0.01
rank: 10 rank: 10
maxIter: 10 maxIter: 8
- name: regression.DecisionTreeRegression - name: regression.DecisionTreeRegression
params: params:
depth: [5, 10] depth: [4, 7]
- name: regression.GBTRegression - name: regression.GBTRegression
params: params:
numFeatures: 2000 numFeatures: 1000
depth: 5 depth: 4
maxIter: 5 maxIter: 8
- name: regression.GLMRegression - name: regression.GLMRegression
params: params:
numExamples: 500000 numExamples: 400000
numTestExamples: 500000 numTestExamples: 500000
numFeatures: 1000 numFeatures: 500
link: log link: log
family: gaussian family: gaussian
tol: 0.0 tol: 0.0
maxIter: 10 maxIter: 8
regParam: 0.1 regParam: 0.1
- name: regression.LinearRegression - name: regression.LinearRegression
params: params:
numFeatures: 5000
regParam: 0.01 regParam: 0.01
tol: 0.0 tol: 0.0
maxIter: 20 maxIter: 9
- name: regression.RandomForestRegression - name: regression.RandomForestRegression
params: params:
depth: 10 depth: 7
maxIter: 4 numFeatures: 500
maxIter: 16