diff --git a/src/main/resources/com/databricks/spark/sql/perf/mllib/config/mllib-large.yaml b/src/main/resources/com/databricks/spark/sql/perf/mllib/config/mllib-large.yaml index 26a2970..1f4c3ee 100644 --- a/src/main/resources/com/databricks/spark/sql/perf/mllib/config/mllib-large.yaml +++ b/src/main/resources/com/databricks/spark/sql/perf/mllib/config/mllib-large.yaml @@ -59,6 +59,42 @@ benchmarks: optimizer: - em - online + - name: feature.BucketedRandomProjectionLSH + params: + numHashTables: 20 + - name: feature.Bucketizer + params: + bucketizerNumBuckets: 20 + - name: feature.HashingTF + params: + docLength: 2000 + vocabSize: 200 + - name: feature.MinHashLSH + params: + numFeatures: 10000 + numHashTables: 10 + - name: feature.OneHotEncoder + params: + featureArity: 10000 + - name: feature.StringIndexer + params: + vocabSize: 10000 + - name: feature.Tokenizer + params: + vocabSize: 2000 + docLength: 10000 + - name: feature.VectorAssembler + params: + numInputCols: 20 + - name: feature.VectorSlicer + params: + numFeatures: 5000 + - name: feature.Word2Vec + params: + numExamples: 10000 + vocabSize: 100 + docLength: 1000 + numSynonymsToFind: 3 - name: fpm.FPGrowth params: numItems: 10000 @@ -93,4 +129,4 @@ benchmarks: - name: regression.RandomForestRegression params: depth: 10 - maxIter: 4 \ No newline at end of file + maxIter: 4