This commit is contained in:
Joseph K. Bradley 2016-06-30 10:45:15 -07:00
parent ecf2eedbb8
commit c15d083fe7
4 changed files with 16 additions and 9 deletions

View File

@@ -50,7 +50,6 @@ class MLTransformerBenchmarkable(
logger.info(s"$this: train: trainingSet=${trainingData.schema}")
val estimator = test.getEstimator(param)
estimator.fit(trainingData)
//test.train(param, trainingData)
}
logger.info(s"model: $model")
val (_, scoreTraining) = measureTime {

View File

@@ -1,7 +1,7 @@
package com.databricks.spark.sql.perf.mllib.clustering
import com.databricks.spark.sql.perf.mllib.{BenchmarkAlgorithm, MLBenchContext, TestFromTraining}
import com.databricks.spark.sql.perf.mllib.OptionImplicits._
import scala.collection.mutable.{HashMap => MHashMap}
import org.apache.commons.math3.random.Well19937c
import org.apache.spark.ml.Estimator
@@ -9,7 +9,10 @@ import org.apache.spark.ml
import org.apache.spark.rdd.RDD
import org.apache.spark.sql._
import org.apache.spark.ml.linalg.{Vector, Vectors}
import scala.collection.mutable.{HashMap => MHashMap}
import com.databricks.spark.sql.perf.mllib.{BenchmarkAlgorithm, MLBenchContext, TestFromTraining}
import com.databricks.spark.sql.perf.mllib.OptionImplicits._
object LDA extends BenchmarkAlgorithm with TestFromTraining {
// The LDA model is package private, no need to expose it.
@@ -51,4 +54,4 @@ object LDA extends BenchmarkAlgorithm with TestFromTraining {
}
// TODO(?) add a scoring method here.
}
}

View File

@@ -21,6 +21,11 @@ object DataGenerator {
sql.createDataFrame(rdd.map(Tuple1.apply)).toDF("features")
}
/**
* Generate a mix of continuous and categorical features.
* @param featureArity Array of length numFeatures, where 0 indicates a continuous feature and
* a value > 0 indicates a categorical feature with that arity.
*/
def generateMixedFeatures(
sql: SQLContext,
numExamples: Long,

View File

@@ -1,14 +1,14 @@
package com.databricks.spark.sql.perf.mllib.regression
import com.databricks.spark.sql.perf.mllib.OptionImplicits._
import com.databricks.spark.sql.perf.mllib._
import com.databricks.spark.sql.perf.mllib.data.DataGenerator
import org.apache.spark.ml.evaluation.{Evaluator, RegressionEvaluator}
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.regression.GeneralizedLinearRegression
import org.apache.spark.ml.{Estimator, ModelBuilder, Transformer}
import com.databricks.spark.sql.perf.mllib.OptionImplicits._
import com.databricks.spark.sql.perf.mllib._
import com.databricks.spark.sql.perf.mllib.data.DataGenerator
object GLMRegression extends BenchmarkAlgorithm with TestFromTraining with
TrainingSetFromTransformer with ScoringWithEvaluator {