diff --git a/src/main/scala/com/databricks/spark/sql/perf/Query.scala b/src/main/scala/com/databricks/spark/sql/perf/Query.scala index 498835f..3868efd 100644 --- a/src/main/scala/com/databricks/spark/sql/perf/Query.scala +++ b/src/main/scala/com/databricks/spark/sql/perf/Query.scala @@ -127,13 +127,10 @@ class Query( case ExecutionMode.WriteParquet(location) => dataFrame.write.parquet(s"$location/$name.parquet") case ExecutionMode.HashResults => - val columnStr = dataFrame.schema.map(_.name).mkString(",") - // SELECT SUM(HASH(col1, col2, ...)) FROM (benchmark query) + // SELECT SUM(CRC32(CONCAT_WS(',', *))) FROM (benchmark query) val row = dataFrame - .selectExpr(s"hash($columnStr) as hashValue") - .groupBy() - .sum("hashValue") + .selectExpr(s"sum(crc32(concat_ws(',', *)))") .head() result = if (row.isNullAt(0)) None else Some(row.getLong(0)) } @@ -169,4 +166,4 @@ class Query( def checkResult: Query = { new Query(name, buildDataFrame, description, sqlText, ExecutionMode.HashResults) } -} \ No newline at end of file +} diff --git a/version.sbt b/version.sbt index 1524105..f4273b3 100644 --- a/version.sbt +++ b/version.sbt @@ -1 +1 @@ -version in ThisBuild := "0.4.8-SNAPSHOT" +version in ThisBuild := "0.4.9-SNAPSHOT"