From bcda8fc1e5d7bba89f8c608c836f3566330ffdec Mon Sep 17 00:00:00 2001
From: Juliusz Sompolski <julek@databricks.com>
Date: Mon, 4 Sep 2017 18:05:42 +0200
Subject: [PATCH] Coalesce non-partitioned tables. (#118)

In #109 coalescing of non-partitioned tables into 1 file seems to have gotten accidentally removed.
Put it back, but only when clusterByPartitionColumns == true.
Considering that we coalesce partitions only when that setting is true, it seems to be consistent to use it also for non-partitioned tables.

It may be better to rename the parameter, but that would change the interface, so it should probably be left for a future cleanup.
---
 src/main/scala/com/databricks/spark/sql/perf/Tables.scala | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/main/scala/com/databricks/spark/sql/perf/Tables.scala b/src/main/scala/com/databricks/spark/sql/perf/Tables.scala
index 368ea5d..775dfd8 100644
--- a/src/main/scala/com/databricks/spark/sql/perf/Tables.scala
+++ b/src/main/scala/com/databricks/spark/sql/perf/Tables.scala
@@ -211,7 +211,12 @@ abstract class Tables(sqlContext: SQLContext, scaleFactor: String,
           data.write
         }
       } else {
-        data.write
+        if (clusterByPartitionColumns) {
+          // treat non-partitioned tables as "one partition" that we want to coalesce
+          data.coalesce(1).write
+        } else {
+          data.write
+        }
       }
       writer.format(format).mode(mode)
       if (partitionColumns.nonEmpty) {