[KYUUBI #7077] Spark 3.5: Enhance MaxScanStrategy for DSv2
### Why are the changes needed? To enhance the MaxScanStrategy in Spark's DSv2 to ensure it only works for relations that support statistics reporting. This prevents Spark from returning a default value of Long.MaxValue, which leads to some queries failing or behaving unexpectedly. ### How was this patch tested? It was tested locally. ### Was this patch authored or co-authored using generative AI tooling? No Closes #7077 from zhaohehuhu/dev-0527. Closes #7077 64001c94e [zhaohehuhu] fix MaxScanStrategy for datasource v2 Authored-by: zhaohehuhu <luoyedeyi459@163.com> Signed-off-by: Cheng Pan <chengpan@apache.org>
This commit is contained in:
parent
1af6647132
commit
bcaff5a3f1
@ -23,6 +23,7 @@ import org.apache.spark.sql.catalyst.SQLConfHelper
|
|||||||
import org.apache.spark.sql.catalyst.catalog.{CatalogTable, HiveTableRelation}
|
import org.apache.spark.sql.catalyst.catalog.{CatalogTable, HiveTableRelation}
|
||||||
import org.apache.spark.sql.catalyst.planning.ScanOperation
|
import org.apache.spark.sql.catalyst.planning.ScanOperation
|
||||||
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
|
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
|
||||||
|
import org.apache.spark.sql.connector.read.SupportsReportStatistics
|
||||||
import org.apache.spark.sql.execution.SparkPlan
|
import org.apache.spark.sql.execution.SparkPlan
|
||||||
import org.apache.spark.sql.execution.datasources.{CatalogFileIndex, HadoopFsRelation, InMemoryFileIndex, LogicalRelation}
|
import org.apache.spark.sql.execution.datasources.{CatalogFileIndex, HadoopFsRelation, InMemoryFileIndex, LogicalRelation}
|
||||||
import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanRelation
|
import org.apache.spark.sql.execution.datasources.v2.DataSourceV2ScanRelation
|
||||||
@ -237,7 +238,7 @@ case class MaxScanStrategy(session: SparkSession)
|
|||||||
_,
|
_,
|
||||||
_,
|
_,
|
||||||
_,
|
_,
|
||||||
relation @ DataSourceV2ScanRelation(_, _, _, _, _)) =>
|
relation @ DataSourceV2ScanRelation(_, _: SupportsReportStatistics, _, _, _)) =>
|
||||||
val table = relation.relation.table
|
val table = relation.relation.table
|
||||||
if (table.partitioning().nonEmpty) {
|
if (table.partitioning().nonEmpty) {
|
||||||
val partitionColumnNames = table.partitioning().map(_.describe())
|
val partitionColumnNames = table.partitioning().map(_.describe())
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user