[KYUUBI #5677][AUTHZ] Typeof expression misses column information
### _Why are the changes needed?_ To close #5677: the `typeof` expression misses column information; in this PR we add a placeholder to support it. For the SQL ``` SELECT typeof(id), typeof(age) FROM default.t1 LIMIT 1 ``` without this PR, `typeof` — after the optimizer has run — is passed to PrivilegesBuilder as just an attribute, missing the original child expression info. ``` GlobalLimit 21 +- LocalLimit 21 +- Project [int AS typeof(id)#27, int AS typeof(age)#28] +- Relation default.t1[id#18,age#19,part#20] parquet ``` When handling this plan's project list, the expression has been reduced to an attribute holding a constant value, so we cannot extract the `TypeOf`'s child expression. This is caused by the `TypeOf` expression being foldable <img width="778" alt="image" src="https://github.com/apache/kyuubi/assets/46485123/770a36fc-235f-4f26-bca7-a5058e120919"> It is converted to a constant value by the Spark optimizer, which means we lose the child expression and can no longer extract its columns via `collectLeaves`. In this PR we wrap `TypeOf` in a non-foldable placeholder expression, so that during `mergeProjection` we can still reach the expression that contains the table's columns. After this PR, the plan looks like the following ``` GlobalLimit 21 +- LocalLimit 21 +- Project [typeofplaceholder(id#21) AS typeof(id)#30, typeofplaceholder(day#23) AS typeof(day)#31] +- HiveTableRelation [`default`.`table1`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, Data Cols: [id#21, scope#22, day#23], Partition Cols: []] ``` ### _How was this patch tested?_ - [x] Add some test cases that check the changes thoroughly including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [ ] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before make a pull request ### _Was this patch authored or co-authored using generative AI tooling?_ No Closes #5678 from AngersZhuuuu/KYUUBO-5677. 
Closes #5677 273cd61bf [Angerszhuuuu] Update TypeOfPlaceHolder.scala 8b4383db5 [Angerszhuuuu] done af5dae541 [Angerszhuuuu] Update RangerSparkExtensionSuite.scala 37edcdf6d [Angerszhuuuu] fix for spark-3.1 df15599a3 [Angerszhuuuu] update a7dd3c453 [Angerszhuuuu] Update RangerSparkExtension.scala 6b76dac0d [Angerszhuuuu] [KYUUBI #5677][AUTHZ] Typeof expression miss column information f8a94c72d [Angerszhuuuu] [KYUUBI #5677][AUTHZ] Typeof expression miss column information Authored-by: Angerszhuuuu <angers.zhu@gmail.com> Signed-off-by: Kent Yao <yao@apache.org>
This commit is contained in:
parent
e498bdba00
commit
3c2291ef5c
@ -19,9 +19,10 @@ package org.apache.kyuubi.plugin.spark.authz.ranger
|
||||
|
||||
import org.apache.spark.sql.SparkSessionExtensions
|
||||
|
||||
import org.apache.kyuubi.plugin.spark.authz.rule.{RuleEliminateMarker, RuleEliminatePermanentViewMarker}
|
||||
import org.apache.kyuubi.plugin.spark.authz.rule.{RuleEliminateMarker, RuleEliminatePermanentViewMarker, RuleEliminateTypeOf}
|
||||
import org.apache.kyuubi.plugin.spark.authz.rule.config.AuthzConfigurationChecker
|
||||
import org.apache.kyuubi.plugin.spark.authz.rule.datamasking.{RuleApplyDataMaskingStage0, RuleApplyDataMaskingStage1}
|
||||
import org.apache.kyuubi.plugin.spark.authz.rule.expression.RuleApplyTypeOfMarker
|
||||
import org.apache.kyuubi.plugin.spark.authz.rule.permanentview.RuleApplyPermanentViewMarker
|
||||
import org.apache.kyuubi.plugin.spark.authz.rule.rowfilter.{FilterDataSourceV2Strategy, RuleApplyRowFilter, RuleReplaceShowObjectCommands}
|
||||
|
||||
@ -46,12 +47,14 @@ class RangerSparkExtension extends (SparkSessionExtensions => Unit) {
|
||||
v1.injectCheckRule(AuthzConfigurationChecker)
|
||||
v1.injectResolutionRule(_ => new RuleReplaceShowObjectCommands())
|
||||
v1.injectResolutionRule(_ => new RuleApplyPermanentViewMarker())
|
||||
v1.injectResolutionRule(_ => new RuleApplyTypeOfMarker())
|
||||
v1.injectResolutionRule(RuleApplyRowFilter)
|
||||
v1.injectResolutionRule(RuleApplyDataMaskingStage0)
|
||||
v1.injectResolutionRule(RuleApplyDataMaskingStage1)
|
||||
v1.injectOptimizerRule(_ => new RuleEliminateMarker())
|
||||
v1.injectOptimizerRule(new RuleAuthorization(_))
|
||||
v1.injectOptimizerRule(_ => new RuleEliminatePermanentViewMarker())
|
||||
v1.injectOptimizerRule(_ => new RuleEliminateTypeOf())
|
||||
v1.injectPlannerStrategy(new FilterDataSourceV2Strategy(_))
|
||||
}
|
||||
}
|
||||
|
||||
@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.kyuubi.plugin.spark.authz.rule
|
||||
|
||||
import org.apache.spark.sql.catalyst.expressions.TypeOf
|
||||
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
|
||||
import org.apache.spark.sql.catalyst.rules.Rule
|
||||
|
||||
import org.apache.kyuubi.plugin.spark.authz.rule.expression.TypeOfPlaceHolder
|
||||
|
||||
/**
 * Optimizer rule that strips the authz-internal [[TypeOfPlaceHolder]] marker,
 * restoring the original Spark `TypeOf` expression once privilege checking
 * has had a chance to see the wrapped child (and its column references).
 *
 * Injected after [[RuleAuthorization]] so the placeholder survives until the
 * privileges have been built, then disappears from the executed plan.
 */
class RuleEliminateTypeOf extends Rule[LogicalPlan] {
  override def apply(plan: LogicalPlan): LogicalPlan =
    plan.transformUp { case node =>
      // Bottom-up over each node's expression tree: unwrap every placeholder.
      node.transformExpressionsUp {
        case placeholder: TypeOfPlaceHolder => TypeOf(placeholder.expr)
      }
    }
}
|
||||
@ -0,0 +1,31 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.kyuubi.plugin.spark.authz.rule.expression
|
||||
|
||||
import org.apache.spark.sql.catalyst.expressions.TypeOf
|
||||
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
|
||||
import org.apache.spark.sql.catalyst.rules.Rule
|
||||
|
||||
/**
 * Resolution rule that replaces every Spark `TypeOf` expression with the
 * non-foldable [[TypeOfPlaceHolder]] marker.
 *
 * `TypeOf` itself is foldable, so the optimizer would constant-fold it away
 * and drop its child's column references before the authz privilege builder
 * runs; the placeholder keeps the child expression visible. The marker is
 * later removed by `RuleEliminateTypeOf`.
 */
class RuleApplyTypeOfMarker extends Rule[LogicalPlan] {

  override def apply(plan: LogicalPlan): LogicalPlan =
    plan.transformAllExpressions {
      case TypeOf(inner) => TypeOfPlaceHolder(inner)
    }
}
|
||||
@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.kyuubi.plugin.spark.authz.rule.expression
|
||||
|
||||
import org.apache.spark.sql.catalyst.expressions.{Expression, UnaryExpression}
|
||||
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
|
||||
import org.apache.spark.sql.types.{DataType, StringType}
|
||||
|
||||
import org.apache.kyuubi.plugin.spark.authz.util.WithInternalExpressionChild
|
||||
|
||||
/**
 * A non-foldable stand-in for Spark's `TypeOf` expression.
 *
 * Spark's `TypeOf` is foldable, so the optimizer constant-folds it into a
 * literal and the authz privilege builder can no longer recover the child
 * expression's column references via `collectLeaves`. Wrapping the child in
 * this placeholder (see `RuleApplyTypeOfMarker`) keeps the child alive in the
 * plan until privileges are built; `RuleEliminateTypeOf` later restores the
 * original `TypeOf`.
 *
 * @param expr the original child of the `TypeOf` expression
 */
case class TypeOfPlaceHolder(expr: Expression) extends UnaryExpression
  with WithInternalExpressionChild {

  // typeof(...) always yields the SQL type name as a string.
  override def dataType: DataType = StringType

  // The whole point of the placeholder: prevent constant folding by the
  // Spark optimizer so the child expression (and its columns) survive.
  override def foldable: Boolean = false

  override def child: Expression = expr

  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    // Emit the type name as a *quoted* Java string literal. Without the
    // embedded quotes the generated code would reference a bare identifier
    // (e.g. `UTF8String.fromString(int)`) and fail to compile. This mirrors
    // Spark's own TypeOf.doGenCode.
    defineCodeGen(ctx, ev, _ => s"""UTF8String.fromString("${child.dataType.catalogString}")""")
  }

  override def withNewChildInternal(newChild: Expression): Expression =
    copy(expr = newChild)
}
|
||||
@ -17,6 +17,7 @@
|
||||
|
||||
package org.apache.kyuubi.plugin.spark.authz.util
|
||||
|
||||
import org.apache.spark.sql.catalyst.expressions.Expression
|
||||
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
|
||||
|
||||
trait WithInternalChildren {
|
||||
@ -26,3 +27,7 @@ trait WithInternalChildren {
|
||||
/**
 * Marker for authz-internal wrapper plan nodes exposing the single-child
 * rewrite hook; presumably mirrors Spark's `withNewChildInternal` for
 * cross-version compatibility — confirm against the Spark versions supported.
 */
trait WithInternalChild {
  def withNewChildInternal(newChild: LogicalPlan): LogicalPlan
}

/**
 * Expression counterpart of [[WithInternalChild]]: the single-child rewrite
 * hook for authz-internal wrapper expressions (e.g. `TypeOfPlaceHolder`).
 */
trait WithInternalExpressionChild {
  def withNewChildInternal(newChild: Expression): Expression
}
|
||||
|
||||
@ -23,7 +23,7 @@ import java.nio.file.Path
|
||||
import scala.util.Try
|
||||
|
||||
import org.apache.hadoop.security.UserGroupInformation
|
||||
import org.apache.spark.sql.SparkSessionExtensions
|
||||
import org.apache.spark.sql.{Row, SparkSessionExtensions}
|
||||
import org.apache.spark.sql.catalyst.analysis.NoSuchTableException
|
||||
import org.apache.spark.sql.catalyst.catalog.HiveTableRelation
|
||||
import org.apache.spark.sql.catalyst.plans.logical.Statistics
|
||||
@ -1296,4 +1296,48 @@ class HiveCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
test("[KYUUBI #5677][AUTHZ] Typeof expression miss column information") {
  // typeof(...) must still surface its child's column references to the
  // privilege builder, even though TypeOf is constant-folded by the optimizer.
  val db = defaultDb
  val table = "table1"
  withSingleCallEnabled {
    withCleanTmpResources(Seq((s"$db.$table", "table"))) {
      doAs(
        admin,
        sql(
          s"""
             |CREATE TABLE IF NOT EXISTS $db.$table(
             |id int,
             |scope int,
             |day string)
             |""".stripMargin))
      doAs(admin, sql(s"INSERT INTO $db.$table SELECT 1, 2, 'TONY'"))

      // Both denied queries should report the same missing column privileges.
      val expectedDenial =
        s"does not have [select] privilege on [$db/$table/id,$db/$table/day]"

      // Plain and nested typeof over raw columns.
      interceptContains[AccessControlException](
        doAs(
          someone,
          sql(s"SELECT typeof(id), typeof(typeof(day)) FROM $db.$table").collect()))(
        expectedDenial)

      // typeof over derived expressions (cast / substring) of those columns.
      interceptContains[AccessControlException](
        doAs(
          someone,
          sql(
            s"""
               |SELECT
               |typeof(cast(id as string)),
               |typeof(substring(day, 1, 3))
               |FROM $db.$table""".stripMargin).collect()))(
        expectedDenial)

      // Positive case: an authorized user still gets correct typeof results,
      // i.e. the placeholder does not change query semantics.
      checkAnswer(
        admin,
        s"""
           |SELECT
           |typeof(id),
           |typeof(typeof(day)),
           |typeof(cast(id as string)),
           |typeof(substring(day, 1, 3))
           |FROM $db.$table""".stripMargin,
        Seq(Row("int", "string", "string", "string")))
    }
  }
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user