[KYUUBI #5677][AUTHZ] Typeof expression miss column information

### _Why are the changes needed?_
To close #5677
The `typeof` expression loses its column information after optimization; in this PR we add a placeholder expression to preserve it.

For sql
```
SELECT typeof(id), typeof(age) FROM default.t1 LIMIT 1
```

Without this PR, after the optimizer runs, the `typeof` expression passed to the privilege builder is just a constant alias, missing the original child expression info.
```
GlobalLimit 21
+- LocalLimit 21
   +- Project [int AS typeof(id)#27, int AS typeof(age)#28]
      +- Relation default.t1[id#18,age#19,part#20] parquet
```

When handling this plan's project list, each entry is only a constant alias (the expression result has been folded to a constant value), so we cannot extract the `TypeOf`'s child expression.
![image](https://github.com/apache/kyuubi/assets/46485123/d27e324d-4db9-4fb5-bba4-fa795601d7f7)

This is caused by the `TypeOf` expression being foldable:
<img width="778" alt="image" src="https://github.com/apache/kyuubi/assets/46485123/770a36fc-235f-4f26-bca7-a5058e120919">

It is converted to a constant value by the Spark optimizer, so we lose the child expression and cannot extract columns via `collectLeaves`.

In this PR we wrap `TypeOf` in a non-foldable placeholder expression, so the expression containing the table's columns is still available during `mergeProjection`.
After this pr, the plan is like below
```
GlobalLimit 21
+- LocalLimit 21
   +- Project [typeofplaceholder(id#21) AS typeof(id)#30, typeofplaceholder(day#23) AS typeof(day)#31]
      +- HiveTableRelation [`default`.`table1`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, Data Cols: [id#21, scope#22, day#23], Partition Cols: []]

```

### _How was this patch tested?_
- [x] Add some test cases that check the changes thoroughly including negative and positive cases if possible

- [ ] Add screenshots for manual tests if appropriate

- [ ] [Run test](https://kyuubi.readthedocs.io/en/master/contributing/code/testing.html#running-tests) locally before make a pull request

### _Was this patch authored or co-authored using generative AI tooling?_
No

Closes #5678 from AngersZhuuuu/KYUUBO-5677.

Closes #5677

273cd61bf [Angerszhuuuu] Update TypeOfPlaceHolder.scala
8b4383db5 [Angerszhuuuu] done
af5dae541 [Angerszhuuuu] Update RangerSparkExtensionSuite.scala
37edcdf6d [Angerszhuuuu] fix for spark-3.1
df15599a3 [Angerszhuuuu] update
a7dd3c453 [Angerszhuuuu] Update RangerSparkExtension.scala
6b76dac0d [Angerszhuuuu] [KYUUBI #5677][AUTHZ] Typeof expression miss column information
f8a94c72d [Angerszhuuuu] [KYUUBI #5677][AUTHZ] Typeof expression miss column information

Authored-by: Angerszhuuuu <angers.zhu@gmail.com>
Signed-off-by: Kent Yao <yao@apache.org>
This commit is contained in:
Angerszhuuuu 2023-11-15 12:37:43 +08:00 committed by Kent Yao
parent e498bdba00
commit 3c2291ef5c
No known key found for this signature in database
GPG Key ID: F7051850A0AF904D
6 changed files with 160 additions and 2 deletions

View File

@ -19,9 +19,10 @@ package org.apache.kyuubi.plugin.spark.authz.ranger
import org.apache.spark.sql.SparkSessionExtensions
import org.apache.kyuubi.plugin.spark.authz.rule.{RuleEliminateMarker, RuleEliminatePermanentViewMarker}
import org.apache.kyuubi.plugin.spark.authz.rule.{RuleEliminateMarker, RuleEliminatePermanentViewMarker, RuleEliminateTypeOf}
import org.apache.kyuubi.plugin.spark.authz.rule.config.AuthzConfigurationChecker
import org.apache.kyuubi.plugin.spark.authz.rule.datamasking.{RuleApplyDataMaskingStage0, RuleApplyDataMaskingStage1}
import org.apache.kyuubi.plugin.spark.authz.rule.expression.RuleApplyTypeOfMarker
import org.apache.kyuubi.plugin.spark.authz.rule.permanentview.RuleApplyPermanentViewMarker
import org.apache.kyuubi.plugin.spark.authz.rule.rowfilter.{FilterDataSourceV2Strategy, RuleApplyRowFilter, RuleReplaceShowObjectCommands}
@ -46,12 +47,14 @@ class RangerSparkExtension extends (SparkSessionExtensions => Unit) {
v1.injectCheckRule(AuthzConfigurationChecker)
v1.injectResolutionRule(_ => new RuleReplaceShowObjectCommands())
v1.injectResolutionRule(_ => new RuleApplyPermanentViewMarker())
v1.injectResolutionRule(_ => new RuleApplyTypeOfMarker())
v1.injectResolutionRule(RuleApplyRowFilter)
v1.injectResolutionRule(RuleApplyDataMaskingStage0)
v1.injectResolutionRule(RuleApplyDataMaskingStage1)
v1.injectOptimizerRule(_ => new RuleEliminateMarker())
v1.injectOptimizerRule(new RuleAuthorization(_))
v1.injectOptimizerRule(_ => new RuleEliminatePermanentViewMarker())
v1.injectOptimizerRule(_ => new RuleEliminateTypeOf())
v1.injectPlannerStrategy(new FilterDataSourceV2Strategy(_))
}
}

View File

@ -0,0 +1,34 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kyuubi.plugin.spark.authz.rule
import org.apache.spark.sql.catalyst.expressions.TypeOf
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule
import org.apache.kyuubi.plugin.spark.authz.rule.expression.TypeOfPlaceHolder
/**
 * Optimizer rule that restores the original Spark [[TypeOf]] expression by
 * unwrapping every [[TypeOfPlaceHolder]] marker previously injected by
 * `RuleApplyTypeOfMarker`. It runs after authorization has inspected the
 * plan, so the query executes with Spark's native `typeof` semantics
 * (including constant folding).
 */
class RuleEliminateTypeOf extends Rule[LogicalPlan] {
  override def apply(plan: LogicalPlan): LogicalPlan = {
    // transformAllExpressions already visits every expression of every plan
    // node, so the previous plan.transformUp / transformExpressionsUp nesting
    // was redundant; this also matches the style of RuleApplyTypeOfMarker.
    plan transformAllExpressions {
      case toph: TypeOfPlaceHolder => TypeOf(toph.expr)
    }
  }
}

View File

@ -0,0 +1,31 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kyuubi.plugin.spark.authz.rule.expression
import org.apache.spark.sql.catalyst.expressions.TypeOf
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
import org.apache.spark.sql.catalyst.rules.Rule
/**
 * Resolution rule that wraps each [[TypeOf]] expression in a non-foldable
 * [[TypeOfPlaceHolder]]. This prevents the optimizer's constant folding from
 * collapsing `typeof(col)` into a literal, which would otherwise drop the
 * child expression and hide the referenced columns from privilege checking.
 */
class RuleApplyTypeOfMarker extends Rule[LogicalPlan] {
  override def apply(plan: LogicalPlan): LogicalPlan =
    plan.transformAllExpressions {
      // Deconstruct the TypeOf node and carry its child into the marker.
      case TypeOf(childExpr) => TypeOfPlaceHolder(childExpr)
    }
}

View File

@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kyuubi.plugin.spark.authz.rule.expression
import org.apache.spark.sql.catalyst.expressions.{Expression, UnaryExpression}
import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, ExprCode}
import org.apache.spark.sql.types.{DataType, StringType}
import org.apache.kyuubi.plugin.spark.authz.util.WithInternalExpressionChild
/**
 * Non-foldable stand-in for Spark's `TypeOf` expression.
 *
 * Spark's `TypeOf` is foldable, so the optimizer replaces it with a string
 * literal and discards its child, losing the column lineage the authz plugin
 * needs. This placeholder keeps the child alive until privilege checking is
 * done; `RuleEliminateTypeOf` swaps the original `TypeOf` back in afterwards.
 *
 * @param expr the original child expression of `TypeOf`
 */
case class TypeOfPlaceHolder(expr: Expression) extends UnaryExpression
  with WithInternalExpressionChild {

  override def dataType: DataType = StringType

  // Must not be foldable: ConstantFolding would otherwise erase the child
  // expression and with it the referenced table columns.
  override def foldable: Boolean = false

  // typeof(...) always yields a non-null type-name string; mirror Spark's
  // TypeOf so the plan schema is identical before and after elimination.
  override def nullable: Boolean = false

  override def child: Expression = expr

  override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
    // The catalog type name must be emitted as a Java string *literal*; the
    // surrounding escaped quotes are required, otherwise the generated code
    // would contain e.g. `UTF8String.fromString(int)` and fail to compile.
    // This mirrors Spark's own TypeOf.doGenCode.
    defineCodeGen(ctx, ev, _ => s"""UTF8String.fromString("${child.dataType.catalogString}")""")
  }

  override def withNewChildInternal(newChild: Expression): Expression =
    copy(expr = newChild)
}

View File

@ -17,6 +17,7 @@
package org.apache.kyuubi.plugin.spark.authz.util
import org.apache.spark.sql.catalyst.expressions.Expression
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
trait WithInternalChildren {
@ -26,3 +27,7 @@ trait WithInternalChildren {
/**
 * Mixin for marker plan nodes that carry a single child [[LogicalPlan]];
 * implementors return a copy of themselves with the child replaced.
 */
trait WithInternalChild {
  def withNewChildInternal(newChild: LogicalPlan): LogicalPlan
}
/**
 * Mixin for marker expressions that carry a single child [[Expression]];
 * implementors return a copy of themselves with the child replaced.
 */
trait WithInternalExpressionChild {
  def withNewChildInternal(newChild: Expression): Expression
}

View File

@ -23,7 +23,7 @@ import java.nio.file.Path
import scala.util.Try
import org.apache.hadoop.security.UserGroupInformation
import org.apache.spark.sql.SparkSessionExtensions
import org.apache.spark.sql.{Row, SparkSessionExtensions}
import org.apache.spark.sql.catalyst.analysis.NoSuchTableException
import org.apache.spark.sql.catalyst.catalog.HiveTableRelation
import org.apache.spark.sql.catalyst.plans.logical.Statistics
@ -1296,4 +1296,48 @@ class HiveCatalogRangerSparkExtensionSuite extends RangerSparkExtensionSuite {
}
}
}
  test("[KYUUBI #5677][AUTHZ] Typeof expression miss column information") {
    // Regression test for KYUUBI #5677: typeof(...) is foldable in Spark, so
    // without the TypeOfPlaceHolder marker the optimizer constant-folds it
    // and the authz plugin loses the referenced columns, silently skipping
    // the column-level privilege check.
    val db1 = defaultDb
    val table1 = "table1"
    withSingleCallEnabled {
      withCleanTmpResources(Seq((s"$db1.$table1", "table"))) {
        doAs(
          admin,
          sql(
            s"""
               |CREATE TABLE IF NOT EXISTS $db1.$table1(
               |id int,
               |scope int,
               |day string)
               |""".stripMargin))
        doAs(admin, sql(s"INSERT INTO $db1.$table1 SELECT 1, 2, 'TONY'"))
        // Plain and nested typeof over raw columns: an unprivileged user must
        // still be denied with the referenced columns listed in the message.
        interceptContains[AccessControlException](
          doAs(
            someone,
            sql(s"SELECT typeof(id), typeof(typeof(day)) FROM $db1.$table1").collect()))(
          s"does not have [select] privilege on [$db1/$table1/id,$db1/$table1/day]")
        // typeof over derived expressions (cast/substring): the underlying
        // columns must also be surfaced to the privilege check.
        interceptContains[AccessControlException](
          doAs(
            someone,
            sql(
              s"""
                 |SELECT
                 |typeof(cast(id as string)),
                 |typeof(substring(day, 1, 3))
                 |FROM $db1.$table1""".stripMargin).collect()))(
          s"does not have [select] privilege on [$db1/$table1/id,$db1/$table1/day]")
        // With sufficient privileges the query still evaluates typeof
        // correctly, proving the placeholder is eliminated before execution.
        checkAnswer(
          admin,
          s"""
             |SELECT
             |typeof(id),
             |typeof(typeof(day)),
             |typeof(cast(id as string)),
             |typeof(substring(day, 1, 3))
             |FROM $db1.$table1""".stripMargin,
          Seq(Row("int", "string", "string", "string")))
      }
    }
  }
}