 [](https://github.com/yaooqinn/kyuubi/pull/388)     [❨?❩](https://pullrequestbadge.com/?utm_medium=github&utm_source=yaooqinn&utm_campaign=badge_info)<!-- PR-BADGE: PLEASE DO NOT REMOVE THIS COMMENT --> <!-- Thanks for sending a pull request! Here are some tips for you: 1. If this is your first time, please read our contributor guidelines: https://kyuubi.readthedocs.io/en/latest/community/contributions.html 2. If the PR is related to an issue in https://github.com/yaooqinn/kyuubi/issues, add '[KYUUBI #XXXX]' in your PR title, e.g., '[KYUUBI #XXXX] Your PR title ...'. 3. If the PR is unfinished, add '[WIP]' in your PR title, e.g., '[WIP][KYUUBI #XXXX] Your PR title ...'. --> ### _Why are the changes needed?_ <!-- Please clarify why the changes are needed. For instance, 1. If you add a feature, you can talk about the use case of it. 2. If you fix a bug, you can clarify why it is a bug. --> User guide ### _How was this patch tested?_ - [ ] Add some test cases that check the changes thoroughly, including negative and positive cases if possible - [ ] Add screenshots for manual tests if appropriate - [x] [Run tests](https://kyuubi.readthedocs.io/en/latest/tools/testing.html#running-tests) locally before making a pull request Closes #388 from yaooqinn/aqe. dc2833f [Kent Yao] How To Use Spark Adaptive Query Execution (AQE) in Kyuubi Authored-by: Kent Yao <yao@apache.org> Signed-off-by: Kent Yao <yao@apache.org>
141 lines
6.5 KiB
Plaintext
#
|
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
|
# contributor license agreements. See the NOTICE file distributed with
|
|
# this work for additional information regarding copyright ownership.
|
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
# (the "License"); you may not use this file except in compliance with
|
|
# the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
#
|
|
|
|
## Kyuubi Configurations
|
|
#
|
|
# kyuubi.authentication NONE
|
|
# kyuubi.frontend.bind.port 10009
|
|
#
|
|
|
|
## Spark Configurations; these will override those in $SPARK_HOME/conf/spark-defaults.conf
|
|
## Dummy Ones
|
|
# spark.master local
|
|
# spark.submit.deployMode client
|
|
# spark.ui.enabled false
|
|
# spark.ui.port 0
|
|
# spark.driver.extraJavaOptions -agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=5005
|
|
# spark.scheduler.mode FAIR
|
|
# spark.serializer org.apache.spark.serializer.KryoSerializer
|
|
# spark.kryoserializer.buffer.max 128m
|
|
# spark.buffer.size 131072
|
|
# spark.local.dir ./local
|
|
# spark.network.timeout 120s
|
|
# spark.cleaner.periodicGC.interval 10min
|
|
|
|
## Spark Driver / AM Sizing
|
|
# spark.driver.cores 4
|
|
# spark.driver.memory 8g
|
|
# spark.driver.memoryOverhead 2048
|
|
# spark.driver.extraJavaOptions -XX:MaxDirectMemorySize=2048m
|
|
# spark.driver.maxResultSize 3g
|
|
# spark.yarn.am.cores 4
|
|
# spark.yarn.am.memory 2g
|
|
# spark.yarn.am.memoryOverhead 1024
|
|
|
|
## Spark Executor Sizing
|
|
# spark.executor.instances 100
|
|
# spark.executor.cores 4
|
|
# spark.executor.memory 16g
|
|
# spark.executor.memoryOverhead 4096
|
|
# spark.executor.extraJavaOptions -XX:MaxDirectMemorySize=2048m
|
|
|
|
## Executor Heartbeat
|
|
# spark.storage.blockManagerHeartbeatTimeoutMs 300s
|
|
# spark.executor.heartbeatInterval 15s
|
|
# spark.executor.heartbeat.maxFailures 30
|
|
|
|
|
|
## Event Queue Capacity
|
|
# spark.scheduler.revive.interval 1s
|
|
# spark.scheduler.listenerbus.eventqueue.capacity 100000
|
|
# spark.scheduler.listenerbus.eventqueue.executorManagement.capacity 100000
|
|
# spark.scheduler.listenerbus.eventqueue.appStatus.capacity 100000
|
|
# spark.scheduler.listenerbus.eventqueue.shared.capacity 100000
|
|
# spark.scheduler.listenerbus.eventqueue.eventLog.capacity 20000
|
|
|
|
## External Shuffle Service
|
|
# spark.shuffle.service.enabled true
|
|
# spark.shuffle.service.fetch.rdd.enabled true
|
|
# spark.shuffle.service.port 7337
|
|
|
|
## Speculation
|
|
# spark.speculation true
|
|
# spark.speculation.interval 1s
|
|
# spark.speculation.multiplier 1.5
|
|
# spark.speculation.quantile 0.9
|
|
# spark.speculation.task.duration.threshold 10min
|
|
|
|
## Shuffle Behavior
|
|
# spark.shuffle.compress true
|
|
# spark.shuffle.detectCorrupt true
|
|
# spark.shuffle.detectCorrupt.useExtraMemory true
|
|
# spark.shuffle.file.buffer 64k
|
|
# spark.shuffle.unsafe.file.output.buffer 64k
|
|
# spark.shuffle.spill.diskWriteBufferSize 8k
|
|
# spark.shuffle.spill.compress true
|
|
# spark.shuffle.mapOutput.dispatcher.numThreads 12
|
|
# spark.shuffle.mapOutput.parallelAggregationThreshold 5000
|
|
# spark.shuffle.readHostLocalDisk true
|
|
# spark.shuffle.io.maxRetries 10
|
|
# spark.shuffle.io.retryWait 6s
|
|
# spark.shuffle.io.preferDirectBufs false
|
|
# spark.shuffle.io.serverThreads 8
|
|
# spark.shuffle.io.clientThreads 8
|
|
# spark.shuffle.io.connectionTimeout 240s
|
|
# spark.shuffle.registration.timeout 6000
|
|
# spark.shuffle.registration.maxAttempts 10
|
|
# spark.shuffle.sync false
|
|
# spark.shuffle.useOldFetchProtocol true
|
|
# spark.shuffle.unsafe.fastMergeEnabled true
|
|
# spark.shuffle.minNumPartitionsToHighlyCompress 100
|
|
# spark.network.maxRemoteBlockSizeFetchToMem 128m
|
|
# spark.reducer.maxSizeInFlight 48m
|
|
# spark.reducer.maxReqsInFlight 256
|
|
# spark.reducer.maxBlocksInFlightPerAddress 256
|
|
|
|
## Data Locality for Task Schedule
|
|
# spark.locality.wait 0s
|
|
# spark.locality.wait.process 0s
|
|
# spark.locality.wait.node 0s
|
|
# spark.locality.wait.rack 0s
|
|
|
|
## Event Logging for History Server
|
|
# spark.eventLog.enabled true
|
|
# spark.eventLog.dir hdfs://hadoop-dfs/history
|
|
# spark.eventLog.compress true
|
|
# spark.eventLog.longForm.enabled true
|
|
# spark.eventLog.rolling.enabled true
|
|
# spark.yarn.historyServer.address http://historyserver:18080
|
|
|
|
## SQL
|
|
## General SQL Settings
|
|
# spark.sql.shuffle.partitions 8192
|
|
# spark.sql.optimizer.inSetConversionThreshold 2
|
|
# spark.sql.autoBroadcastJoinThreshold 64m
|
|
# spark.sql.broadcastTimeout 600s
|
|
# spark.sql.join.preferSortMergeJoin true
|
|
# spark.sql.hive.metastorePartitionPruning true
|
|
# spark.sql.parquet.filterPushdown true
|
|
# spark.sql.parquet.recordLevelFilter.enabled true
|
|
# spark.sql.statistics.fallBackToHdfs true
|
|
## Dynamic Partition Pruning
|
|
# spark.sql.optimizer.dynamicPartitionPruning.enabled true
|
|
# spark.sql.optimizer.dynamicPartitionPruning.useStats true
|
|
# spark.sql.optimizer.dynamicPartitionPruning.fallbackFilterRatio 0.5
|
|
# spark.sql.optimizer.dynamicPartitionPruning.reuseBroadcastOnly true
|
|
|