Spark Conf temp

This commit is contained in:
Kent Yao 2020-12-31 15:02:21 +08:00 committed by Kent Yao
parent a1bfcc1273
commit d70aec651d
No known key found for this signature in database
GPG Key ID: A4F0BE81C89B595B
3 changed files with 302 additions and 8 deletions

View File

@ -20,11 +20,151 @@
# kyuubi.authentication NONE
# kyuubi.frontend.bind.port 10009
#
## Spark Configurations; they will override those in $SPARK_HOME/conf/spark-defaults.conf
#
# spark.master local
# spark.ui.enabled false
# spark.driver.extraJavaOptions -agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=5005
## Dummy Ones
# spark.master local
# spark.submit.deployMode client
# spark.ui.enabled false
# spark.ui.port 0
# spark.driver.extraJavaOptions -agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=5005
# spark.scheduler.mode FAIR
# spark.serializer org.apache.spark.serializer.KryoSerializer
# spark.kryoserializer.buffer.max 128m
# spark.buffer.size 131072
# spark.local.dir ./local
# spark.network.timeout 120s
# spark.cleaner.periodicGC.interval 10min
## Spark Driver / AM Sizing
# spark.driver.cores 4
# spark.driver.memory 8g
# spark.driver.memoryOverhead 2048
# spark.driver.extraJavaOptions -XX:MaxDirectMemorySize=2048m
# spark.driver.maxResultSize 3g
# spark.yarn.am.cores 4
# spark.yarn.am.memory 2g
# spark.yarn.am.memoryOverhead 1024
## Spark Executor Sizing
# spark.executor.instances 100
# spark.executor.cores 4
# spark.executor.memory 16g
# spark.executor.memoryOverhead 4096
# spark.executor.extraJavaOptions -XX:MaxDirectMemorySize=2048m
## Executor Heartbeat
# spark.storage.blockManagerHeartbeatTimeoutMs 300s
# spark.executor.heartbeatInterval 15s
# spark.executor.heartbeat.maxFailures 30
## Event Queue Capacity
# spark.scheduler.revive.interval 1s
# spark.scheduler.listenerbus.eventqueue.capacity 100000
# spark.scheduler.listenerbus.eventqueue.executorManagement.capacity 100000
# spark.scheduler.listenerbus.eventqueue.appStatus.capacity 100000
# spark.scheduler.listenerbus.eventqueue.shared.capacity 100000
# spark.scheduler.listenerbus.eventqueue.eventLog.capacity 20000
## Dynamic Allocation
# spark.dynamicAllocation.enabled true
# spark.dynamicAllocation.initialExecutors 10
# spark.dynamicAllocation.minExecutors 10
# spark.dynamicAllocation.maxExecutors 500
# spark.dynamicAllocation.executorAllocationRatio 0.8
# spark.dynamicAllocation.executorIdleTimeout 60s
# spark.dynamicAllocation.cachedExecutorIdleTimeout 1h
# spark.dynamicAllocation.shuffleTracking.enabled false
# spark.dynamicAllocation.shuffleTracking.timeout 30min
# spark.dynamicAllocation.schedulerBacklogTimeout 1s
# spark.dynamicAllocation.sustainedSchedulerBacklogTimeout 1s
## External Shuffle Service
# spark.shuffle.service.enabled true
# spark.shuffle.service.fetch.rdd.enabled true
# spark.shuffle.service.port 7337
## Speculation
# spark.speculation true
# spark.speculation.interval 1s
# spark.speculation.multiplier 1.5
# spark.speculation.quantile 0.9
# spark.speculation.task.duration.threshold 10min
## Shuffle Behavior
# spark.shuffle.compress true
# spark.shuffle.detectCorrupt true
# spark.shuffle.detectCorrupt.useExtraMemory true
# spark.shuffle.file.buffer 64k
# spark.shuffle.unsafe.file.output.buffer 64k
# spark.shuffle.spill.diskWriteBufferSize 8k
# spark.shuffle.spill.compress true
# spark.shuffle.mapOutput.dispatcher.numThreads 12
# spark.shuffle.mapOutput.parallelAggregationThreshold 5000
# spark.shuffle.readHostLocalDisk true
# spark.shuffle.io.maxRetries 10
# spark.shuffle.io.retryWait 6s
# spark.shuffle.io.preferDirectBufs false
# spark.shuffle.io.serverThreads 8
# spark.shuffle.io.clientThreads 8
# spark.shuffle.io.connectionTimeout 240s
# spark.shuffle.registration.timeout 6000
# spark.shuffle.registration.maxAttempts 10
# spark.shuffle.sync false
# spark.shuffle.useOldFetchProtocol true
# spark.shuffle.unsafe.fastMergeEnabled true
# spark.shuffle.minNumPartitionsToHighlyCompress 100
# spark.network.maxRemoteBlockSizeFetchToMem 128m
# spark.reducer.maxSizeInFlight 48m
# spark.reducer.maxReqsInFlight 256
# spark.reducer.maxBlocksInFlightPerAddress 256
## Data Locality for Task Scheduling
# spark.locality.wait 0s
# spark.locality.wait.process 0s
# spark.locality.wait.node 0s
# spark.locality.wait.rack 0s
## Event Logging for History Server
# spark.eventLog.enabled true
# spark.eventLog.dir hdfs://hadoop-dfs/history
# spark.eventLog.compress true
# spark.eventLog.longForm.enabled true
# spark.eventLog.rolling.enabled true
# spark.yarn.historyServer.address http://historyserver:18080
## SQL
## General SQL Settings
# spark.sql.shuffle.partitions 8192
# spark.sql.optimizer.inSetConversionThreshold 2
# spark.sql.autoBroadcastJoinThreshold 64m
# spark.sql.broadcastTimeout 600s
# spark.sql.join.preferSortMergeJoin true
# spark.sql.hive.metastorePartitionPruning true
# spark.sql.parquet.filterPushdown true
# spark.sql.parquet.recordLevelFilter.enabled true
# spark.sql.statistics.fallBackToHdfs true
## Dynamic Partition Pruning
# spark.sql.optimizer.dynamicPartitionPruning.enabled true
# spark.sql.optimizer.dynamicPartitionPruning.useStats true
# spark.sql.optimizer.dynamicPartitionPruning.fallbackFilterRatio 0.5
# spark.sql.optimizer.dynamicPartitionPruning.reuseBroadcastOnly true
## Adaptive Query Execution
# spark.sql.adaptive.enabled true
# spark.sql.adaptive.forceApply false
# spark.sql.adaptive.logLevel info
# spark.sql.adaptive.advisoryPartitionSizeInBytes 128m
# spark.sql.adaptive.coalescePartitions.enabled true
# spark.sql.adaptive.coalescePartitions.minPartitionNum 64
# spark.sql.adaptive.coalescePartitions.initialPartitionNum 8192
# spark.sql.adaptive.fetchShuffleBlocksInBatch true
# spark.sql.adaptive.localShuffleReader.enabled true
# spark.sql.adaptive.skewJoin.enabled true
# spark.sql.adaptive.skewJoin.skewedPartitionFactor 5
# spark.sql.adaptive.skewJoin.skewedPartitionThresholdInBytes 256m
# spark.sql.adaptive.nonEmptyPartitionRatioForBroadcastJoin 0.2
## Hadoop Configurations; they will override those in $HADOOP_CONF_DIR
#

View File

@ -39,3 +39,10 @@
# - SPARK_CONF_DIR Optional directory where the Spark configuration lives.
# (Default: $SPARK_HOME/conf)
#
## Examples ##
# export JAVA_HOME=/usr/jdk64/jdk1.8.0_152
# export HADOOP_CONF_DIR=/usr/ndp/current/mapreduce_client/conf
# export KYUUBI_JAVA_OPTS="-Xmx10g -XX:+UnlockDiagnosticVMOptions -XX:ParGCCardsPerStrideChunk=4096 -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSConcurrentMTEnabled -XX:CMSInitiatingOccupancyFraction=70 -XX:+UseCMSInitiatingOccupancyOnly -XX:+CMSClassUnloadingEnabled -XX:+CMSParallelRemarkEnabled -XX:+UseCondCardMark -XX:MaxDirectMemorySize=1024m -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=./logs -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+PrintTenuringDistribution -Xloggc:./logs/kyuubi-server-gc-%t.log -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=5M -XX:NewRatio=3 -XX:MetaspaceSize=512m"

View File

@ -58,6 +58,13 @@ You can configure the environment variables in `$KYUUBI_HOME/conf/kyuubi-env.sh`
# - SPARK_CONF_DIR Optional directory where the Spark configuration lives.
# (Default: $SPARK_HOME/conf)
#
## Examples ##
# export JAVA_HOME=/usr/jdk64/jdk1.8.0_152
# export HADOOP_CONF_DIR=/usr/ndp/current/mapreduce_client/conf
# export KYUUBI_JAVA_OPTS="-Xmx10g -XX:+UnlockDiagnosticVMOptions -XX:ParGCCardsPerStrideChunk=4096 -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSConcurrentMTEnabled -XX:CMSInitiatingOccupancyFraction=70 -XX:+UseCMSInitiatingOccupancyOnly -XX:+CMSClassUnloadingEnabled -XX:+CMSParallelRemarkEnabled -XX:+UseCondCardMark -XX:MaxDirectMemorySize=1024m -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=./logs -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+PrintTenuringDistribution -Xloggc:./logs/kyuubi-server-gc-%t.log -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=5M -XX:NewRatio=3 -XX:MetaspaceSize=512m"
```
## Kyuubi Configurations
@ -86,11 +93,151 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.co
# kyuubi.authentication NONE
# kyuubi.frontend.bind.port 10009
#
## Spark Configurations; they will override those in $SPARK_HOME/conf/spark-defaults.conf
#
# spark.master local
# spark.ui.enabled false
# spark.driver.extraJavaOptions -agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=5005
## Dummy Ones
# spark.master local
# spark.submit.deployMode client
# spark.ui.enabled false
# spark.ui.port 0
# spark.driver.extraJavaOptions -agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=5005
# spark.scheduler.mode FAIR
# spark.serializer org.apache.spark.serializer.KryoSerializer
# spark.kryoserializer.buffer.max 128m
# spark.buffer.size 131072
# spark.local.dir ./local
# spark.network.timeout 120s
# spark.cleaner.periodicGC.interval 10min
## Spark Driver / AM Sizing
# spark.driver.cores 4
# spark.driver.memory 8g
# spark.driver.memoryOverhead 2048
# spark.driver.extraJavaOptions -XX:MaxDirectMemorySize=2048m
# spark.driver.maxResultSize 3g
# spark.yarn.am.cores 4
# spark.yarn.am.memory 2g
# spark.yarn.am.memoryOverhead 1024
## Spark Executor Sizing
# spark.executor.instances 100
# spark.executor.cores 4
# spark.executor.memory 16g
# spark.executor.memoryOverhead 4096
# spark.executor.extraJavaOptions -XX:MaxDirectMemorySize=2048m
## Executor Heartbeat
# spark.storage.blockManagerHeartbeatTimeoutMs 300s
# spark.executor.heartbeatInterval 15s
# spark.executor.heartbeat.maxFailures 30
## Event Queue Capacity
# spark.scheduler.revive.interval 1s
# spark.scheduler.listenerbus.eventqueue.capacity 100000
# spark.scheduler.listenerbus.eventqueue.executorManagement.capacity 100000
# spark.scheduler.listenerbus.eventqueue.appStatus.capacity 100000
# spark.scheduler.listenerbus.eventqueue.shared.capacity 100000
# spark.scheduler.listenerbus.eventqueue.eventLog.capacity 20000
## Dynamic Allocation
# spark.dynamicAllocation.enabled true
# spark.dynamicAllocation.initialExecutors 10
# spark.dynamicAllocation.minExecutors 10
# spark.dynamicAllocation.maxExecutors 500
# spark.dynamicAllocation.executorAllocationRatio 0.8
# spark.dynamicAllocation.executorIdleTimeout 60s
# spark.dynamicAllocation.cachedExecutorIdleTimeout 1h
# spark.dynamicAllocation.shuffleTracking.enabled false
# spark.dynamicAllocation.shuffleTracking.timeout 30min
# spark.dynamicAllocation.schedulerBacklogTimeout 1s
# spark.dynamicAllocation.sustainedSchedulerBacklogTimeout 1s
## External Shuffle Service
# spark.shuffle.service.enabled true
# spark.shuffle.service.fetch.rdd.enabled true
# spark.shuffle.service.port 7337
## Speculation
# spark.speculation true
# spark.speculation.interval 1s
# spark.speculation.multiplier 1.5
# spark.speculation.quantile 0.9
# spark.speculation.task.duration.threshold 10min
## Shuffle Behavior
# spark.shuffle.compress true
# spark.shuffle.detectCorrupt true
# spark.shuffle.detectCorrupt.useExtraMemory true
# spark.shuffle.file.buffer 64k
# spark.shuffle.unsafe.file.output.buffer 64k
# spark.shuffle.spill.diskWriteBufferSize 8k
# spark.shuffle.spill.compress true
# spark.shuffle.mapOutput.dispatcher.numThreads 12
# spark.shuffle.mapOutput.parallelAggregationThreshold 5000
# spark.shuffle.readHostLocalDisk true
# spark.shuffle.io.maxRetries 10
# spark.shuffle.io.retryWait 6s
# spark.shuffle.io.preferDirectBufs false
# spark.shuffle.io.serverThreads 8
# spark.shuffle.io.clientThreads 8
# spark.shuffle.io.connectionTimeout 240s
# spark.shuffle.registration.timeout 6000
# spark.shuffle.registration.maxAttempts 10
# spark.shuffle.sync false
# spark.shuffle.useOldFetchProtocol true
# spark.shuffle.unsafe.fastMergeEnabled true
# spark.shuffle.minNumPartitionsToHighlyCompress 100
# spark.network.maxRemoteBlockSizeFetchToMem 128m
# spark.reducer.maxSizeInFlight 48m
# spark.reducer.maxReqsInFlight 256
# spark.reducer.maxBlocksInFlightPerAddress 256
## Data Locality for Task Scheduling
# spark.locality.wait 0s
# spark.locality.wait.process 0s
# spark.locality.wait.node 0s
# spark.locality.wait.rack 0s
## Event Logging for History Server
# spark.eventLog.enabled true
# spark.eventLog.dir hdfs://hadoop-dfs/history
# spark.eventLog.compress true
# spark.eventLog.longForm.enabled true
# spark.eventLog.rolling.enabled true
# spark.yarn.historyServer.address http://historyserver:18080
## SQL
## General SQL Settings
# spark.sql.shuffle.partitions 8192
# spark.sql.optimizer.inSetConversionThreshold 2
# spark.sql.autoBroadcastJoinThreshold 64m
# spark.sql.broadcastTimeout 600s
# spark.sql.join.preferSortMergeJoin true
# spark.sql.hive.metastorePartitionPruning true
# spark.sql.parquet.filterPushdown true
# spark.sql.parquet.recordLevelFilter.enabled true
# spark.sql.statistics.fallBackToHdfs true
## Dynamic Partition Pruning
# spark.sql.optimizer.dynamicPartitionPruning.enabled true
# spark.sql.optimizer.dynamicPartitionPruning.useStats true
# spark.sql.optimizer.dynamicPartitionPruning.fallbackFilterRatio 0.5
# spark.sql.optimizer.dynamicPartitionPruning.reuseBroadcastOnly true
## Adaptive Query Execution
# spark.sql.adaptive.enabled true
# spark.sql.adaptive.forceApply false
# spark.sql.adaptive.logLevel info
# spark.sql.adaptive.advisoryPartitionSizeInBytes 128m
# spark.sql.adaptive.coalescePartitions.enabled true
# spark.sql.adaptive.coalescePartitions.minPartitionNum 64
# spark.sql.adaptive.coalescePartitions.initialPartitionNum 8192
# spark.sql.adaptive.fetchShuffleBlocksInBatch true
# spark.sql.adaptive.localShuffleReader.enabled true
# spark.sql.adaptive.skewJoin.enabled true
# spark.sql.adaptive.skewJoin.skewedPartitionFactor 5
# spark.sql.adaptive.skewJoin.skewedPartitionThresholdInBytes 256m
# spark.sql.adaptive.nonEmptyPartitionRatioForBroadcastJoin 0.2
## Hadoop Configurations; they will override those in $HADOOP_CONF_DIR
#