Spark Conf temp
parent a1bfcc1273
commit d70aec651d
@@ -20,11 +20,151 @@
# kyuubi.authentication NONE
# kyuubi.frontend.bind.port 10009
#

## Spark Configurations; these will override those in $SPARK_HOME/conf/spark-defaults.conf
#
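
The override order matters here: a property set in kyuubi-defaults.conf is handed to the engine's Spark session and beats spark-defaults.conf, the same way a setting supplied at session-build time does. A minimal sketch of that precedence, assuming only a local Spark install (values are illustrative):

```scala
import org.apache.spark.sql.SparkSession

object PrecedenceSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")
      // Supplied at build time, so it wins over $SPARK_HOME/conf/spark-defaults.conf,
      // just as a kyuubi-defaults.conf entry would.
      .config("spark.sql.shuffle.partitions", "8192")
      .getOrCreate()
    println(spark.conf.get("spark.sql.shuffle.partitions")) // 8192
    spark.stop()
  }
}
```
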
## Dummy Ones
# spark.master local
# spark.submit.deployMode client
# spark.ui.enabled false
# spark.ui.port 0
# spark.driver.extraJavaOptions -agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=5005
# spark.scheduler.mode FAIR
# spark.serializer org.apache.spark.serializer.KryoSerializer
# spark.kryoserializer.buffer.max 128m
# spark.buffer.size 131072
# spark.local.dir ./local
# spark.network.timeout 120s
# spark.cleaner.periodicGC.interval 10min
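
If the KryoSerializer line above is enabled, frequently shuffled classes can also be registered so Kryo writes a small numeric ID instead of the full class name. A sketch, with `Point` as a made-up application class:

```scala
import org.apache.spark.SparkConf

case class Point(x: Double, y: Double) // hypothetical application class

object KryoSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .set("spark.kryoserializer.buffer.max", "128m")
      .registerKryoClasses(Array(classOf[Point]))
    // Pass `conf` to SparkSession.builder().config(conf) as usual.
    println(conf.get("spark.kryo.classesToRegister"))
  }
}
```
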

## Spark Driver / AM Sizing
# spark.driver.cores 4
# spark.driver.memory 8g
# spark.driver.memoryOverhead 2048
# spark.driver.extraJavaOptions -XX:MaxDirectMemorySize=2048m
# spark.driver.maxResultSize 3g
# spark.yarn.am.cores 4
# spark.yarn.am.memory 2g
# spark.yarn.am.memoryOverhead 1024
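
Rough arithmetic for the container these driver numbers request on YARN: heap and memoryOverhead travel together in one container, and the -XX:MaxDirectMemorySize allocation is expected to fit inside the overhead. A back-of-the-envelope sketch:

```scala
object DriverSizingSketch {
  def main(args: Array[String]): Unit = {
    val heapMiB     = 8 * 1024 // spark.driver.memory = 8g
    val overheadMiB = 2048     // spark.driver.memoryOverhead (direct buffers live here)
    println(s"YARN driver container request ~ ${heapMiB + overheadMiB} MiB (10 GiB)")
  }
}
```
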

## Spark Executor Sizing
# spark.executor.instances 100
# spark.executor.cores 4
# spark.executor.memory 16g
# spark.executor.memoryOverhead 4096
# spark.executor.extraJavaOptions -XX:MaxDirectMemorySize=2048m

## Executor Heartbeat
# spark.storage.blockManagerHeartbeatTimeoutMs 300s
# spark.executor.heartbeatInterval 15s
# spark.executor.heartbeat.maxFailures 30
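
Roughly, the two executor-side settings above bound how long an executor keeps retrying before it gives up: one heartbeat attempt per interval, exiting after maxFailures consecutive failures. A sketch of that arithmetic (actual timing also depends on RPC timeouts):

```scala
object HeartbeatSketch {
  def main(args: Array[String]): Unit = {
    val intervalSec = 15 // spark.executor.heartbeatInterval
    val maxFailures = 30 // spark.executor.heartbeat.maxFailures
    println(s"executor self-terminates after ~${intervalSec * maxFailures}s of failed heartbeats") // ~450s
  }
}
```
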

## Event Queue Capacity
# spark.scheduler.revive.interval 1s
# spark.scheduler.listenerbus.eventqueue.capacity 100000
# spark.scheduler.listenerbus.eventqueue.executorManagement.capacity 100000
# spark.scheduler.listenerbus.eventqueue.appStatus.capacity 100000
# spark.scheduler.listenerbus.eventqueue.shared.capacity 100000
# spark.scheduler.listenerbus.eventqueue.eventLog.capacity 20000
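
These capacities bound the listener-bus queues that custom SparkListeners also share; once a queue fills, further events are dropped with a warning, which is why busy servers raise the limits. A sketch of a listener riding the same bus, assuming a live SparkSession named `spark`:

```scala
import org.apache.spark.scheduler.{SparkListener, SparkListenerTaskEnd}

class TaskCounter extends SparkListener {
  @volatile var tasks = 0L
  override def onTaskEnd(taskEnd: SparkListenerTaskEnd): Unit = tasks += 1
}
// usage: spark.sparkContext.addSparkListener(new TaskCounter)
```
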

## Dynamic Allocation
# spark.dynamicAllocation.enabled true
# spark.dynamicAllocation.initialExecutors 10
# spark.dynamicAllocation.minExecutors 10
# spark.dynamicAllocation.maxExecutors 500
# spark.dynamicAllocation.executorAllocationRatio 0.8
# spark.dynamicAllocation.executorIdleTimeout 60s
# spark.dynamicAllocation.cachedExecutorIdleTimeout 1h
# spark.dynamicAllocation.shuffleTracking.enabled false
# spark.dynamicAllocation.shuffleTracking.timeout 30min
# spark.dynamicAllocation.schedulerBacklogTimeout 1s
# spark.dynamicAllocation.sustainedSchedulerBacklogTimeout 1s
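
A simplified sketch of how executorAllocationRatio damps the executor target (mirroring, not reproducing, ExecutorAllocationManager's arithmetic), with the result clamped into [minExecutors, maxExecutors]:

```scala
object AllocationSketch {
  def targetExecutors(backloggedTasks: Int, coresPerExecutor: Int, ratio: Double,
                      min: Int, max: Int): Int = {
    val needed = math.ceil(backloggedTasks * ratio / coresPerExecutor).toInt
    math.min(math.max(needed, min), max)
  }

  def main(args: Array[String]): Unit = {
    // 8192 backlogged tasks, 4 cores/executor, ratio 0.8 -> 1639 needed, clamped to 500.
    println(targetExecutors(8192, 4, 0.8, min = 10, max = 500)) // 500
  }
}
```
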

## External Shuffle Service
# spark.shuffle.service.enabled true
# spark.shuffle.service.fetch.rdd.enabled true
# spark.shuffle.service.port 7337

## Speculation
# spark.speculation true
# spark.speculation.interval 1s
# spark.speculation.multiplier 1.5
# spark.speculation.quantile 0.9
# spark.speculation.task.duration.threshold 10min
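
A worked sketch of the rule these settings configure: once `quantile` of a stage's tasks have finished, a running task becomes a speculation candidate when it exceeds multiplier × the median successful runtime (the duration threshold is a separate trigger aimed at small stages):

```scala
object SpeculationSketch {
  def main(args: Array[String]): Unit = {
    val successfulRuntimesSec = Seq(40.0, 45.0, 50.0, 55.0, 60.0) // illustrative runtimes
    val median     = successfulRuntimesSec.sorted.apply(successfulRuntimesSec.size / 2) // 50s
    val multiplier = 1.5 // spark.speculation.multiplier
    println(s"launch speculative copies for tasks still running after ${median * multiplier}s") // 75.0s
  }
}
```
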

## Shuffle Behavior
# spark.shuffle.compress true
# spark.shuffle.detectCorrupt true
# spark.shuffle.detectCorrupt.useExtraMemory true
# spark.shuffle.file.buffer 64k
# spark.shuffle.unsafe.file.output.buffer 64k
# spark.shuffle.spill.diskWriteBufferSize 8k
# spark.shuffle.spill.compress true
# spark.shuffle.mapOutput.dispatcher.numThreads 12
# spark.shuffle.mapOutput.parallelAggregationThreshold 5000
# spark.shuffle.readHostLocalDisk true
# spark.shuffle.io.maxRetries 10
# spark.shuffle.io.retryWait 6s
# spark.shuffle.io.preferDirectBufs false
# spark.shuffle.io.serverThreads 8
# spark.shuffle.io.clientThreads 8
# spark.shuffle.io.connectionTimeout 240s
# spark.shuffle.registration.timeout 6000
# spark.shuffle.registration.maxAttempts 10
# spark.shuffle.sync false
# spark.shuffle.useOldFetchProtocol true
# spark.shuffle.unsafe.fastMergeEnabled true
# spark.shuffle.minNumPartitionsToHighlyCompress 100
# spark.network.maxRemoteBlockSizeFetchToMem 128m
# spark.reducer.maxSizeInFlight 48m
# spark.reducer.maxReqsInFlight 256
# spark.reducer.maxBlocksInFlightPerAddress 256
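
Of the knobs above, spark.network.maxRemoteBlockSizeFetchToMem is the one that changes the fetch path itself: remote shuffle blocks above the limit are streamed to disk rather than buffered in memory. A sketch of that decision:

```scala
object FetchPathSketch {
  val fetchToMemLimit: Long = 128L * 1024 * 1024 // spark.network.maxRemoteBlockSizeFetchToMem = 128m

  def streamsToDisk(blockBytes: Long): Boolean = blockBytes > fetchToMemLimit

  def main(args: Array[String]): Unit = {
    println(streamsToDisk(64L * 1024 * 1024))  // false: held in memory
    println(streamsToDisk(512L * 1024 * 1024)) // true: spilled to disk while fetching
  }
}
```
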

## Data Locality for Task Scheduling
# spark.locality.wait 0s
# spark.locality.wait.process 0s
# spark.locality.wait.node 0s
# spark.locality.wait.rack 0s

## Event Logging for History Server
# spark.eventLog.enabled true
# spark.eventLog.dir hdfs://hadoop-dfs/history
# spark.eventLog.compress true
# spark.eventLog.longForm.enabled true
# spark.eventLog.rolling.enabled true
# spark.yarn.historyServer.address http://historyserver:18080

## SQL
## General SQL Settings
# spark.sql.shuffle.partitions 8192
# spark.sql.optimizer.inSetConversionThreshold 2
# spark.sql.autoBroadcastJoinThreshold 64m
# spark.sql.broadcastTimeout 600s
# spark.sql.join.preferSortMergeJoin true
# spark.sql.hive.metastorePartitionPruning true
# spark.sql.parquet.filterPushdown true
# spark.sql.parquet.recordLevelFilter.enabled true
# spark.sql.statistics.fallBackToHdfs true
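
spark.sql.autoBroadcastJoinThreshold above decides when the planner swaps a sort-merge join for a broadcast hash join. A spark-shell style sketch, assuming a live session named `spark` (table shapes are illustrative):

```scala
val fact = spark.range(100000000L).withColumnRenamed("id", "k")
val dim  = spark.range(1000L).withColumnRenamed("id", "k")
// With the dimension side well under 64m, expect BroadcastHashJoin in the plan.
fact.join(dim, "k").explain()
```
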
## Dynamic Partition Pruning
# spark.sql.optimizer.dynamicPartitionPruning.enabled true
# spark.sql.optimizer.dynamicPartitionPruning.useStats true
# spark.sql.optimizer.dynamicPartitionPruning.fallbackFilterRatio 0.5
# spark.sql.optimizer.dynamicPartitionPruning.reuseBroadcastOnly true
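
The query shape dynamic partition pruning targets, in a spark-shell style sketch with hypothetical tables: a selective filter on the dimension side prunes partitions of the partitioned fact table at runtime:

```scala
spark.sql("""
  SELECT f.*
  FROM sales f JOIN dates d ON f.date_id = d.date_id
  WHERE d.year = 2020
""").explain() // look for a dynamic pruning subquery on the `sales` scan
```
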

## Adaptive Query Execution
# spark.sql.adaptive.enabled true
# spark.sql.adaptive.forceApply false
# spark.sql.adaptive.logLevel info
# spark.sql.adaptive.advisoryPartitionSizeInBytes 128m
# spark.sql.adaptive.coalescePartitions.enabled true
# spark.sql.adaptive.coalescePartitions.minPartitionNum 64
# spark.sql.adaptive.coalescePartitions.initialPartitionNum
# spark.sql.adaptive.fetchShuffleBlocksInBatch true
# spark.sql.adaptive.localShuffleReader.enabled true
# spark.sql.adaptive.skewJoin.enabled true
# spark.sql.adaptive.skewJoin.skewedPartitionFactor 5
# spark.sql.adaptive.skewJoin.skewedPartitionThresholdInBytes 256m
# spark.sql.adaptive.nonEmptyPartitionRatioForBroadcastJoin 0.2
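
A worked sketch of the skew test the two skewJoin thresholds above define: a post-shuffle partition counts as skewed when it exceeds both skewedPartitionFactor × the median partition size and skewedPartitionThresholdInBytes:

```scala
object SkewSketch {
  val factor    = 5L                 // spark.sql.adaptive.skewJoin.skewedPartitionFactor
  val threshold = 256L * 1024 * 1024 // ...skewedPartitionThresholdInBytes = 256m

  def isSkewed(partitionBytes: Long, medianBytes: Long): Boolean =
    partitionBytes > math.max(factor * medianBytes, threshold)

  def main(args: Array[String]): Unit = {
    val median = 64L * 1024 * 1024                // 64 MiB median partition
    println(isSkewed(512L * 1024 * 1024, median)) // true: 512m > max(320m, 256m)
    println(isSkewed(200L * 1024 * 1024, median)) // false
  }
}
```
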

## Hadoop Configurations; these will override those in $HADOOP_CONF_DIR
#
@@ -39,3 +39,10 @@
# - SPARK_CONF_DIR Optional directory where the Spark configuration lives.
#   (Default: $SPARK_HOME/conf)
#

## Examples ##

# export JAVA_HOME=/usr/jdk64/jdk1.8.0_152
# export HADOOP_CONF_DIR=/usr/ndp/current/mapreduce_client/conf
# export KYUUBI_JAVA_OPTS="-Xmx10g -XX:+UnlockDiagnosticVMOptions -XX:ParGCCardsPerStrideChunk=4096 -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+CMSConcurrentMTEnabled -XX:CMSInitiatingOccupancyFraction=70 -XX:+UseCMSInitiatingOccupancyOnly -XX:+CMSClassUnloadingEnabled -XX:+CMSParallelRemarkEnabled -XX:+UseCondCardMark -XX:MaxDirectMemorySize=1024m -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=./logs -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+PrintTenuringDistribution -Xloggc:./logs/kyuubi-server-gc-%t.log -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=5M -XX:NewRatio=3 -XX:MetaspaceSize=512m"

@@ -58,6 +58,13 @@ You can configure the environment variables in `$KYUUBI_HOME/conf/kyuubi-env.sh`

## Kyuubi Configurations

@@ -86,11 +93,151 @@ You can configure the Kyuubi properties in `$KYUUBI_HOME/conf/kyuubi-defaults.conf`