[CLEBORN-1555] Replace deprecated config celeborn.storage.activeTypes in docs and tests

### What changes were proposed in this pull request?

Replace the deprecated config `celeborn.storage.activeTypes` with `celeborn.storage.availableTypes` in docs and tests, so that newcomers are guided to the new config name.

### Why are the changes needed?
The config `celeborn.storage.activeTypes` was deprecated in the 0.4.0 release.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

No functional change. The existing `CelebornConfSuite` tests were updated to use the new config name.

Closes #2675 from bowenliang123/avai-types.

Authored-by: Bowen Liang <liangbowen@gf.com.cn>
Signed-off-by: Shuang <lvshuang.xjs@alibaba-inc.com>
This commit is contained in:
Bowen Liang 2024-08-26 14:36:01 +08:00 committed by Shuang
parent 9f0af3456a
commit f226424b9a
8 changed files with 23 additions and 23 deletions

View File

@ -150,7 +150,7 @@ celeborn.worker.storage.dirs /mnt/disk1:disktype=SSD,/mnt/disk2:disktype=SSD
# If Celeborn workers don't have local disks. You can use HDFS.
# Do not set `celeborn.worker.storage.dirs` and use following configs.
celeborn.storage.activeTypes HDFS
celeborn.storage.availableTypes HDFS
celeborn.worker.sortPartition.threads 64
celeborn.worker.commitFiles.timeout 240s
celeborn.worker.commitFiles.threads 128
@ -199,7 +199,7 @@ celeborn.worker.storage.dirs /mnt/disk1:disktype=SSD,/mnt/disk2:disktype=SSD
# If Celeborn workers don't have local disks. You can use HDFS.
# Do not set `celeborn.worker.storage.dirs` and use following configs.
celeborn.storage.activeTypes HDFS
celeborn.storage.availableTypes HDFS
celeborn.worker.sortPartition.threads 64
celeborn.worker.commitFiles.timeout 240s
celeborn.worker.commitFiles.threads 128

View File

@ -1768,7 +1768,7 @@ object CelebornConf extends Logging {
.categories("network")
.version("0.2.0")
.doc("Timeout for RPC ask operations. " +
"It's recommended to set at least `240s` when `HDFS` is enabled in `celeborn.storage.activeTypes`")
"It's recommended to set at least `240s` when `HDFS` is enabled in `celeborn.storage.availableTypes`")
.timeConf(TimeUnit.MILLISECONDS)
.createWithDefaultString("60s")
@ -2636,7 +2636,7 @@ object CelebornConf extends Logging {
.categories("master")
.version("0.3.0")
.doc("Policy for master to assign slots, Celeborn supports two types of policy: roundrobin and loadaware. " +
"Loadaware policy will be ignored when `HDFS` is enabled in `celeborn.storage.activeTypes`")
"Loadaware policy will be ignored when `HDFS` is enabled in `celeborn.storage.availableTypes`")
.stringConf
.transform(_.toUpperCase(Locale.ROOT))
.checkValues(Set(
@ -3176,7 +3176,7 @@ object CelebornConf extends Logging {
buildConf("celeborn.worker.replicate.fastFail.duration")
.categories("worker")
.doc("If a replicate request not replied during the duration, worker will mark the replicate data request as failed." +
"It's recommended to set at least `240s` when `HDFS` is enabled in `celeborn.storage.activeTypes`.")
"It's recommended to set at least `240s` when `HDFS` is enabled in `celeborn.storage.availableTypes`.")
.version("0.2.0")
.timeConf(TimeUnit.MILLISECONDS)
.createWithDefaultString("60s")
@ -3206,7 +3206,7 @@ object CelebornConf extends Logging {
.categories("worker")
.version("0.3.0")
.doc("Thread number of worker to commit shuffle data files asynchronously. " +
"It's recommended to set at least `128` when `HDFS` is enabled in `celeborn.storage.activeTypes`.")
"It's recommended to set at least `128` when `HDFS` is enabled in `celeborn.storage.availableTypes`.")
.intConf
.createWithDefault(32)
@ -3231,7 +3231,7 @@ object CelebornConf extends Logging {
.withAlternative("celeborn.worker.shuffle.commit.timeout")
.categories("worker")
.doc("Timeout for a Celeborn worker to commit files of a shuffle. " +
"It's recommended to set at least `240s` when `HDFS` is enabled in `celeborn.storage.activeTypes`.")
"It's recommended to set at least `240s` when `HDFS` is enabled in `celeborn.storage.availableTypes`.")
.version("0.3.0")
.timeConf(TimeUnit.MILLISECONDS)
.createWithDefaultString("120s")
@ -3250,7 +3250,7 @@ object CelebornConf extends Logging {
.withAlternative("celeborn.worker.partitionSorter.threads")
.categories("worker")
.doc("PartitionSorter's thread counts. " +
"It's recommended to set at least `64` when `HDFS` is enabled in `celeborn.storage.activeTypes`.")
"It's recommended to set at least `64` when `HDFS` is enabled in `celeborn.storage.availableTypes`.")
.version("0.3.0")
.intConf
.createOptional
@ -4019,7 +4019,7 @@ object CelebornConf extends Logging {
.categories("client")
.doc("When true, Celeborn worker will replicate shuffle data to another Celeborn worker " +
"asynchronously to ensure the pushed shuffle data won't be lost after the node failure. " +
"It's recommended to set `false` when `HDFS` is enabled in `celeborn.storage.activeTypes`.")
"It's recommended to set `false` when `HDFS` is enabled in `celeborn.storage.availableTypes`.")
.version("0.3.0")
.booleanConf
.createWithDefault(false)

View File

@ -195,25 +195,25 @@ class CelebornConfSuite extends CelebornFunSuite {
test("Test empty working dir") {
val conf = new CelebornConf()
conf.set("celeborn.storage.activeTypes", "HDFS")
conf.set("celeborn.storage.availableTypes", "HDFS")
conf.set("celeborn.storage.hdfs.dir", "hdfs:///xxx")
assert(conf.workerBaseDirs.isEmpty)
conf.set("celeborn.storage.activeTypes", "SSD,HDD,HDFS")
conf.set("celeborn.storage.availableTypes", "SSD,HDD,HDFS")
conf.set("celeborn.storage.hdfs.dir", "hdfs:///xxx")
assert(conf.workerBaseDirs.isEmpty)
conf.set("celeborn.storage.activeTypes", "SSD,HDD")
conf.set("celeborn.storage.availableTypes", "SSD,HDD")
assert(!conf.workerBaseDirs.isEmpty)
}
test("Test commit file threads") {
val conf = new CelebornConf()
conf.set("celeborn.storage.activeTypes", "HDFS")
conf.set("celeborn.storage.availableTypes", "HDFS")
conf.set("celeborn.storage.hdfs.dir", "hdfs:///xxx")
assert(conf.workerCommitThreads === 128)
conf.set("celeborn.storage.activeTypes", "SSD,HDD")
conf.set("celeborn.storage.availableTypes", "SSD,HDD")
assert(conf.workerCommitThreads === 32)
}

View File

@ -51,7 +51,7 @@ license: |
| celeborn.client.push.maxReqsInFlight.perWorker | 32 | false | Amount of Netty in-flight requests per worker. Default max memory of in flight requests per worker is `celeborn.client.push.maxReqsInFlight.perWorker` * `celeborn.client.push.buffer.max.size` * compression ratio(1 in worst case): 64KiB * 32 = 2MiB. The maximum memory will not exceed `celeborn.client.push.maxReqsInFlight.total`. | 0.3.0 | |
| celeborn.client.push.maxReqsInFlight.total | 256 | false | Amount of total Netty in-flight requests. The maximum memory is `celeborn.client.push.maxReqsInFlight.total` * `celeborn.client.push.buffer.max.size` * compression ratio(1 in worst case): 64KiB * 256 = 16MiB | 0.3.0 | celeborn.push.maxReqsInFlight |
| celeborn.client.push.queue.capacity | 512 | false | Push buffer queue size for a task. The maximum memory is `celeborn.client.push.buffer.max.size` * `celeborn.client.push.queue.capacity`, default: 64KiB * 512 = 32MiB | 0.3.0 | celeborn.push.queue.capacity |
| celeborn.client.push.replicate.enabled | false | false | When true, Celeborn worker will replicate shuffle data to another Celeborn worker asynchronously to ensure the pushed shuffle data won't be lost after the node failure. It's recommended to set `false` when `HDFS` is enabled in `celeborn.storage.activeTypes`. | 0.3.0 | celeborn.push.replicate.enabled |
| celeborn.client.push.replicate.enabled | false | false | When true, Celeborn worker will replicate shuffle data to another Celeborn worker asynchronously to ensure the pushed shuffle data won't be lost after the node failure. It's recommended to set `false` when `HDFS` is enabled in `celeborn.storage.availableTypes`. | 0.3.0 | celeborn.push.replicate.enabled |
| celeborn.client.push.retry.threads | 8 | false | Thread number to process shuffle re-send push data requests. | 0.3.0 | celeborn.push.retry.threads |
| celeborn.client.push.revive.batchSize | 2048 | false | Max number of partitions in one Revive request. | 0.3.0 | |
| celeborn.client.push.revive.interval | 100ms | false | Interval for client to trigger Revive to LifecycleManager. The number of partitions in one Revive request is `celeborn.client.push.revive.batchSize`. | 0.3.0 | |

View File

@ -69,7 +69,7 @@ license: |
| celeborn.master.slot.assign.loadAware.flushTimeWeight | 0.0 | false | Weight of average flush time when calculating ordering in load-aware assignment strategy | 0.3.0 | celeborn.slots.assign.loadAware.flushTimeWeight |
| celeborn.master.slot.assign.loadAware.numDiskGroups | 5 | false | This configuration is a guidance for load-aware slot allocation algorithm. This value is control how many disk groups will be created. | 0.3.0 | celeborn.slots.assign.loadAware.numDiskGroups |
| celeborn.master.slot.assign.maxWorkers | 10000 | false | Max workers that slots of one shuffle can be allocated on. Will choose the smaller positive one from Master side and Client side, see `celeborn.client.slot.assign.maxWorkers`. | 0.3.1 | |
| celeborn.master.slot.assign.policy | ROUNDROBIN | false | Policy for master to assign slots, Celeborn supports two types of policy: roundrobin and loadaware. Loadaware policy will be ignored when `HDFS` is enabled in `celeborn.storage.activeTypes` | 0.3.0 | celeborn.slots.assign.policy |
| celeborn.master.slot.assign.policy | ROUNDROBIN | false | Policy for master to assign slots, Celeborn supports two types of policy: roundrobin and loadaware. Loadaware policy will be ignored when `HDFS` is enabled in `celeborn.storage.availableTypes` | 0.3.0 | celeborn.slots.assign.policy |
| celeborn.master.userResourceConsumption.update.interval | 30s | false | Time length for a window about compute user resource consumption. | 0.3.0 | |
| celeborn.master.workerUnavailableInfo.expireTimeout | 1800s | false | Worker unavailable info would be cleared when the retention period is expired. Set -1 to disable the expiration. | 0.3.1 | |
| celeborn.quota.enabled | true | false | When Master side sets to true, the master will enable to check the quota via QuotaManager. When Client side sets to true, LifecycleManager will request Master side to check whether the current user has enough quota before registration of shuffle. Fallback to the default shuffle service of Spark when Master side checks that there is no enough quota for current user. | 0.2.0 | |

View File

@ -47,7 +47,7 @@ license: |
| celeborn.network.memory.allocator.verbose.metric | false | false | Whether to enable verbose metric for pooled allocator. | 0.3.0 | |
| celeborn.network.timeout | 240s | false | Default timeout for network operations. | 0.2.0 | |
| celeborn.port.maxRetries | 1 | false | When port is occupied, we will retry for max retry times. | 0.2.0 | |
| celeborn.rpc.askTimeout | 60s | false | Timeout for RPC ask operations. It's recommended to set at least `240s` when `HDFS` is enabled in `celeborn.storage.activeTypes` | 0.2.0 | |
| celeborn.rpc.askTimeout | 60s | false | Timeout for RPC ask operations. It's recommended to set at least `240s` when `HDFS` is enabled in `celeborn.storage.availableTypes` | 0.2.0 | |
| celeborn.rpc.connect.threads | 64 | false | | 0.2.0 | |
| celeborn.rpc.dispatcher.threads | 0 | false | Threads number of message dispatcher event loop. Default to 0, which is availableCore. | 0.3.0 | celeborn.rpc.dispatcher.numThreads |
| celeborn.rpc.inbox.capacity | 0 | false | Specifies size of the in memory bounded capacity. | 0.5.0 | |

View File

@ -54,8 +54,8 @@ license: |
| celeborn.worker.bufferStream.threadsPerMountpoint | 8 | false | Threads count for read buffer per mount point. | 0.3.0 | |
| celeborn.worker.clean.threads | 64 | false | Thread number of worker to clean up expired shuffle keys. | 0.3.2 | |
| celeborn.worker.closeIdleConnections | false | false | Whether worker will close idle connections. | 0.2.0 | |
| celeborn.worker.commitFiles.threads | 32 | false | Thread number of worker to commit shuffle data files asynchronously. It's recommended to set at least `128` when `HDFS` is enabled in `celeborn.storage.activeTypes`. | 0.3.0 | celeborn.worker.commit.threads |
| celeborn.worker.commitFiles.timeout | 120s | false | Timeout for a Celeborn worker to commit files of a shuffle. It's recommended to set at least `240s` when `HDFS` is enabled in `celeborn.storage.activeTypes`. | 0.3.0 | celeborn.worker.shuffle.commit.timeout |
| celeborn.worker.commitFiles.threads | 32 | false | Thread number of worker to commit shuffle data files asynchronously. It's recommended to set at least `128` when `HDFS` is enabled in `celeborn.storage.availableTypes`. | 0.3.0 | celeborn.worker.commit.threads |
| celeborn.worker.commitFiles.timeout | 120s | false | Timeout for a Celeborn worker to commit files of a shuffle. It's recommended to set at least `240s` when `HDFS` is enabled in `celeborn.storage.availableTypes`. | 0.3.0 | celeborn.worker.shuffle.commit.timeout |
| celeborn.worker.commitFiles.wait.threads | 32 | false | Thread number of worker to wait for commit shuffle data files to finish. | 0.5.0 | |
| celeborn.worker.congestionControl.check.interval | 10ms | false | Interval of worker checks congestion if celeborn.worker.congestionControl.enabled is true. | 0.3.2 | |
| celeborn.worker.congestionControl.enabled | false | false | Whether to enable congestion control or not. | 0.3.0 | |
@ -143,7 +143,7 @@ license: |
| celeborn.worker.readBuffer.target.updateInterval | 100ms | false | The interval for memory manager to calculate new read buffer's target memory. | 0.3.0 | |
| celeborn.worker.readBuffer.toTriggerReadMin | 32 | false | Min buffers count for map data partition to trigger read. | 0.3.0 | |
| celeborn.worker.register.timeout | 180s | false | Worker register timeout. | 0.2.0 | |
| celeborn.worker.replicate.fastFail.duration | 60s | false | If a replicate request not replied during the duration, worker will mark the replicate data request as failed.It's recommended to set at least `240s` when `HDFS` is enabled in `celeborn.storage.activeTypes`. | 0.2.0 | |
| celeborn.worker.replicate.fastFail.duration | 60s | false | If a replicate request not replied during the duration, worker will mark the replicate data request as failed.It's recommended to set at least `240s` when `HDFS` is enabled in `celeborn.storage.availableTypes`. | 0.2.0 | |
| celeborn.worker.replicate.io.threads | &lt;undefined&gt; | false | Netty IO thread number of worker to replicate shuffle data. The default threads number is the number of flush thread. | 0.2.0 | |
| celeborn.worker.replicate.port | 0 | false | Server port for Worker to receive replicate data request from other Workers. | 0.2.0 | |
| celeborn.worker.replicate.randomConnection.enabled | true | false | Whether worker will create random connection to peer when replicate data. When false, worker tend to reuse the same cached TransportClient to a specific replicate worker; when true, worker tend to use different cached TransportClient. Netty will use the same thread to serve the same connection, so with more connections replicate server can leverage more netty threads | 0.2.1 | |
@ -156,7 +156,7 @@ license: |
| celeborn.worker.sortPartition.indexCache.maxWeight | 100000 | false | PartitionSorter's cache max weight for index buffer. | 0.4.0 | |
| celeborn.worker.sortPartition.prefetch.enabled | true | false | When true, partition sorter will prefetch the original partition files to page cache and reserve memory configured by `celeborn.worker.sortPartition.reservedMemoryPerPartition` to allocate a block of memory for prefetching while sorting a shuffle file off-heap with page cache for non-hdfs files. Otherwise, partition sorter seeks to position of each block and does not prefetch for non-hdfs files. | 0.5.0 | |
| celeborn.worker.sortPartition.reservedMemoryPerPartition | 1mb | false | Reserved memory when sorting a shuffle file off-heap. | 0.3.0 | celeborn.worker.partitionSorter.reservedMemoryPerPartition |
| celeborn.worker.sortPartition.threads | &lt;undefined&gt; | false | PartitionSorter's thread counts. It's recommended to set at least `64` when `HDFS` is enabled in `celeborn.storage.activeTypes`. | 0.3.0 | celeborn.worker.partitionSorter.threads |
| celeborn.worker.sortPartition.threads | &lt;undefined&gt; | false | PartitionSorter's thread counts. It's recommended to set at least `64` when `HDFS` is enabled in `celeborn.storage.availableTypes`. | 0.3.0 | celeborn.worker.partitionSorter.threads |
| celeborn.worker.sortPartition.timeout | 220s | false | Timeout for a shuffle file to sort. | 0.3.0 | celeborn.worker.partitionSorter.sort.timeout |
| celeborn.worker.storage.checkDirsEmpty.maxRetries | 3 | false | The number of retries for a worker to check if the working directory is cleaned up before registering with the master. | 0.3.0 | celeborn.worker.disk.checkFileClean.maxRetries |
| celeborn.worker.storage.checkDirsEmpty.timeout | 1000ms | false | The wait time per retry for a worker to check if the working directory is cleaned up before registering with the master. | 0.3.0 | celeborn.worker.disk.checkFileClean.timeout |

View File

@ -49,7 +49,7 @@ celeborn.worker.storage.dirs /mnt/disk1:disktype=SSD,/mnt/disk2:disktype=SSD
# If Celeborn workers don't have local disks. You can use HDFS.
# Do not set `celeborn.worker.storage.dirs` and use following configs.
celeborn.storage.activeTypes HDFS
celeborn.storage.availableTypes HDFS
celeborn.worker.sortPartition.threads 64
celeborn.worker.commitFiles.timeout 240s
celeborn.worker.commitFiles.threads 128
@ -98,7 +98,7 @@ celeborn.worker.storage.dirs /mnt/disk1:disktype=SSD,/mnt/disk2:disktype=SSD
# If Celeborn workers don't have local disks. You can use HDFS.
# Do not set `celeborn.worker.storage.dirs` and use following configs.
celeborn.storage.activeTypes HDFS
celeborn.storage.availableTypes HDFS
celeborn.worker.sortPartition.threads 64
celeborn.worker.commitFiles.timeout 240s
celeborn.worker.commitFiles.threads 128