[ISSUE-911] Decrease numConnectionsPerPeer to achieve better performance (#983)

This commit is contained in:
Gabriel 2022-11-20 11:46:17 +08:00 committed by GitHub
parent fb6d1de108
commit 5ecb09d62a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 5 additions and 5 deletions

View File

@ -1046,7 +1046,7 @@ object CelebornConf extends Logging {
.categories("network")
.doc("Number of concurrent connections between two nodes.")
.intConf
.createWithDefault(8)
.createWithDefault(2)
val NETWORK_IO_BACKLOG: ConfigEntry[Int] =
buildConf("celeborn.<module>.io.backLog")

View File

@ -19,9 +19,9 @@
celeborn.rpc.io.mode NIO
celeborn.rpc.io.clientThreads 8
celeborn.rpc.io.serverThreads 8
celeborn.rpc.io.numConnectionsPerPeer 8
celeborn.rpc.io.numConnectionsPerPeer 2
celeborn.push.io.numConnectionsPerPeer 8
celeborn.push.io.numConnectionsPerPeer 2
celeborn.push.io.threads 8
celeborn.push.replicate.enabled true

View File

@ -45,7 +45,7 @@ celeborn:
celeborn.worker.monitor.disk.enabled: false
rss.rpc.io.serverThreads: 64
rss.worker.fetch.chunk.size: 8m
rss.rpc.io.numConnectionsPerPeer: 8
rss.rpc.io.numConnectionsPerPeer: 2
celeborn.worker.flush.buffer.size: 256K
celeborn.metrics.enabled: true
rss.push.io.threads: 32

View File

@ -28,7 +28,7 @@ license: |
| celeborn.&lt;module&gt;.io.lazyFD | true | Whether to initialize FileDescriptor lazily or not. If true, file descriptors are created only when data is going to be transferred. This can reduce the number of open files. | |
| celeborn.&lt;module&gt;.io.maxRetries | 3 | Max number of times we will retry on IO exceptions (such as connection timeouts) per request. If set to 0, we will not do any retries. | |
| celeborn.&lt;module&gt;.io.mode | NIO | Netty EventLoopGroup backend, available options: NIO, EPOLL. | |
| celeborn.&lt;module&gt;.io.numConnectionsPerPeer | 8 | Number of concurrent connections between two nodes. | |
| celeborn.&lt;module&gt;.io.numConnectionsPerPeer | 2 | Number of concurrent connections between two nodes. | |
| celeborn.&lt;module&gt;.io.preferDirectBufs | true | If true, we will prefer allocating off-heap byte buffers within Netty. | |
| celeborn.&lt;module&gt;.io.receiveBuffer | 0b | Receive buffer size (SO_RCVBUF). Note: the optimal size for receive buffer and send buffer should be latency * network_bandwidth. Assuming latency = 1ms and network_bandwidth = 10Gbps, the buffer size should be ~ 1.25MB. | 0.2.0 |
| celeborn.&lt;module&gt;.io.retryWait | 5s | Time that we will wait in order to perform a retry after an IOException. Only relevant if celeborn.&lt;module&gt;.io.maxRetries > 0. | 0.2.0 |