#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
|
|
|
|
# Default values for celeborn.
|
|
# This is a YAML-formatted file.
|
|
# Declare variables to be passed into your templates.
|
|
|
|
# A default Celeborn image is provided; replace it with your own image
# if needed.
# TODO: rebuild the official Celeborn image.
image:
  repository: aliyunemr/remote-shuffle-service
  pullPolicy: Always
  # Quoted so the version-like tag is always read as a string.
  tag: "0.1.1-6badd20"
|
|
|
|
# Image pull secrets; none by default. Kubernetes expects a list of
# secret references here, so the empty default is a list rather than a
# mapping.
imagePullSecrets: []
|
|
|
|
# Number of master replicas; should not be less than 3.
masterReplicas: 3

# Number of worker replicas; set on demand, and should be less than the
# number of nodes.
workerReplicas: 5
|
|
|
|
# Celeborn release version (kept as a string).
celebornVersion: "0.1.1"
|
|
|
|
# Celeborn configuration entries, keyed by property name.
# The entries are grouped by area for readability only; the set of keys
# and their values is unchanged.
celeborn:
  # Worker storage.
  # Please update celeborn.worker.storage.dirs to the disk mount paths on
  # the k8s node.
  celeborn.worker.storage.dirs: /mnt/disk1,/mnt/disk2,/mnt/disk3,/mnt/disk4
  celeborn.worker.monitor.disk.enabled: false
  celeborn.worker.flush.buffer.size: 256K

  # Master HA (Ratis) storage.
  celeborn.ha.master.ratis.raft.server.storage.dir: /mnt/rss_ratis/

  # Push.
  celeborn.push.replicate.enabled: true
  celeborn.push.stageEnd.timeout: 120s
  rss.push.io.threads: 32

  # Fetch.
  celeborn.worker.fetch.io.threads: 32
  rss.worker.fetch.chunk.size: 8m

  # RPC.
  rss.rpc.io.serverThreads: 64
  rss.rpc.io.clientThreads: 64
  rss.rpc.io.numConnectionsPerPeer: 2
  rss.rpc.dispatcher.numThreads: 4

  # Heartbeat timeouts.
  celeborn.application.heartbeat.timeout: 120s
  celeborn.worker.heartbeat.timeout: 120s

  # Metrics.
  celeborn.metrics.enabled: true
  celeborn.master.metrics.prometheus.port: 9098
  celeborn.worker.metrics.prometheus.port: 9096
|
|
|
|
# Environment variables; every value is a string.
environments:
  CELEBORN_MASTER_MEMORY: 2g
  # Folded scalar: the lines below are joined with single spaces into one
  # option string, with no trailing newline.
  CELEBORN_MASTER_JAVA_OPTS: >-
    -XX:-PrintGC
    -XX:+PrintGCDetails
    -XX:+PrintGCTimeStamps
    -XX:+PrintGCDateStamps
    -Xloggc:gc-master.out
    -Dio.netty.leakDetectionLevel=advanced
  CELEBORN_WORKER_MEMORY: 2g
  CELEBORN_WORKER_OFFHEAP_MEMORY: 12g
  CELEBORN_WORKER_JAVA_OPTS: >-
    -XX:-PrintGC
    -XX:+PrintGCDetails
    -XX:+PrintGCTimeStamps
    -XX:+PrintGCDateStamps
    -Xloggc:gc-worker.out
    -Dio.netty.leakDetectionLevel=advanced
  # Quoted so it stays the string "yes" rather than a boolean.
  CELEBORN_NO_DAEMONIZE: 'yes'
  TZ: 'Asia/Shanghai'
|
|
|
|
# Pod monitor settings.
# NOTE(review): presumably rendered into a Prometheus Operator PodMonitor;
# confirm against the chart templates.
podMonitor:
  enable: true
  podMetricsEndpoint:
    portName: metrics
    scheme: http
    interval: 5s
|
|
|
|
# Service type and port.
service:
  type: ClusterIP
  port: 9097
|
|
|
|
# ConfigMap name.
# NOTE(review): presumably the ConfigMap that carries the rendered
# Celeborn configuration; confirm against the chart templates.
configmap: celeborn-conf
|
|
|
|
# Resource requests and limits; empty by default.
# We usually recommend not specifying default resources, leaving this as a
# conscious choice for the user. This also increases the chances that the
# chart runs on environments with few resources, such as Minikube. To
# specify resources, uncomment the following lines, adjust them as
# necessary, and remove the curly braces after 'resources:'.
resources: {}
  # master:
  #   limits:
  #     cpu: 100m
  #     memory: 128Mi
  #   requests:
  #     cpu: 100m
  #     memory: 128Mi
  # worker:
  #   limits:
  #     cpu: 100m
  #     memory: 128Mi
  #   requests:
  #     cpu: 100m
  #     memory: 128Mi
|
|
|
|
# Pod annotations; empty by default.
# NOTE(review): which pods receive these is decided by the chart
# templates; confirm there.
podAnnotations: {}
|
|
|
|
# Affinity rules, one entry for masters and one for workers.
# Each entry is a required pod anti-affinity term on the node hostname,
# selecting pods by their app.kubernetes.io/name label.
affinity:
  master:
    podAntiAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        - topologyKey: kubernetes.io/hostname
          labelSelector:
            matchExpressions:
              - key: app.kubernetes.io/name
                operator: In
                values:
                  - celeborn-master
  worker:
    podAntiAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        - topologyKey: kubernetes.io/hostname
          labelSelector:
            matchExpressions:
              - key: app.kubernetes.io/name
                operator: In
                values:
                  - celeborn-worker
|
|
|
|
# Pod tolerations; an empty list by default.
tolerations: []
|
|
|
|
# Node selector labels; an empty mapping by default.
nodeSelector: {}
|