# celeborn/docker/helm/values.yaml

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Default values for celeborn.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.

# A default Celeborn image is provided; replace it with your own image if
# needed.
# TODO: rebuild the official Celeborn image.
image:
  repository: "aliyunemr/remote-shuffle-service"
  pullPolicy: "Always"
  # Quoted so the version-plus-commit tag is always read as a string.
  tag: "0.1.1-6badd20"

# Secrets used to pull the image from a private registry. Kubernetes expects
# a list of name references here, so the empty default is a list, e.g.:
#   imagePullSecrets:
#     - name: my-registry-secret
imagePullSecrets: []

# Number of master replicas; should not be less than 3.
masterReplicas: 3
# Number of worker replicas; set on demand. Should be less than the number of
# nodes.
workerReplicas: 5

# Celeborn release version (quoted so it is always read as a string).
celebornVersion: "0.1.1"

# Celeborn configuration entries, keyed by full property name and grouped by
# area for readability.
# NOTE(review): entries use both the `celeborn.` and `rss.` prefixes; confirm
# that the deployed image recognises every name listed here.
celeborn:
  # --- Storage ---
  # Update celeborn.worker.storage.dirs to the disk mount paths on the
  # Kubernetes nodes.
  celeborn.worker.storage.dirs: "/mnt/disk1,/mnt/disk2,/mnt/disk3,/mnt/disk4"
  celeborn.worker.monitor.disk.enabled: false
  celeborn.worker.flush.buffer.size: "256K"
  celeborn.ha.master.ratis.raft.server.storage.dir: "/mnt/rss_ratis/"

  # --- Push / fetch ---
  celeborn.push.replicate.enabled: true
  celeborn.push.stageEnd.timeout: "120s"
  celeborn.worker.fetch.io.threads: 32
  rss.worker.fetch.chunk.size: "8m"
  rss.push.io.threads: 32

  # --- RPC ---
  rss.rpc.io.serverThreads: 64
  rss.rpc.io.clientThreads: 64
  rss.rpc.io.numConnectionsPerPeer: 2
  rss.rpc.dispatcher.numThreads: 4

  # --- Heartbeats ---
  celeborn.application.heartbeat.timeout: "120s"
  celeborn.worker.heartbeat.timeout: "120s"

  # --- Metrics ---
  celeborn.metrics.enabled: true
  celeborn.master.metrics.prometheus.port: 9098
  celeborn.worker.metrics.prometheus.port: 9096

# Environment variable names and their values. Every value is a string.
# NOTE(review): presumably injected into the master and worker containers by
# the chart templates; confirm against the templates.
environments:
  CELEBORN_MASTER_MEMORY: "2g"
  # Folded scalar: the lines below are joined with single spaces into one
  # option string, with no trailing newline.
  CELEBORN_MASTER_JAVA_OPTS: >-
    -XX:-PrintGC
    -XX:+PrintGCDetails
    -XX:+PrintGCTimeStamps
    -XX:+PrintGCDateStamps
    -Xloggc:gc-master.out
    -Dio.netty.leakDetectionLevel=advanced
  CELEBORN_WORKER_MEMORY: "2g"
  CELEBORN_WORKER_OFFHEAP_MEMORY: "12g"
  CELEBORN_WORKER_JAVA_OPTS: >-
    -XX:-PrintGC
    -XX:+PrintGCDetails
    -XX:+PrintGCTimeStamps
    -XX:+PrintGCDateStamps
    -Xloggc:gc-worker.out
    -Dio.netty.leakDetectionLevel=advanced
  # Must stay quoted: a bare yes is read as a boolean by YAML 1.1 parsers.
  CELEBORN_NO_DAEMONIZE: "yes"
  TZ: "Asia/Shanghai"

# PodMonitor settings: an on/off switch plus the metrics endpoint's scheme,
# scrape interval and port name.
podMonitor:
  enable: true
  podMetricsEndpoint:
    scheme: "http"
    interval: "5s"
    portName: "metrics"

# Service type and port.
service:
  type: "ClusterIP"
  port: 9097

# NOTE(review): presumably the name of the ConfigMap that holds the Celeborn
# configuration; confirm against the chart templates.
configmap: "celeborn-conf"

resources: {}
  # We usually recommend not specifying default resources and leaving this as
  # a conscious choice for the user. This also increases the chances that the
  # chart runs on environments with few resources, such as Minikube. If you do
  # want to specify resources, uncomment the following lines, adjust them as
  # necessary, and remove the curly braces after 'resources:'.
  # NOTE(review): the figures below are placeholders. The default environments
  # set CELEBORN_WORKER_MEMORY to 2g and CELEBORN_WORKER_OFFHEAP_MEMORY to 12g,
  # so real worker memory limits presumably need to be far larger than 128Mi.
  # master:
  #   limits:
  #     cpu: 100m
  #     memory: 128Mi
  #   requests:
  #     cpu: 100m
  #     memory: 128Mi
  # worker:
  #   limits:
  #     cpu: 100m
  #     memory: 128Mi
  #   requests:
  #     cpu: 100m
  #     memory: 128Mi

# Pod annotations as a key/value map; empty by default.
podAnnotations: {}

# Pod anti-affinity rules, defined separately for master and worker. Each rule
# is a hard (required-during-scheduling) constraint with the node hostname as
# its topology key: pods whose app.kubernetes.io/name label matches the
# selector may not share a node.
# NOTE(review): presumably the master and worker pods carry these labels, in
# which case each replica count must not exceed the number of schedulable
# nodes; confirm against the chart templates.
affinity:
  master:
    podAntiAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        - topologyKey: "kubernetes.io/hostname"
          labelSelector:
            matchExpressions:
              - key: "app.kubernetes.io/name"
                operator: "In"
                values: ["celeborn-master"]
  worker:
    podAntiAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        - topologyKey: "kubernetes.io/hostname"
          labelSelector:
            matchExpressions:
              - key: "app.kubernetes.io/name"
                operator: "In"
                values: ["celeborn-worker"]

# Pod tolerations as a list; empty by default.
tolerations: []

# Node selector labels as a key/value map; empty by default.
nodeSelector: {}