[CELEBORN-1996][HELM] Rename volumes.{master,worker} to {master,worker}.volumes and {master,worker}.volumeMounts

### What changes were proposed in this pull request?

- Rename `volumes.master` to `master.volumes` and `master.volumeMounts`.
- Rename `volumes.worker` to `worker.volumes` and `worker.volumeMounts`.
- Users will need to configure `celeborn.master.ha.ratis.raft.server.storage.dir` manually.
- Users will need to configure `celeborn.worker.storage.dirs` manually.

### Why are the changes needed?

- Unify the values naming by prefixing them with `master` or `worker`.
- Provide users the maximum flexibility to configure storage.
- It will be easier to implement persistentVolumeClaims feature.

### Does this PR introduce _any_ user-facing change?

Yes.

### How was this patch tested?

Run Helm unit tests by `helm unittest charts/celeborn --file "tests/**/*_test.yaml" --strict --debug`.

Closes #3254 from ChenYi015/helm/volumes.

Authored-by: Yi Chen <github@chenyicn.net>
Signed-off-by: Wang, Fei <fwang12@ebay.com>
This commit is contained in:
Yi Chen 2025-05-21 01:21:45 -07:00 committed by Wang, Fei
parent 46d9d63e1f
commit 45b94bf052
7 changed files with 289 additions and 156 deletions

View File

@ -38,44 +38,9 @@ cluster:
# -- Specifies Kubernetes cluster name
name: cluster
# Specifies Celeborn volumes.
# Currently supported volume types are `emptyDir` and `hostPath`.
# Note that `hostPath` only works in hostPath type using to set `volumes hostPath path`.
# Celeborn Master will pick first volumes for store raft log.
# `diskType` only works in Celeborn Worker with hostPath type to manifest local disk type.
volumes:
# -- Specifies volumes for Celeborn master pods
master:
- mountPath: /mnt/celeborn_ratis
type: emptyDir
size: 1Gi
# -- Specifies volumes for Celeborn worker pods
worker:
- mountPath: /mnt/disk1
type: emptyDir
size: 1Gi
- mountPath: /mnt/disk2
type: emptyDir
size: 1Gi
# celeborn configurations
celeborn:
celeborn.master.ha.enabled: false
celeborn.metrics.enabled: false
celeborn.master.http.port: 9098
celeborn.worker.http.port: 9096
celeborn.worker.monitor.disk.enabled: false
celeborn.shuffle.chunk.size: 8m
celeborn.rpc.io.serverThreads: 64
celeborn.rpc.io.numConnectionsPerPeer: 2
celeborn.rpc.io.clientThreads: 64
celeborn.rpc.dispatcher.numThreads: 4
celeborn.worker.flusher.buffer.size: 256K
celeborn.worker.fetch.io.threads: 32
celeborn.worker.push.io.threads: 32
celeborn.push.stageEnd.timeout: 120s
celeborn.application.heartbeat.timeout: 120s
celeborn.worker.heartbeat.timeout: 120s
celeborn.worker.storage.dirs: /mnt/disk1:disktype=SSD:capacity=1Gi,/mnt/disk2:disktype=SSD:capacity=1Gi
master:
# -- Number of Celeborn master replicas to deploy, should not be less than 3.
@ -92,6 +57,11 @@ master:
- name: TZ
value: Asia/Shanghai
# -- Volume mounts for Celeborn master containers.
volumeMounts:
- name: celeborn-ratis
mountPath: /mnt/celeborn_ratis
# -- Resources for Celeborn master containers.
resources:
requests:
@ -101,6 +71,12 @@ master:
cpu: 100m
memory: 800Mi
# -- Volumes for Celeborn master pods.
volumes:
- name: celeborn-ratis
emptyDir:
sizeLimit: 1Gi
# -- DNS policy for Celeborn master pods.
dnsPolicy: ClusterFirstWithHostNet
@ -124,6 +100,13 @@ worker:
- name: TZ
value: Asia/Shanghai
# -- Volume mounts for Celeborn worker containers.
volumeMounts:
- name: disk1
mountPath: /mnt/disk1
- name: disk2
mountPath: /mnt/disk2
# -- Resources for Celeborn worker containers.
resources:
requests:
@ -133,6 +116,15 @@ worker:
cpu: 100m
memory: 1Gi
# -- Volumes for Celeborn worker pods.
volumes:
- name: disk1
emptyDir:
sizeLimit: 1Gi
- name: disk2
emptyDir:
sizeLimit: 1Gi
# -- DNS policy for Celeborn worker pods.
dnsPolicy: ClusterFirstWithHostNet

View File

@ -23,23 +23,19 @@ metadata:
{{- include "celeborn.labels" . | nindent 4 }}
data:
celeborn-defaults.conf: |-
{{- $namespace := .Release.Namespace }}
celeborn.master.endpoints={{ range until (.Values.master.replicas |int) }}{{ $.Release.Name }}-master-{{ . }}.{{ $.Release.Name }}-master-svc.{{ $namespace }}.svc.{{ $.Values.cluster.name }}.local,{{ end }}
{{- range until (.Values.master.replicas |int) }}
celeborn.master.ha.node.{{ . }}.host={{ $.Release.Name }}-master-{{ . }}.{{ $.Release.Name }}-master-svc.{{ $namespace }}.svc.{{ $.Values.cluster.name }}.local
{{- $endpoints := list }}
{{- range until (.Values.master.replicas | int) }}
{{- $endpoint := (printf "%s-%d.%s.%s.svc.%s.local" (include "celeborn.master.statefulSet.name" $) . (include "celeborn.master.service.name" $) $.Release.Namespace $.Values.cluster.name) }}
{{- $endpoints = append $endpoints $endpoint }}
{{- end }}
{{- $dirs := .Values.volumes.master }}
celeborn.master.ha.ratis.raft.server.storage.dir={{ (index $dirs 0).mountPath }}
{{- $path := "" }}
{{- range $worker := .Values.volumes.worker }}
{{- $info := (cat $worker.mountPath ":disktype=" (get $worker "diskType" | default "HDD") ":capacity=" (get $worker "capacity" | default "1PB") | nospace) }}
{{- if eq $path "" }}
{{- $path = $info }}
{{- else }}
{{- $path = ( list $path $info | join ",") }}
celeborn.master.endpoints={{ $endpoints | join "," }}
{{- range until (.Values.master.replicas | int) }}
{{- $host := (printf "%s-%d.%s.%s.svc.%s.local" (include "celeborn.master.statefulSet.name" $) . (include "celeborn.master.service.name" $) $.Release.Namespace $.Values.cluster.name) }}
celeborn.master.ha.node.{{ . }}.host={{ $host }}
{{- end }}
{{- end }}
celeborn.worker.storage.dirs={{ $path }}
{{- range $key, $val := .Values.celeborn }}
{{ $key }}={{ $val }}
{{- end }}

View File

@ -39,27 +39,34 @@ spec:
spec:
serviceAccountName: {{ include "celeborn.serviceAccountName" . }}
initContainers:
{{- $dirs := .Values.volumes.master }}
{{- if eq "hostPath" (index $dirs 0).type }}
- name: chown-{{ $.Release.Name }}-master-volume
image: {{ .Values.image.initContainerImage }}
- name: chown-celeborn-master-volume
image: {{ include "celeborn.image" . }}
{{- with .Values.image.pullPolicy }}
imagePullPolicy: {{ . }}
{{- end }}
command:
- chown
- -R
- {{ .Values.master.podSecurityContext.runAsUser | default 10006 }}:{{ .Values.master.podSecurityContext.runAsGroup | default 10006 }}
- {{ (index $dirs 0).mountPath }}
{{- range $volumeMount := .Values.master.volumeMounts }}
{{- range $volume := $.Values.master.volumes }}
{{- if eq $volume.name $volumeMount.name }}
{{- if or $volume.hostPath $volume.emptyDir }}
- {{ $volumeMount.mountPath }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}
{{- with .Values.master.volumeMounts }}
volumeMounts:
- name: {{ $.Release.Name }}-master-vol-0
mountPath: {{ (index $dirs 0).mountPath }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.master.resources }}
resources:
{{- toYaml . | nindent 10 }}
{{- end }}
securityContext:
runAsUser: 0
{{- end }}
containers:
- name: {{ .Chart.Name }}
image: {{ include "celeborn.image" . }}
@ -94,12 +101,17 @@ spec:
{{- toYaml . | nindent 8 }}
{{- end }}
volumeMounts:
- name: {{ include "celeborn.fullname" . }}-volume
mountPath: /opt/celeborn/conf
readOnly: true
{{- range $index, $volume := .Values.volumes.master }}
- name: {{ $.Release.Name }}-master-vol-{{ $index }}
mountPath: {{ .mountPath }}
- name: celeborn-conf
subPath: celeborn-defaults.conf
mountPath: /opt/celeborn/conf/celeborn-defaults.conf
- name: celeborn-conf
subPath: log4j2.xml
mountPath: /opt/celeborn/conf/log4j2.xml
- name: celeborn-conf
subPath: metrics.properties
mountPath: /opt/celeborn/conf/metrics.properties
{{- with .Values.master.volumeMounts }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.master.resources }}
resources:
@ -114,21 +126,19 @@ spec:
{{- toYaml . | nindent 8 }}
{{- end }}
volumes:
- name: {{ include "celeborn.fullname" . }}-volume
- name: celeborn-conf
configMap:
name: {{ include "celeborn.configMapName" . }}
{{- range $index, $volume := .Values.volumes.master }}
- name: {{ $.Release.Name }}-master-vol-{{ $index }}
{{- if eq "emptyDir" $volume.type }}
emptyDir:
sizeLimit: {{ $volume.capacity }}
{{- else if eq "hostPath" $volume.type }}
hostPath:
path: {{ $volume.hostPath | default $volume.mountPath }}/master
type: DirectoryOrCreate
{{- else }}
{{ fail "For now Celeborn Helm only support emptyDir or hostPath volume types" }}
{{- end }}
defaultMode: 0444
items:
- key: celeborn-defaults.conf
path: celeborn-defaults.conf
- key: log4j2.xml
path: log4j2.xml
- key: metrics.properties
path: metrics.properties
{{- with .Values.master.volumes }}
{{- toYaml . | nindent 6 }}
{{- end }}
{{- with .Values.master.nodeSelector }}
nodeSelector:

View File

@ -39,31 +39,34 @@ spec:
spec:
serviceAccountName: {{ include "celeborn.serviceAccountName" . }}
initContainers:
{{- $dirs := .Values.volumes.worker }}
{{- if eq "hostPath" (index $dirs 0).type }}
- name: chown-{{ $.Release.Name }}-worker-volume
image: {{ .Values.image.initContainerImage }}
- name: chown-celeborn-worker-volume
image: {{ include "celeborn.image" . }}
{{- with .Values.image.pullPolicy }}
imagePullPolicy: {{ . }}
{{- end }}
command:
- chown
- -R
- {{ .Values.worker.podSecurityContext.runAsUser | default 10006 }}:{{ .Values.worker.podSecurityContext.runAsGroup | default 10006 }}
{{- range $dir := $dirs }}
- {{ $dir.mountPath }}
{{- end}}
{{- range $volumeMount := .Values.worker.volumeMounts }}
{{- range $volume := $.Values.worker.volumes }}
{{- if eq $volume.name $volumeMount.name }}
{{- if or $volume.hostPath $volume.emptyDir }}
- {{ $volumeMount.mountPath }}
{{- end }}
{{- end }}
{{- end }}
{{- end }}
{{- with .Values.worker.volumeMounts }}
volumeMounts:
{{- range $index, $dir := $dirs }}
- name: {{ $.Release.Name }}-worker-vol-{{ $index }}
mountPath: {{ $dir.mountPath }}
{{- end}}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.worker.resources }}
resources:
{{- toYaml . | nindent 10 }}
{{- end }}
securityContext:
runAsUser: 0
{{- end }}
containers:
- name: {{ .Chart.Name }}
image: {{ include "celeborn.image" . }}
@ -97,12 +100,17 @@ spec:
{{- toYaml . | nindent 8 }}
{{- end }}
volumeMounts:
- mountPath: /opt/celeborn/conf
name: {{ include "celeborn.fullname" . }}-volume
readOnly: true
{{- range $index, $volume := .Values.volumes.worker }}
- name: {{ $.Release.Name }}-worker-vol-{{ $index }}
mountPath: {{ .mountPath }}
- name: celeborn-conf
subPath: celeborn-defaults.conf
mountPath: /opt/celeborn/conf/celeborn-defaults.conf
- name: celeborn-conf
subPath: log4j2.xml
mountPath: /opt/celeborn/conf/log4j2.xml
- name: celeborn-conf
subPath: metrics.properties
mountPath: /opt/celeborn/conf/metrics.properties
{{- with .Values.worker.volumeMounts }}
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.worker.resources }}
resources:
@ -117,21 +125,19 @@ spec:
{{- toYaml . | nindent 8 }}
{{- end }}
volumes:
- name: {{ include "celeborn.fullname" . }}-volume
- name: celeborn-conf
configMap:
name: {{ include "celeborn.configMapName" . }}
{{- range $index, $volume := .Values.volumes.worker }}
- name: {{ $.Release.Name }}-worker-vol-{{ $index }}
{{- if eq "emptyDir" $volume.type }}
emptyDir:
sizeLimit: {{ $volume.capacity }}
{{- else if eq "hostPath" $volume.type }}
hostPath:
path: {{ $volume.hostPath | default $volume.mountPath }}/worker
type: DirectoryOrCreate
{{- else }}
{{ fail "Currently, Celeborn chart only supports 'emptyDir' and 'hostPath' volume types" }}
{{- end }}
defaultMode: 0444
items:
- key: celeborn-defaults.conf
path: celeborn-defaults.conf
- key: log4j2.xml
path: log4j2.xml
- key: metrics.properties
path: metrics.properties
{{- with .Values.worker.volumes }}
{{- toYaml . | nindent 6 }}
{{- end }}
{{- with .Values.worker.nodeSelector }}
nodeSelector:
@ -159,4 +165,3 @@ spec:
{{- toYaml . | nindent 8 }}
{{- end }}
terminationGracePeriodSeconds: 30

View File

@ -134,6 +134,26 @@ tests:
name: test-secret
optional: false
- it: Should add volume mounts if `master.volumeMounts` is set
set:
master:
volumeMounts:
- name: disk1
mountPath: /mnt/disk1
- name: disk2
mountPath: /mnt/disk2
asserts:
- contains:
path: spec.template.spec.containers[0].volumeMounts
content:
name: disk1
mountPath: /mnt/disk1
- contains:
path: spec.template.spec.containers[0].volumeMounts
content:
name: disk2
mountPath: /mnt/disk2
- it: Should use the specified resources if `master.resources` is set
set:
master:
@ -206,6 +226,32 @@ tests:
path: spec.template.spec.imagePullSecrets[1].name
value: test-secret2
- it: Should add volumes if `master.volumes` is set
set:
master:
volumes:
- name: disk1
emptyDir:
sizeLimit: 10Gi
- name: disk2
hostPath:
type: DirectoryOrCreate
path: /mnt/disk2
asserts:
- contains:
path: spec.template.spec.volumes
content:
name: disk1
emptyDir:
sizeLimit: 10Gi
- contains:
path: spec.template.spec.volumes
content:
name: disk2
hostPath:
type: DirectoryOrCreate
path: /mnt/disk2
- it: Should add node selector if `master.nodeSelector` is set
set:
master:

View File

@ -133,6 +133,26 @@ tests:
name: test-secret
optional: false
- it: Should add volume mounts if `worker.volumeMounts` is set
set:
worker:
volumeMounts:
- name: disk1
mountPath: /mnt/disk1
- name: disk2
mountPath: /mnt/disk2
asserts:
- contains:
path: spec.template.spec.containers[0].volumeMounts
content:
name: disk1
mountPath: /mnt/disk1
- contains:
path: spec.template.spec.containers[0].volumeMounts
content:
name: disk2
mountPath: /mnt/disk2
- it: Should use the specified resources if `worker.resources` is set
set:
worker:
@ -205,6 +225,32 @@ tests:
path: spec.template.spec.imagePullSecrets[1].name
value: test-secret2
- it: Should add volumes if `worker.volumes` is set
set:
worker:
volumes:
- name: disk1
emptyDir:
sizeLimit: 10Gi
- name: disk2
hostPath:
type: DirectoryOrCreate
path: /mnt/disk2
asserts:
- contains:
path: spec.template.spec.volumes
content:
name: disk1
emptyDir:
sizeLimit: 10Gi
- contains:
path: spec.template.spec.volumes
content:
name: disk2
hostPath:
type: DirectoryOrCreate
path: /mnt/disk2
- it: Should add node selector if `worker.nodeSelector` is set
set:
worker:

View File

@ -63,60 +63,56 @@ cluster:
# -- Specifies Kubernetes cluster name
name: cluster
# Specifies Celeborn volumes.
# Currently supported volume types are `emptyDir` and `hostPath`.
# Note that `hostPath` only works in hostPath type using to set `volumes hostPath path`.
# Celeborn Master will pick first volumes for store raft log.
# `diskType` only works in Celeborn Worker with hostPath type to manifest local disk type.
volumes:
# -- Specifies volumes for Celeborn master pods
master:
- mountPath: /mnt/celeborn_ratis
hostPath: /mnt/celeborn_ratis
type: hostPath
capacity: 100Gi
# -- Specifies volumes for Celeborn worker pods
worker:
- mountPath: /mnt/disk1
hostPath: /mnt/disk1
type: hostPath
diskType: SSD
capacity: 100Gi
- mountPath: /mnt/disk2
hostPath: /mnt/disk2
type: hostPath
diskType: SSD
capacity: 100Gi
- mountPath: /mnt/disk3
hostPath: /mnt/disk3
type: hostPath
diskType: SSD
capacity: 100Gi
- mountPath: /mnt/disk4
hostPath: /mnt/disk4
type: hostPath
diskType: SSD
capacity: 100Gi
# -- Celeborn configurations
# -- Celeborn configurations.
# Ref: [Configuration - Apache Celeborn](https://celeborn.apache.org/docs/latest/configuration).
celeborn:
celeborn.master.ha.enabled: true
celeborn.metrics.enabled: true
celeborn.metrics.prometheus.path: /metrics/prometheus
# ============================================================================
# Master
# ============================================================================
celeborn.master.http.port: 9098
celeborn.master.heartbeat.worker.timeout: 120s
celeborn.master.heartbeat.application.timeout: 300s
# ============================================================================
# Master HA
# ============================================================================
celeborn.master.ha.enabled: true
# Do not edit `celeborn.master.ha.node.<id>.host` manually, it should be configured automatically by Helm.
# celeborn.master.ha.node.<id>.host: ""
celeborn.master.ha.ratis.raft.server.storage.dir: /mnt/celeborn_ratis
# ============================================================================
# Worker
# ============================================================================
# Do not edit `celeborn.master.endpoints` manually, it should be configured automatically by Helm.
# celeborn.master.endpoints: <localhost>:9097
celeborn.shuffle.chunk.size: 8m
celeborn.worker.fetch.io.threads: 32
celeborn.worker.flusher.buffer.size: 256K
celeborn.worker.heartbeat.timeout: 120s
celeborn.worker.http.port: 9096
celeborn.worker.monitor.disk.enabled: false
celeborn.shuffle.chunk.size: 8m
celeborn.worker.push.io.threads: 32
celeborn.worker.storage.dirs: /mnt/disk1:disktype=SSD:capacity=100Gi,/mnt/disk2:disktype=SSD:capacity=100Gi,/mnt/disk3:disktype=SSD:capacity=100Gi,/mnt/disk4:disktype=SSD:capacity=100Gi
# ============================================================================
# Client
# ============================================================================
celeborn.client.push.stageEnd.timeout: 120s
# ============================================================================
# Network
# ============================================================================
celeborn.rpc.io.serverThreads: 64
celeborn.rpc.io.numConnectionsPerPeer: 2
celeborn.rpc.io.clientThreads: 64
celeborn.rpc.dispatcher.numThreads: 4
celeborn.worker.flusher.buffer.size: 256K
celeborn.worker.fetch.io.threads: 32
celeborn.worker.push.io.threads: 32
celeborn.push.stageEnd.timeout: 120s
celeborn.application.heartbeat.timeout: 120s
celeborn.worker.heartbeat.timeout: 120s
# ============================================================================
# Metrics
# ============================================================================
celeborn.metrics.enabled: true
celeborn.metrics.prometheus.path: /metrics/prometheus
master:
# -- Number of Celeborn master replicas to deploy, should not be less than 3.
@ -147,6 +143,11 @@ master:
# name: celeborn-secret
# optional: false
# -- Volume mounts for Celeborn master containers.
volumeMounts:
- name: celeborn-ratis
mountPath: /mnt/celeborn_ratis
# -- Resources for Celeborn master containers.
resources:
# requests:
@ -164,6 +165,13 @@ master:
# runAsGroup: 10006
# fsGroup: 10006
# -- Volumes for Celeborn master pods.
volumes:
- name: celeborn-ratis
hostPath:
type: DirectoryOrCreate
path: /mnt/celeborn_ratis
# -- Node selector for Celeborn master pods.
nodeSelector:
# key1: value1
@ -250,6 +258,17 @@ worker:
# name: celeborn-secret
# optional: false
# -- Volume mounts for Celeborn worker containers.
volumeMounts:
- name: disk1
mountPath: /mnt/disk1
- name: disk2
mountPath: /mnt/disk2
- name: disk3
mountPath: /mnt/disk3
- name: disk4
mountPath: /mnt/disk4
# -- Resources for Celeborn worker containers.
resources:
# requests:
@ -267,6 +286,25 @@ worker:
# runAsGroup: 10006
# fsGroup: 10006
# -- Volumes for Celeborn worker pods.
volumes:
- name: disk1
hostPath:
type: DirectoryOrCreate
path: /mnt/disk1
- name: disk2
hostPath:
type: DirectoryOrCreate
path: /mnt/disk2
- name: disk3
hostPath:
type: DirectoryOrCreate
path: /mnt/disk3
- name: disk4
hostPath:
type: DirectoryOrCreate
path: /mnt/disk4
# -- Node selector for Celeborn worker pods.
nodeSelector:
# key1: value1