[CELEBORN-1552] automatically support prometheus to scrape metrics for helm chart

### What changes were proposed in this pull request?
1. Add annotations to the Master Service and the Worker Service so that Prometheus scrapes them automatically.
2. Add ports to the Worker Service; it previously declared none, so Prometheus could not connect to the workers.

### Why are the changes needed?
Although the master and worker expose HTTP interfaces, annotations still had to be added manually before Prometheus would scrape them automatically.

### Does this PR introduce _any_ user-facing change?
No. Users will not notice any change when installing or using Celeborn.

### How was this patch tested?
Tested locally and in a dev environment.

Before:
![image](https://github.com/user-attachments/assets/d924929f-1cd9-4487-afc6-08390fc8dfc2)

After:
![image](https://github.com/user-attachments/assets/145b0727-e66a-4268-af4d-cf0619eb3b14)

Closes #2673 from lianneli/PR-1552.

Authored-by: Lianne Li <lmlianne@outlook.com>
Signed-off-by: Shuang <lvshuang.xjs@alibaba-inc.com>
This commit is contained in:
Lianne Li 2024-08-26 14:12:45 +08:00 committed by Shuang
parent d14afcddfe
commit 1ea704f330
3 changed files with 80 additions and 0 deletions

View File

@ -155,3 +155,78 @@ Create the name of the worker podmonitor to use
{{- define "celeborn.workerPodMonitorName" -}}
{{ include "celeborn.fullname" . }}-worker-podmonitor
{{- end }}
{{/*
Render the Prometheus scrape annotations for the master Service.

Reads three optional keys from .Values.celeborn:
  - "celeborn.metrics.enabled"          (defaults to true)
  - "celeborn.metrics.prometheus.path"  (defaults to "/metrics/prometheus")
  - "celeborn.master.http.port"         (defaults to 9098)

Renders nothing when metrics are disabled. The output carries no leading
indentation, so callers should pipe it through nindent.
*/}}
{{- define "celeborn.masterMetricsAnnotation" -}}
{{- /* Fall back to an empty dict so the lookups below work when .Values.celeborn is unset. */ -}}
{{- $conf := .Values.celeborn | default dict -}}
{{- /* hasKey instead of default: an explicit false must not fall back to true. */ -}}
{{- $metricsEnabled := true -}}
{{- if hasKey $conf "celeborn.metrics.enabled" -}}
{{- $metricsEnabled = get $conf "celeborn.metrics.enabled" -}}
{{- end -}}
{{- $metricsPath := get $conf "celeborn.metrics.prometheus.path" | default "/metrics/prometheus" -}}
{{- $masterPort := get $conf "celeborn.master.http.port" | default 9098 -}}
{{- /* toString lets both the boolean true and the string "true" enable the annotations. */ -}}
{{- if eq (toString $metricsEnabled) "true" -}}
{{- /* Annotation values must be strings, so every templated value is quoted. */}}
prometheus.io/path: {{ $metricsPath | quote }}
prometheus.io/port: {{ $masterPort | quote }}
prometheus.io/scheme: 'http'
prometheus.io/scrape: 'true'
{{- end }}
{{- end }}
{{/*
Render the Prometheus scrape annotations for the worker Service.

Reads three optional keys from .Values.celeborn:
  - "celeborn.metrics.enabled"          (defaults to true)
  - "celeborn.metrics.prometheus.path"  (defaults to "/metrics/prometheus")
  - "celeborn.worker.http.port"         (defaults to 9096)

Renders nothing when metrics are disabled. The output carries no leading
indentation, so callers should pipe it through nindent.
*/}}
{{- define "celeborn.workerMetricsAnnotation" -}}
{{- /* Fall back to an empty dict so the lookups below work when .Values.celeborn is unset. */ -}}
{{- $conf := .Values.celeborn | default dict -}}
{{- /* hasKey instead of default: an explicit false must not fall back to true. */ -}}
{{- $metricsEnabled := true -}}
{{- if hasKey $conf "celeborn.metrics.enabled" -}}
{{- $metricsEnabled = get $conf "celeborn.metrics.enabled" -}}
{{- end -}}
{{- $metricsPath := get $conf "celeborn.metrics.prometheus.path" | default "/metrics/prometheus" -}}
{{- $workerPort := get $conf "celeborn.worker.http.port" | default 9096 -}}
{{- /* toString lets both the boolean true and the string "true" enable the annotations. */ -}}
{{- if eq (toString $metricsEnabled) "true" -}}
{{- /* Annotation values must be strings, so every templated value is quoted. */}}
prometheus.io/path: {{ $metricsPath | quote }}
prometheus.io/port: {{ $workerPort | quote }}
prometheus.io/scheme: 'http'
prometheus.io/scrape: 'true'
{{- end }}
{{- end }}
{{/*
Render the "ports" section of the worker Service, exposing the worker HTTP port.

Reads two optional keys from .Values.celeborn:
  - "celeborn.metrics.enabled"   (defaults to true)
  - "celeborn.worker.http.port"  (defaults to 9096)

Renders nothing when metrics are disabled. The output starts at column 0 with
the list item nested beneath "ports:", so callers should pipe it through
nindent to place it under the Service spec.
*/}}
{{- define "celeborn.workerServicePort" -}}
{{- /* Fall back to an empty dict so the lookups below work when .Values.celeborn is unset. */ -}}
{{- $conf := .Values.celeborn | default dict -}}
{{- /* hasKey instead of default: an explicit false must not fall back to true. */ -}}
{{- $metricsEnabled := true -}}
{{- if hasKey $conf "celeborn.metrics.enabled" -}}
{{- $metricsEnabled = get $conf "celeborn.metrics.enabled" -}}
{{- end -}}
{{- $workerPort := get $conf "celeborn.worker.http.port" | default 9096 -}}
{{- /* toString lets both the boolean true and the string "true" enable the port. */ -}}
{{- if eq (toString $metricsEnabled) "true" -}}
ports:
  - port: {{ $workerPort }}
    targetPort: {{ $workerPort }}
    protocol: TCP
    name: celeborn-worker-http
{{- end }}
{{- end }}

View File

@ -21,6 +21,8 @@ metadata:
name: {{ include "celeborn.masterServiceName" . }}
labels:
{{- include "celeborn.labels" . | nindent 4 }}
annotations:
{{- include "celeborn.masterMetricsAnnotation" . | nindent 4 }}
spec:
selector:
{{- include "celeborn.selectorLabels" . | nindent 4 }}

View File

@ -21,9 +21,12 @@ metadata:
name: {{ include "celeborn.workerServiceName" . }}
labels:
{{- include "celeborn.labels" . | nindent 4 }}
annotations:
{{- include "celeborn.workerMetricsAnnotation" . | nindent 4 }}
spec:
selector:
{{- include "celeborn.selectorLabels" . | nindent 4 }}
app.kubernetes.io/role: worker
type: {{ .Values.service.type }}
clusterIP: None
{{- include "celeborn.workerServicePort" . | nindent 2 }}