cert-manager/pkg/metrics/metrics.go
JoshVanL 63c5e5f5c6
Cleans up metrics pkg to not require locks, and split out resources into
different files

Signed-off-by: JoshVanL <vleeuwenjoshua@gmail.com>
2020-05-21 10:47:00 +01:00

186 lines
6.2 KiB
Go

/*
Copyright 2020 The Jetstack cert-manager contributors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// Package metrics contains global structures related to metrics collection
// cert-manager exposes the following metrics:
// certificate_expiration_timestamp_seconds{name, namespace}
// certificate_ready_status{name, namespace, condition}
// acme_client_request_count{"scheme", "host", "path", "method", "status"}
// acme_client_request_duration_seconds{"scheme", "host", "path", "method", "status"}
// controller_sync_call_count{"controller"}
package metrics
import (
"context"
"net/http"
"time"
"github.com/go-logr/logr"
"github.com/gorilla/mux"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
cmmeta "github.com/jetstack/cert-manager/pkg/apis/meta/v1"
)
const (
// Namespace is the namespace for cert-manager metric names
namespace = "certmanager"
prometheusMetricsServerShutdownTimeout = 5 * time.Second
prometheusMetricsServerReadTimeout = 8 * time.Second
prometheusMetricsServerWriteTimeout = 8 * time.Second
prometheusMetricsServerMaxHeaderBytes = 1 << 20 // 1 MiB
)
// Metrics is designed to be a shared object for updating the metrics exposed
// by cert-manager
type Metrics struct {
log logr.Logger
registry *prometheus.Registry
server *http.Server
certificateExpiryTimeSeconds *prometheus.GaugeVec
certificateReadyStatus *prometheus.GaugeVec
acmeClientRequestDurationSeconds *prometheus.SummaryVec
acmeClientRequestCount *prometheus.CounterVec
controllerSyncCallCount *prometheus.CounterVec
}
var readyConditionStatuses = [...]cmmeta.ConditionStatus{cmmeta.ConditionTrue, cmmeta.ConditionFalse, cmmeta.ConditionUnknown}
func New(log logr.Logger, listenAddress string) *Metrics {
var (
certificateExpiryTimeSeconds = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: namespace,
Name: "certificate_expiration_timestamp_seconds",
Help: "The date after which the certificate expires. Expressed as a Unix Epoch Time.",
},
[]string{"name", "namespace"},
)
certificateReadyStatus = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Namespace: namespace,
Name: "certificate_ready_status",
Help: "The ready status of the certificate.",
},
[]string{"name", "namespace", "condition"},
)
// acmeClientRequestCount is a Prometheus summary to collect the number of
// requests made to each endpoint with the ACME client.
acmeClientRequestCount = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: namespace,
Name: "acme_client_request_count",
Help: "The number of requests made by the ACME client.",
Subsystem: "http",
},
[]string{"scheme", "host", "path", "method", "status"},
)
// acmeClientRequestDurationSeconds is a Prometheus summary to collect request
// times for the ACME client.
acmeClientRequestDurationSeconds = prometheus.NewSummaryVec(
prometheus.SummaryOpts{
Namespace: namespace,
Name: "acme_client_request_duration_seconds",
Help: "The HTTP request latencies in seconds for the ACME client.",
Subsystem: "http",
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
},
[]string{"scheme", "host", "path", "method", "status"},
)
controllerSyncCallCount = prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: namespace,
Name: "controller_sync_call_count",
Help: "The number of sync() calls made by a controller.",
},
[]string{"controller"},
)
)
router := mux.NewRouter()
// Create server and register Prometheus metrics handler
m := &Metrics{
log: log.WithName("metrics"),
registry: prometheus.NewRegistry(),
server: &http.Server{
Addr: listenAddress,
ReadTimeout: prometheusMetricsServerReadTimeout,
WriteTimeout: prometheusMetricsServerWriteTimeout,
MaxHeaderBytes: prometheusMetricsServerMaxHeaderBytes,
Handler: router,
},
certificateExpiryTimeSeconds: certificateExpiryTimeSeconds,
certificateReadyStatus: certificateReadyStatus,
acmeClientRequestCount: acmeClientRequestCount,
acmeClientRequestDurationSeconds: acmeClientRequestDurationSeconds,
controllerSyncCallCount: controllerSyncCallCount,
}
router.Handle("/metrics", promhttp.HandlerFor(m.registry, promhttp.HandlerOpts{}))
return m
}
// Start will register the Prometheu metrics, and start the Prometheus server
func (m *Metrics) Start(stopCh <-chan struct{}) {
m.registry.MustRegister(m.certificateExpiryTimeSeconds)
m.registry.MustRegister(m.certificateReadyStatus)
m.registry.MustRegister(m.acmeClientRequestDurationSeconds)
m.registry.MustRegister(m.acmeClientRequestCount)
m.registry.MustRegister(m.controllerSyncCallCount)
go func() {
log := m.log.WithValues("address", m.server.Addr)
log.Info("listening for connections on")
if err := m.server.ListenAndServe(); err != nil {
log.Error(err, "error running prometheus metrics server")
return
}
log.Info("prometheus metrics server exited")
}()
<-stopCh
m.shutdown()
}
// IncrementSyncCallCount will increase the sync counter for that controller.
func (m *Metrics) IncrementSyncCallCount(controllerName string) {
m.controllerSyncCallCount.WithLabelValues(controllerName).Inc()
}
func (m *Metrics) shutdown() {
m.log.Info("stopping Prometheus metrics server...")
ctx, cancel := context.WithTimeout(context.Background(), prometheusMetricsServerShutdownTimeout)
defer cancel()
if err := m.server.Shutdown(ctx); err != nil {
m.log.Error(err, "prometheus metrics server shutdown failed", err)
return
}
m.log.Info("prometheus metrics server gracefully stopped")
}