Merge pull request #1811 from cheukwing/issue-1679
Add Prometheus metrics for Certificate ready status
This commit is contained in:
commit
65138f5e8c
@ -585,6 +585,8 @@ func generateLocallySignedTemporaryCertificate(crt *v1alpha1.Certificate, pk []b
|
||||
}
|
||||
|
||||
func (c *controller) updateCertificateStatus(ctx context.Context, old, new *v1alpha1.Certificate) (*v1alpha1.Certificate, error) {
|
||||
defer c.metrics.UpdateCertificateStatus(new)
|
||||
|
||||
log := logf.FromContext(ctx, "updateStatus")
|
||||
oldBytes, _ := json.Marshal(old.Status)
|
||||
newBytes, _ := json.Marshal(new.Status)
|
||||
|
||||
@ -17,6 +17,7 @@ limitations under the License.
|
||||
// Package metrics contains global structures related to metrics collection
|
||||
// cert-manager exposes the following metrics:
|
||||
// certificate_expiration_timestamp_seconds{name, namespace}
|
||||
// certificate_ready_status{name, namespace, condition}
|
||||
package metrics
|
||||
|
||||
import (
|
||||
@ -52,6 +53,8 @@ const (
|
||||
prometheusMetricsServerMaxHeaderBytes = 1 << 20 // 1 MiB
|
||||
)
|
||||
|
||||
// readyConditionStatuses lists, as plain strings, the three condition
// statuses (True, False, Unknown). Elsewhere in this file it supplies the
// values of the "condition" label of the certificate_ready_status metric,
// both when the gauges are set and when they are cleaned up.
var readyConditionStatuses = [...]string{string(v1alpha1.ConditionTrue), string(v1alpha1.ConditionFalse), string(v1alpha1.ConditionUnknown)}
|
||||
|
||||
// Default set of metrics
|
||||
var Default = New(logf.NewContext(context.Background(), logf.Log.WithName("metrics")))
|
||||
|
||||
@ -64,6 +67,15 @@ var CertificateExpiryTimeSeconds = prometheus.NewGaugeVec(
|
||||
[]string{"name", "namespace"},
|
||||
)
|
||||
|
||||
var CertificateReadyStatus = prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Namespace: namespace,
|
||||
Name: "certificate_ready_status",
|
||||
Help: "The ready status of the certificate.",
|
||||
},
|
||||
[]string{"name", "namespace", "condition"},
|
||||
)
|
||||
|
||||
// ACMEClientRequestCount is a Prometheus summary to collect the number of
|
||||
// requests made to each endpoint with the ACME client.
|
||||
var ACMEClientRequestCount = prometheus.NewCounterVec(
|
||||
@ -109,6 +121,17 @@ var registeredCertificates = &struct {
|
||||
|
||||
var activeCertificates cmlisters.CertificateLister
|
||||
|
||||
// cleanUpFunctions are functions called to clean up metrics which refer to
|
||||
// deleted certificates, inputs are name and namespace of the certificate
|
||||
var cleanUpFunctions = []func(string, string){
|
||||
metricCleanUpCertificate(CertificateExpiryTimeSeconds),
|
||||
metricCleanUpCertificateWith(CertificateReadyStatus, readyConditionStatuses[:]),
|
||||
}
|
||||
|
||||
// cleanableMetric is the subset of a metric vector needed for clean-up: the
// ability to delete one entry identified by its label values. The gauge
// vectors passed to the clean-up function constructors in this file are used
// through this interface.
type cleanableMetric interface {
	DeleteLabelValues(labelValues ...string) bool
}
|
||||
|
||||
type Metrics struct {
|
||||
ctx context.Context
|
||||
http.Server
|
||||
@ -117,6 +140,7 @@ type Metrics struct {
|
||||
// TODO (@dippynark): switch this to use an interface to make it testable
|
||||
registry *prometheus.Registry
|
||||
CertificateExpiryTimeSeconds *prometheus.GaugeVec
|
||||
CertificateReadyStatus *prometheus.GaugeVec
|
||||
ACMEClientRequestDurationSeconds *prometheus.SummaryVec
|
||||
ACMEClientRequestCount *prometheus.CounterVec
|
||||
ControllerSyncCallCount *prometheus.CounterVec
|
||||
@ -138,6 +162,7 @@ func New(ctx context.Context) *Metrics {
|
||||
activeCertificates: nil,
|
||||
registry: prometheus.NewRegistry(),
|
||||
CertificateExpiryTimeSeconds: CertificateExpiryTimeSeconds,
|
||||
CertificateReadyStatus: CertificateReadyStatus,
|
||||
ACMEClientRequestDurationSeconds: ACMEClientRequestDurationSeconds,
|
||||
ACMEClientRequestCount: ACMEClientRequestCount,
|
||||
ControllerSyncCallCount: ControllerSyncCallCount,
|
||||
@ -168,6 +193,7 @@ func (m *Metrics) Start(stopCh <-chan struct{}) {
|
||||
log := logf.FromContext(m.ctx)
|
||||
|
||||
m.registry.MustRegister(m.CertificateExpiryTimeSeconds)
|
||||
m.registry.MustRegister(m.CertificateReadyStatus)
|
||||
m.registry.MustRegister(m.ACMEClientRequestDurationSeconds)
|
||||
m.registry.MustRegister(m.ACMEClientRequestCount)
|
||||
m.registry.MustRegister(m.ControllerSyncCallCount)
|
||||
@ -184,6 +210,7 @@ func (m *Metrics) Start(stopCh <-chan struct{}) {
|
||||
|
||||
}()
|
||||
|
||||
// clean up metrics referring to deleted resources every minute
|
||||
go wait.Until(func() { m.cleanUp() }, time.Minute, stopCh)
|
||||
|
||||
m.waitShutdown(stopCh)
|
||||
@ -221,6 +248,50 @@ func updateX509Expiry(crt *v1alpha1.Certificate, cert *x509.Certificate) {
|
||||
CertificateExpiryTimeSeconds.With(prometheus.Labels{
|
||||
"name": crt.Name,
|
||||
"namespace": crt.Namespace}).Set(float64(expiryTime.Unix()))
|
||||
registerCertificateKey(key)
|
||||
}
|
||||
|
||||
func (m *Metrics) UpdateCertificateStatus(crt *v1alpha1.Certificate) {
|
||||
log := logf.FromContext(m.ctx)
|
||||
log = logf.WithResource(log, crt)
|
||||
|
||||
log.V(logf.DebugLevel).Info("attempting to retrieve ready status for certificate")
|
||||
for _, c := range crt.Status.Conditions {
|
||||
switch c.Type {
|
||||
case v1alpha1.CertificateConditionReady:
|
||||
updateCertificateReadyStatus(crt, c.Status)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func updateCertificateReadyStatus(crt *v1alpha1.Certificate, current v1alpha1.ConditionStatus) {
|
||||
key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(crt)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
registeredCertificates.mtx.Lock()
|
||||
defer registeredCertificates.mtx.Unlock()
|
||||
for _, condition := range readyConditionStatuses {
|
||||
value := 0.0
|
||||
if string(current) == condition {
|
||||
value = 1.0
|
||||
}
|
||||
CertificateReadyStatus.With(prometheus.Labels{
|
||||
"name": crt.Name,
|
||||
"namespace": crt.Namespace,
|
||||
"condition": string(condition),
|
||||
}).Set(value)
|
||||
}
|
||||
registerCertificateKey(key)
|
||||
}
|
||||
|
||||
// registerCertificateKey adds an entry in registeredCertificates to track
|
||||
// which certificates have metrics stored in prometheus, allowing for easier
|
||||
// clean-up.
|
||||
// You MUST lock the mutex before calling this function, this ensures no other
|
||||
// function is cleaning up while we are registering a certificate
|
||||
func registerCertificateKey(key string) {
|
||||
registeredCertificates.certificates[key] = struct{}{}
|
||||
}
|
||||
|
||||
@ -228,6 +299,7 @@ func (m *Metrics) SetActiveCertificates(cl cmlisters.CertificateLister) {
|
||||
m.activeCertificates = cl
|
||||
}
|
||||
|
||||
// cleanUp removes any metrics which reference resources which no longer exist
|
||||
func (m *Metrics) cleanUp() {
|
||||
log := logf.FromContext(m.ctx)
|
||||
log.V(logf.DebugLevel).Info("attempting to clean up metrics for recently deleted certificates")
|
||||
@ -246,6 +318,7 @@ func (m *Metrics) cleanUp() {
|
||||
cleanUpCertificates(activeCrts)
|
||||
}
|
||||
|
||||
// cleanUpCertificates removes metrics for recently deleted certificates
|
||||
func cleanUpCertificates(activeCrts []*v1alpha1.Certificate) {
|
||||
activeMap := make(map[string]struct{}, len(activeCrts))
|
||||
for _, crt := range activeCrts {
|
||||
@ -267,19 +340,47 @@ func cleanUpCertificates(activeCrts []*v1alpha1.Certificate) {
|
||||
}
|
||||
|
||||
for _, key := range toCleanUp {
|
||||
namespace, name, err := cache.SplitMetaNamespaceKey(key)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
|
||||
CertificateExpiryTimeSeconds.Delete(prometheus.Labels{
|
||||
"name": name,
|
||||
"namespace": namespace,
|
||||
})
|
||||
delete(registeredCertificates.certificates, key)
|
||||
cleanUpCertificateByKey(key)
|
||||
}
|
||||
}
|
||||
|
||||
// metricCleanUpCertificate creates a clean up function which deletes the entry
|
||||
// (if any) for a certificate in the given metric
|
||||
func metricCleanUpCertificate(c cleanableMetric) func(string, string) {
|
||||
return func(name, namespace string) {
|
||||
c.DeleteLabelValues(name, namespace)
|
||||
}
|
||||
}
|
||||
|
||||
// metricCleanUpCertificateWith creates a clean up function which deletes the
|
||||
// entries (if any) for a certificate in the given metric, iterating over the
|
||||
// additional labels.
|
||||
// This is used if the metric keys on data in addition to the name and
|
||||
// namespace.
|
||||
func metricCleanUpCertificateWith(c cleanableMetric, additionalLabels []string) func(string, string) {
|
||||
return func(name, namespace string) {
|
||||
for _, label := range additionalLabels {
|
||||
c.DeleteLabelValues(name, namespace, label)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// cleanUpCertificateByKey removes metrics which refer to a certificate,
|
||||
// given the key of the certificate.
|
||||
func cleanUpCertificateByKey(key string) {
|
||||
namespace, name, err := cache.SplitMetaNamespaceKey(key)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
// apply all the clean up functions
|
||||
for _, f := range cleanUpFunctions {
|
||||
f(name, namespace)
|
||||
}
|
||||
|
||||
delete(registeredCertificates.certificates, key)
|
||||
}
|
||||
|
||||
func (m *Metrics) IncrementSyncCallCount(controllerName string) {
|
||||
log := logf.FromContext(m.ctx)
|
||||
log.V(logf.DebugLevel).Info("incrementing controller sync call count", "controllerName", controllerName)
|
||||
|
||||
@ -28,6 +28,33 @@ import (
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
)
|
||||
|
||||
func getReadyConditionStatus(crt *v1alpha1.Certificate) v1alpha1.ConditionStatus {
|
||||
for _, c := range crt.Status.Conditions {
|
||||
switch c.Type {
|
||||
case v1alpha1.CertificateConditionReady:
|
||||
return c.Status
|
||||
}
|
||||
}
|
||||
return v1alpha1.ConditionUnknown
|
||||
}
|
||||
|
||||
func buildCertificate(name, namespace string, condition v1alpha1.ConditionStatus) *v1alpha1.Certificate {
|
||||
return &v1alpha1.Certificate{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: name,
|
||||
Namespace: namespace,
|
||||
},
|
||||
Status: v1alpha1.CertificateStatus{
|
||||
Conditions: []v1alpha1.CertificateCondition{
|
||||
{
|
||||
Type: v1alpha1.CertificateConditionReady,
|
||||
Status: condition,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
func TestUpdateCertificateExpiry(t *testing.T) {
|
||||
const metadata = `
|
||||
# HELP certmanager_certificate_expiration_timestamp_seconds The date after which the certificate expires. Expressed as a Unix Epoch Time.
|
||||
@ -72,94 +99,135 @@ func TestUpdateCertificateExpiry(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestCleanUp(t *testing.T) {
|
||||
func TestUpdateCertificateReadyStatus(t *testing.T) {
|
||||
const metadata = `
|
||||
# HELP certmanager_certificate_expiration_timestamp_seconds The date after which the certificate expires. Expressed as a Unix Epoch Time.
|
||||
# TYPE certmanager_certificate_expiration_timestamp_seconds gauge
|
||||
# HELP certmanager_certificate_ready_status The ready status of the certificate.
|
||||
# TYPE certmanager_certificate_ready_status gauge
|
||||
`
|
||||
|
||||
type testT struct {
|
||||
active map[*v1alpha1.Certificate]*x509.Certificate
|
||||
inactive map[*v1alpha1.Certificate]*x509.Certificate
|
||||
crt *v1alpha1.Certificate
|
||||
expected string
|
||||
}
|
||||
tests := map[string]testT{
|
||||
"active and inactive": {
|
||||
"ready status true is updated correctly": {
|
||||
crt: buildCertificate("something", "default", v1alpha1.ConditionTrue),
|
||||
expected: `
|
||||
certmanager_certificate_ready_status{condition="False",name="something",namespace="default"} 0
|
||||
certmanager_certificate_ready_status{condition="True",name="something",namespace="default"} 1
|
||||
certmanager_certificate_ready_status{condition="Unknown",name="something",namespace="default"} 0
|
||||
`,
|
||||
},
|
||||
"ready status false is updated correctly": {
|
||||
crt: buildCertificate("something", "default", v1alpha1.ConditionFalse),
|
||||
expected: `
|
||||
certmanager_certificate_ready_status{condition="False",name="something",namespace="default"} 1
|
||||
certmanager_certificate_ready_status{condition="True",name="something",namespace="default"} 0
|
||||
certmanager_certificate_ready_status{condition="Unknown",name="something",namespace="default"} 0
|
||||
`,
|
||||
},
|
||||
"ready status unknown is updated correctly": {
|
||||
crt: buildCertificate("something", "default", v1alpha1.ConditionUnknown),
|
||||
expected: `
|
||||
certmanager_certificate_ready_status{condition="False",name="something",namespace="default"} 0
|
||||
certmanager_certificate_ready_status{condition="True",name="something",namespace="default"} 0
|
||||
certmanager_certificate_ready_status{condition="Unknown",name="something",namespace="default"} 1
|
||||
`,
|
||||
},
|
||||
}
|
||||
for n, test := range tests {
|
||||
t.Run(n, func(t *testing.T) {
|
||||
updateCertificateReadyStatus(test.crt, getReadyConditionStatus(test.crt))
|
||||
|
||||
if err := testutil.CollectAndCompare(
|
||||
CertificateReadyStatus,
|
||||
strings.NewReader(metadata+test.expected),
|
||||
"certmanager_certificate_ready_status",
|
||||
); err != nil {
|
||||
t.Errorf("unexpected collecting result:\n%s", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestCleanUp(t *testing.T) {
|
||||
const metadataExpiry = `
|
||||
# HELP certmanager_certificate_expiration_timestamp_seconds The date after which the certificate expires. Expressed as a Unix Epoch Time.
|
||||
# TYPE certmanager_certificate_expiration_timestamp_seconds gauge
|
||||
`
|
||||
|
||||
const metadataReady = `
|
||||
# HELP certmanager_certificate_ready_status The ready status of the certificate.
|
||||
# TYPE certmanager_certificate_ready_status gauge
|
||||
`
|
||||
type testT struct {
|
||||
active map[*v1alpha1.Certificate]*x509.Certificate
|
||||
inactive map[*v1alpha1.Certificate]*x509.Certificate
|
||||
expectedExpiry string
|
||||
expectedReady string
|
||||
}
|
||||
tests := map[string]testT{
|
||||
"inactive certificate metrics cleaned up while active certificate metrics kept": {
|
||||
active: map[*v1alpha1.Certificate]*x509.Certificate{
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "something",
|
||||
Namespace: "default",
|
||||
},
|
||||
}: {
|
||||
buildCertificate("active", "default", v1alpha1.ConditionTrue): {
|
||||
// fixed expiry time for testing
|
||||
NotAfter: time.Unix(2208988804, 0),
|
||||
},
|
||||
},
|
||||
inactive: map[*v1alpha1.Certificate]*x509.Certificate{
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "something-else",
|
||||
Namespace: "default",
|
||||
},
|
||||
}: {
|
||||
buildCertificate("inactive", "default", v1alpha1.ConditionTrue): {
|
||||
// fixed expiry time for testing
|
||||
NotAfter: time.Unix(2208988804, 0),
|
||||
},
|
||||
},
|
||||
expected: `
|
||||
certmanager_certificate_expiration_timestamp_seconds{name="something",namespace="default"} 2.208988804e+09
|
||||
expectedExpiry: `
|
||||
certmanager_certificate_expiration_timestamp_seconds{name="active",namespace="default"} 2.208988804e+09
|
||||
`,
|
||||
expectedReady: `
|
||||
certmanager_certificate_ready_status{condition="False",name="active",namespace="default"} 0
|
||||
certmanager_certificate_ready_status{condition="True",name="active",namespace="default"} 1
|
||||
certmanager_certificate_ready_status{condition="Unknown",name="active",namespace="default"} 0
|
||||
`,
|
||||
},
|
||||
"only active": {
|
||||
"no metrics cleaned up when only active certificate metrics": {
|
||||
active: map[*v1alpha1.Certificate]*x509.Certificate{
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "something",
|
||||
Namespace: "default",
|
||||
},
|
||||
}: {
|
||||
buildCertificate("active", "default", v1alpha1.ConditionTrue): {
|
||||
// fixed expiry time for testing
|
||||
NotAfter: time.Unix(2208988804, 0),
|
||||
},
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "something-else",
|
||||
Namespace: "default",
|
||||
},
|
||||
}: {
|
||||
buildCertificate("also-active", "default", v1alpha1.ConditionTrue): {
|
||||
// fixed expiry time for testing
|
||||
NotAfter: time.Unix(2208988804, 0),
|
||||
},
|
||||
},
|
||||
inactive: map[*v1alpha1.Certificate]*x509.Certificate{},
|
||||
expected: `
|
||||
certmanager_certificate_expiration_timestamp_seconds{name="something",namespace="default"} 2.208988804e+09
|
||||
certmanager_certificate_expiration_timestamp_seconds{name="something-else",namespace="default"} 2.208988804e+09
|
||||
expectedExpiry: `
|
||||
certmanager_certificate_expiration_timestamp_seconds{name="active",namespace="default"} 2.208988804e+09
|
||||
certmanager_certificate_expiration_timestamp_seconds{name="also-active",namespace="default"} 2.208988804e+09
|
||||
`,
|
||||
expectedReady: `
|
||||
certmanager_certificate_ready_status{condition="False",name="active",namespace="default"} 0
|
||||
certmanager_certificate_ready_status{condition="False",name="also-active",namespace="default"} 0
|
||||
certmanager_certificate_ready_status{condition="True",name="active",namespace="default"} 1
|
||||
certmanager_certificate_ready_status{condition="True",name="also-active",namespace="default"} 1
|
||||
certmanager_certificate_ready_status{condition="Unknown",name="active",namespace="default"} 0
|
||||
certmanager_certificate_ready_status{condition="Unknown",name="also-active",namespace="default"} 0
|
||||
`,
|
||||
},
|
||||
"only inactive": {
|
||||
"all metrics cleaned up when only inactive certificate metrics": {
|
||||
active: map[*v1alpha1.Certificate]*x509.Certificate{},
|
||||
inactive: map[*v1alpha1.Certificate]*x509.Certificate{
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "something",
|
||||
Namespace: "default",
|
||||
},
|
||||
}: {
|
||||
buildCertificate("inactive", "default", v1alpha1.ConditionTrue): {
|
||||
// fixed expiry time for testing
|
||||
NotAfter: time.Unix(2208988804, 0),
|
||||
},
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Name: "something-else",
|
||||
Namespace: "default",
|
||||
},
|
||||
}: {
|
||||
buildCertificate("also-inactive", "default", v1alpha1.ConditionTrue): {
|
||||
// fixed expiry time for testing
|
||||
NotAfter: time.Unix(2208988804, 0),
|
||||
},
|
||||
},
|
||||
expected: "",
|
||||
expectedExpiry: "",
|
||||
expectedReady: "",
|
||||
},
|
||||
}
|
||||
for n, test := range tests {
|
||||
@ -169,9 +237,11 @@ func TestCleanUp(t *testing.T) {
|
||||
var activeCrts []*v1alpha1.Certificate
|
||||
for crt, cert := range test.active {
|
||||
updateX509Expiry(crt, cert)
|
||||
updateCertificateReadyStatus(crt, getReadyConditionStatus(crt))
|
||||
activeCrts = append(activeCrts, crt)
|
||||
}
|
||||
for crt, cert := range test.inactive {
|
||||
updateCertificateReadyStatus(crt, getReadyConditionStatus(crt))
|
||||
updateX509Expiry(crt, cert)
|
||||
}
|
||||
|
||||
@ -179,11 +249,19 @@ func TestCleanUp(t *testing.T) {
|
||||
|
||||
if err := testutil.CollectAndCompare(
|
||||
CertificateExpiryTimeSeconds,
|
||||
strings.NewReader(metadata+test.expected),
|
||||
strings.NewReader(metadataExpiry+test.expectedExpiry),
|
||||
"certmanager_certificate_expiration_timestamp_seconds",
|
||||
); err != nil {
|
||||
t.Errorf("unexpected collecting result:\n%s", err)
|
||||
}
|
||||
|
||||
if err := testutil.CollectAndCompare(
|
||||
CertificateReadyStatus,
|
||||
strings.NewReader(metadataReady+test.expectedReady),
|
||||
"certmanager_certificate_ready_status",
|
||||
); err != nil {
|
||||
t.Errorf("unexpected collecting result:\n%s", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user