diff --git a/pkg/acme/acme.go b/pkg/acme/acme.go index 3ae3991fe..37971184a 100644 --- a/pkg/acme/acme.go +++ b/pkg/acme/acme.go @@ -157,7 +157,7 @@ func (h *helperImpl) ClientForIssuer(iss cmapi.GenericIssuer) (acme.Interface, e // In future, we may change to having two global HTTP clients - one that ignores // TLS connection errors, and the other that does not. func buildHTTPClient(skipTLSVerify bool) *http.Client { - return &http.Client{ + return acme.NewInstrumentedClient(&http.Client{ Transport: &http.Transport{ Proxy: http.ProxyFromEnvironment, DialContext: dialTimeout, @@ -168,7 +168,7 @@ func buildHTTPClient(skipTLSVerify bool) *http.Client { ExpectContinueTimeout: 1 * time.Second, }, Timeout: time.Second * 30, - } + }) } var timeout = time.Duration(5 * time.Second) diff --git a/pkg/acme/client/BUILD.bazel b/pkg/acme/client/BUILD.bazel index a3dc125ae..76ca337dc 100644 --- a/pkg/acme/client/BUILD.bazel +++ b/pkg/acme/client/BUILD.bazel @@ -4,11 +4,15 @@ go_library( name = "go_default_library", srcs = [ "fake.go", + "http.go", "interfaces.go", ], importpath = "github.com/jetstack/cert-manager/pkg/acme/client", visibility = ["//visibility:public"], - deps = ["//third_party/crypto/acme:go_default_library"], + deps = [ + "//pkg/metrics:go_default_library", + "//third_party/crypto/acme:go_default_library", + ], ) filegroup( diff --git a/pkg/acme/client/http.go b/pkg/acme/client/http.go new file mode 100644 index 000000000..d344bc563 --- /dev/null +++ b/pkg/acme/client/http.go @@ -0,0 +1,108 @@ +/* +Copyright 2019 The Jetstack cert-manager contributors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package client
+
+import (
+	"fmt"
+	"net/http"
+	"strings"
+	"time"
+
+	"github.com/jetstack/cert-manager/pkg/metrics"
+)
+
+// This file implements a custom instrumented HTTP client round tripper that
+// exposes prometheus metrics for each endpoint called.
+//
+// We implement this as part of the HTTP client to ensure we don't miss any
+// calls made to the ACME server caused by retries in the underlying ACME
+// library.
+
+// Transport is an http.RoundTripper that records Prometheus metrics for every
+// request it forwards to the wrapped RoundTripper. The request path is reduced
+// to its leading segments by pathProcessor before being used as a label value.
+type Transport struct {
+	next http.RoundTripper
+}
+
+// pathProcessor will trim the provided path to only include the first 2
+// segments in order to reduce the number of prometheus labels generated
+func pathProcessor(path string) string {
+	p := strings.Split(path, "/")
+	// only record the first two path segments as a prometheus label value
+	if len(p) > 3 {
+		p = p[:3]
+	}
+	return strings.Join(p, "/")
+}
+
+// RoundTrip implements http.RoundTripper. It forwards the request to the
+// next RoundTripper and measures the time it took in Prometheus summary.
+func (it *Transport) RoundTrip(req *http.Request) (*http.Response, error) {
+	// 999 is reported as the status label when no response was returned.
+	statusCode := 999
+	// Remember the current time.
+	now := time.Now()
+
+	// Make the request using the next RoundTripper.
+	resp, err := it.next.RoundTrip(req)
+	if resp != nil {
+		statusCode = resp.StatusCode
+	}
+
+	labels := []string{
+		req.URL.Scheme,
+		req.URL.Host,
+		pathProcessor(req.URL.Path),
+		req.Method,
+		fmt.Sprintf("%d", statusCode),
+	}
+	// Observe the time it took to make the request.
+	metrics.Default.ACMEClientRequestDurationSeconds.
+		WithLabelValues(labels...).
+		Observe(time.Since(now).Seconds())
+
+	metrics.Default.ACMEClientRequestCount.
+		WithLabelValues(labels...).Inc()
+
+	// return the response and error reported from the next RoundTripper.
+	return resp, err
+}
+
+// NewInstrumentedClient wraps next's Transport with instrumentation, in place,
+// and returns next. A nil next is replaced by a new zero-value http.Client.
+func NewInstrumentedClient(next *http.Client) *http.Client {
+	// Do not fall back to http.DefaultClient: assigning its Transport would
+	// instrument every user of that shared client as if it were ACME traffic.
+	if next == nil {
+		next = &http.Client{}
+	}
+	next.Transport = newTransport(next.Transport)
+
+	return next
+}
+
+// newTransport takes a http.RoundTripper, wraps it with instrumentation and
+// returns it as a new http.RoundTripper.
+func newTransport(next http.RoundTripper) http.RoundTripper {
+	// If next RoundTripper is not defined we'll use http.DefaultTransport.
+	if next == nil {
+		next = http.DefaultTransport
+	}
+
+	return &Transport{next: next}
+}
diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go
index d671e188a..2be41b245 100644
--- a/pkg/metrics/metrics.go
+++ b/pkg/metrics/metrics.go
@@ -61,12 +61,39 @@ var CertificateExpiryTimeSeconds = prometheus.NewGaugeVec(
 	[]string{"name", "namespace"},
 )
 
+// ACMEClientRequestCount is a Prometheus counter of the number of
+// requests made to each endpoint with the ACME client.
+var ACMEClientRequestCount = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: namespace, + Name: "acme_client_request_count", + Help: "The number of requests made by the ACME client.", + Subsystem: "http", + }, + []string{"scheme", "host", "path", "method", "status"}, +) + +// ACMEClientRequestDurationSeconds is a Prometheus summary to collect request +// times for the ACME client. +var ACMEClientRequestDurationSeconds = prometheus.NewSummaryVec( + prometheus.SummaryOpts{ + Namespace: namespace, + Name: "acme_client_request_duration_seconds", + Help: "The HTTP request latencies in seconds for the ACME client.", + Subsystem: "http", + Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, + }, + []string{"scheme", "host", "path", "method", "status"}, +) + type Metrics struct { http.Server // TODO (@dippynark): switch this to use an interface to make it testable - registry *prometheus.Registry - CertificateExpiryTimeSeconds *prometheus.GaugeVec + registry *prometheus.Registry + CertificateExpiryTimeSeconds *prometheus.GaugeVec + ACMEClientRequestDurationSeconds *prometheus.SummaryVec + ACMEClientRequestCount *prometheus.CounterVec } func New() *Metrics { @@ -82,8 +109,10 @@ func New() *Metrics { MaxHeaderBytes: prometheusMetricsServerMaxHeaderBytes, Handler: router, }, - registry: prometheus.NewRegistry(), - CertificateExpiryTimeSeconds: CertificateExpiryTimeSeconds, + registry: prometheus.NewRegistry(), + CertificateExpiryTimeSeconds: CertificateExpiryTimeSeconds, + ACMEClientRequestDurationSeconds: ACMEClientRequestDurationSeconds, + ACMEClientRequestCount: ACMEClientRequestCount, } router.Handle("/metrics", promhttp.HandlerFor(s.registry, promhttp.HandlerOpts{})) @@ -107,8 +136,9 @@ func (m *Metrics) waitShutdown(stopCh <-chan struct{}) { } func (m *Metrics) Start(stopCh <-chan struct{}) { - m.registry.MustRegister(m.CertificateExpiryTimeSeconds) + m.registry.MustRegister(m.ACMEClientRequestDurationSeconds) + 
m.registry.MustRegister(m.ACMEClientRequestCount) go func() {