diff --git a/pkg/issuer/acme/BUILD.bazel b/pkg/issuer/acme/BUILD.bazel index ea25d0600..266bc012d 100644 --- a/pkg/issuer/acme/BUILD.bazel +++ b/pkg/issuer/acme/BUILD.bazel @@ -26,6 +26,7 @@ go_library( "//vendor/k8s.io/apimachinery/pkg/apis/meta/v1:go_default_library", "//vendor/k8s.io/apimachinery/pkg/labels:go_default_library", "//vendor/k8s.io/apimachinery/pkg/selection:go_default_library", + "//vendor/k8s.io/apimachinery/pkg/util/errors:go_default_library", "//vendor/k8s.io/client-go/listers/core/v1:go_default_library", "//vendor/k8s.io/utils/clock:go_default_library", ], diff --git a/pkg/issuer/acme/issue.go b/pkg/issuer/acme/issue.go index d94dc0869..dd2c25a6f 100644 --- a/pkg/issuer/acme/issue.go +++ b/pkg/issuer/acme/issue.go @@ -19,6 +19,7 @@ package acme import ( "bytes" "context" + "crypto" "crypto/x509" "encoding/json" "encoding/pem" @@ -27,10 +28,12 @@ import ( "time" "github.com/golang/glog" + corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/selection" + utilerrors "k8s.io/apimachinery/pkg/util/errors" "github.com/jetstack/cert-manager/pkg/acme" "github.com/jetstack/cert-manager/pkg/apis/certmanager/v1alpha1" @@ -49,84 +52,57 @@ var ( ) func (a *Acme) Issue(ctx context.Context, crt *v1alpha1.Certificate) (issuer.IssueResponse, error) { - // initially, we do not set the csr on the order resource we build. - // this is to save having the overhead of generating a new CSR in the case - // where the Order resource is up to date already + key, generated, err := a.getCertificatePrivateKey(crt) + if err != nil { + glog.Errorf("Error getting certificate private key: %v", err) + return issuer.IssueResponse{}, err + } + if generated { + // If we have generated a new private key, we return here to ensure we + // successfully persist the key before creating any CSRs with it. + glog.V(4).Infof("Storing new certificate private key for %s/%s", crt.Namespace, crt.Name) + + keyPem, err := pki.EncodePrivateKey(key) + if err != nil { + return issuer.IssueResponse{}, err + } + + return issuer.IssueResponse{ + PrivateKey: keyPem, + }, nil + } + + // Initially, we do not set the csr on the order resource we build. + // This is to save having the overhead of generating a new CSR in the case + // where the Order resource is up to date already, and also because we have + // not actually read the existing certificate private key yet to ensure it + // exists. expectedOrder, err := buildOrder(crt, nil) if err != nil { return issuer.IssueResponse{}, err } - // attempt to retrieve the existing order resource for this Certificate - var existingOrder *v1alpha1.Order - - glog.V(4).Infof("Attempting to retrieve existing orders for %q/%q", crt.Namespace, crt.Name) - labelMap := certLabels(crt.Name) - selector := labels.NewSelector() - for k, v := range labelMap { - req, err := labels.NewRequirement(k, selection.Equals, []string{v}) - if err != nil { - return issuer.IssueResponse{}, err - } - selector.Add(*req) - } - existingOrders, err := a.orderLister.Orders(crt.Namespace).List(selector) + // Cleanup Order resources that are owned by this Certificate but are not + // up to date (i.e. do not match the requirements on the Certificate). + // Because the order name returned by buildOrder is a hash of its spec, we + // can simply delete all order resources that are owned by us that do not + // have the same name. + err = a.cleanupOwnedOrders(crt, expectedOrder.Name) if err != nil { + glog.Errorf("Error cleaning up old orders: %v", err) return issuer.IssueResponse{}, err } - for _, o := range existingOrders { - // Don't touch any objects that don't have this certificate set as the - // owner reference. - if !metav1.IsControlledBy(o, crt) { - continue - } - if o.Name == expectedOrder.Name { - existingOrder = o - glog.V(4).Infof("Found existing order %q for certificate %s/%s", existingOrder.Name, crt.Namespace, crt.Name) - continue - } - - // delete any old order resources - glog.Infof("Deleting Order resource %s/%s", o.Namespace, o.Name) - err := a.CMClient.CertmanagerV1alpha1().Orders(o.Namespace).Delete(o.Name, nil) - if err != nil { - return issuer.IssueResponse{}, err - } + // Obtain the existing Order for this Certificate from the API server. + // If it does not exist, we continue on as it will be created from + // the generated expectedOrder. + existingOrder, err := a.orderLister.Orders(expectedOrder.Namespace).Get(expectedOrder.Name) + if err != nil && !apierrors.IsNotFound(err) { + glog.Errorf("Error getting existing Order resource: %v", err) + return issuer.IssueResponse{}, err } - - // get existing certificate private key - glog.V(4).Infof("Attempting to fetch existing certificate private key") - key, err := kube.SecretTLSKey(a.secretsLister, crt.Namespace, crt.Spec.SecretName) - if err != nil { - // if the private key is not found, or is formatted incorrectly, we - // will generate a new one and create/overwrite the secret. - if !apierrors.IsNotFound(err) && !errors.IsInvalidData(err) { - return issuer.IssueResponse{}, err - } - - // TODO: perhaps we shouldn't overwrite the secret if it is invalid, - // and instead bail out here? - - glog.V(4).Infof("Generating new private key for %s/%s", crt.Namespace, crt.Name) - // generate a new private key. - key, err := pki.GenerateRSAPrivateKey(2048) - if err != nil { - return issuer.IssueResponse{}, err - } - - keyBytes, err := pki.EncodePrivateKey(key) - if err != nil { - return issuer.IssueResponse{}, err - } - - glog.V(4).Infof("Storing new certificate private key for %s/%s", crt.Namespace, crt.Name) - // We return the private key here early, and trigger an immediate requeue. - // This is because we have just generated a new one, and to keep our - // later logic simple we store it immediately. - // TODO: remove the 'requeue: true' as it could cause a race condition - // where our lister has not observed the new private key generated above - return issuer.IssueResponse{PrivateKey: keyBytes}, nil + if existingOrder == nil { + return issuer.IssueResponse{}, a.createNewOrder(crt, expectedOrder, key) } // if there is an existing order, we check to make sure it is up to date @@ -136,82 +112,40 @@ func (a *Acme) Issue(ctx context.Context, crt *v1alpha1.Certificate) (issuer.Iss // well as the back-off applied to failing ACME Orders. // They should therefore *only* match on changes to the actual Certificate // resource, or underlying Order (i.e. user interaction). - if existingOrder != nil { - glog.V(4).Infof("Validating existing order CSR for Certificate %s/%s", crt.Namespace, crt.Name) + glog.V(4).Infof("Validating existing order CSR for Certificate %s/%s", crt.Namespace, crt.Name) - // if the existing order has expired, we should create a new one - // TODO: implement this order state in the acmeorders controller - if existingOrder.Status.State == v1alpha1.Expired { - return a.retryOrder(existingOrder) - } - - // check if the existing order has failed. - // If it has, we check to see when it last failed and 'back-off' if it - // failed in the recent past. - if acme.IsFailureState(existingOrder.Status.State) { - if crt.Status.LastFailureTime == nil { - nowTime := metav1.NewTime(a.clock.Now()) - crt.Status.LastFailureTime = &nowTime - } - - if time.Now().Sub(crt.Status.LastFailureTime.Time) < createOrderWaitDuration { - return issuer.IssueResponse{}, fmt.Errorf("applying acme order back-off for certificate %s/%s because it has failed within the last %s", crt.Namespace, crt.Name, createOrderWaitDuration) - } - - // otherwise, we clear the lastFailureTime and create a new order - // as the back-off time has passed. - crt.Status.LastFailureTime = nil - - return a.retryOrder(existingOrder) - } - - // check the CSR is created by the private key that we hold - csrBytes := existingOrder.Spec.CSR - if len(csrBytes) == 0 { - return a.retryOrder(existingOrder) - } - existingCSR, err := x509.ParseCertificateRequest(csrBytes) - if err != nil { - return a.retryOrder(existingOrder) - } - - matches, err := pki.PublicKeyMatchesCSR(key.Public(), existingCSR) - if err != nil { - return issuer.IssueResponse{}, err - } - - if !matches { - glog.V(4).Infof("CSR on existing order resource does not match certificate %s/%s private key. Creating new order.", crt.Namespace, crt.Name) - return a.retryOrder(existingOrder) - } + validForKey, err := existingOrderIsValidForKey(existingOrder, key) + if err != nil { + return issuer.IssueResponse{}, err + } + if !validForKey { + glog.V(4).Infof("CSR on existing order resource does not match certificate %s/%s private key. Creating new order.", crt.Namespace, crt.Name) + return a.retryOrder(crt, existingOrder) } - if existingOrder == nil { - glog.V(4).Infof("Creating new Order resource for Certificate %s/%s", crt.Namespace, crt.Name) + // If the existing order has expired, we should create a new one + // TODO: implement setting this order state in the acmeorders controller + if existingOrder.Status.State == v1alpha1.Expired { + return a.retryOrder(crt, existingOrder) + } - csr, err := pki.GenerateCSR(a.issuer, crt) - if err != nil { - // TODO: what errors can be produced here? some error types might - // be permenant, and we should handle that properly. - return issuer.IssueResponse{}, err + // If the existing order has failed, we should check if the Certificate + // already has a LastFailureTime + // - If it does not, then this is a new failure and we record the LastFailureTime + // as Now() and return + // - If it does, and it is more than the 'back-off' period ago, we retry the order + // - Otherwise we return an error to attempt re-processing at a later time + if acme.IsFailureState(existingOrder.Status.State) { + if crt.Status.LastFailureTime == nil { + nowTime := metav1.NewTime(a.clock.Now()) + crt.Status.LastFailureTime = &nowTime } - csrBytes, err := pki.EncodeCSR(csr, key) - if err != nil { - return issuer.IssueResponse{}, err + if time.Now().Sub(crt.Status.LastFailureTime.Time) < createOrderWaitDuration { + return issuer.IssueResponse{}, fmt.Errorf("applying acme order back-off for certificate %s/%s because it has failed within the last %s", crt.Namespace, crt.Name, createOrderWaitDuration) } - // set the CSR field on the order to be created - expectedOrder.Spec.CSR = csrBytes - - existingOrder, err = a.CMClient.CertmanagerV1alpha1().Orders(crt.Namespace).Create(expectedOrder) - if err != nil { - return issuer.IssueResponse{}, err - } - - glog.V(4).Infof("Created new Order resource named %q for Certificate %s/%s", expectedOrder.Name, crt.Namespace, crt.Name) - - return issuer.IssueResponse{}, nil + return a.retryOrder(crt, existingOrder) } if existingOrder.Status.State != v1alpha1.Valid { @@ -229,6 +163,8 @@ func (a *Acme) Issue(ctx context.Context, crt *v1alpha1.Certificate) (issuer.Iss return issuer.IssueResponse{}, err } + // We check the current Order's Certificate resource to see if it's nearing expiry. + // If it is, this implies that it is an old order that is now out of date. certSlice, err := cl.GetCertificate(ctx, existingOrder.Status.CertificateURL) if err != nil { // TODO: parse returned ACME error and potentially re-create order. @@ -242,13 +178,13 @@ func (a *Acme) Issue(ctx context.Context, crt *v1alpha1.Certificate) (issuer.Iss x509Cert, err := x509.ParseCertificate(certSlice[0]) if err != nil { - // TODO: parse returned ACME error and potentially re-create order. - return issuer.IssueResponse{}, fmt.Errorf("failed to parse returned x509 certificate: %v", err.Error()) + // if parsing the certificate fails, recreate the order + return a.retryOrder(crt, existingOrder) } if a.Context.IssuerOptions.CertificateNeedsRenew(x509Cert) { // existing order's certificate is near expiry - return a.retryOrder(existingOrder) + return a.retryOrder(crt, existingOrder) } // encode the retrieved certificates (including the chain) @@ -273,11 +209,114 @@ func (a *Acme) Issue(ctx context.Context, crt *v1alpha1.Certificate) (issuer.Iss }, nil } +func (a *Acme) cleanupOwnedOrders(crt *v1alpha1.Certificate, retain string) error { + labelMap := certLabels(crt.Name) + selector := labels.NewSelector() + for k, v := range labelMap { + req, err := labels.NewRequirement(k, selection.Equals, []string{v}) + if err != nil { + return err + } + selector.Add(*req) + } + existingOrders, err := a.orderLister.Orders(crt.Namespace).List(selector) + if err != nil { + return err + } + + var errs []error + for _, o := range existingOrders { + // Don't touch any objects that don't have this certificate set as the + // owner reference. + if !metav1.IsControlledBy(o, crt) { + continue + } + + if o.Name == retain { + glog.V(4).Infof("Skipping cleanup for active order resource %q", retain) + continue + } + + // delete any old order resources + glog.Infof("Deleting Order resource %s/%s", o.Namespace, o.Name) + a.Recorder.Eventf(crt, corev1.EventTypeNormal, "Cleanup", + fmt.Sprintf("Deleting old Order resource %q", o.Name)) + + err := a.CMClient.CertmanagerV1alpha1().Orders(o.Namespace).Delete(o.Name, nil) + if err != nil && !apierrors.IsNotFound(err) { + glog.Errorf("Error deleting Order resource %s/%s: %v", o.Namespace, o.Name, err) + errs = append(errs, err) + continue + } + } + + return utilerrors.NewAggregate(errs) +} + +func (a *Acme) getCertificatePrivateKey(crt *v1alpha1.Certificate) (crypto.Signer, bool, error) { + glog.V(4).Infof("Attempting to fetch existing certificate private key") + + // If a private key already exists, reuse it. + // TODO: if we have not observed the update to the Secret resource with the + // private key yet, we may in some cases loop and re-generate the private key + // over and over. We could attempt to use the live clientset to read the + // private key too to avoid this case. + key, err := kube.SecretTLSKey(a.secretsLister, crt.Namespace, crt.Spec.SecretName) + if err == nil { + return key, false, nil + } + + // We only generate a new private key if the existing one is not found or + // contains invalid data. + // TODO: should we re-generate on InvalidData? + if !apierrors.IsNotFound(err) && !errors.IsInvalidData(err) { + return nil, false, err + } + + glog.V(4).Infof("Generating new private key for %s/%s", crt.Namespace, crt.Name) + + // generate a new private key. + rsaKey, err := pki.GenerateRSAPrivateKey(2048) + if err != nil { + return nil, false, err + } + + return rsaKey, true, nil +} + +func (a *Acme) createNewOrder(crt *v1alpha1.Certificate, template *v1alpha1.Order, key crypto.Signer) error { + glog.V(4).Infof("Creating new Order resource for Certificate %s/%s", crt.Namespace, crt.Name) + + csr, err := pki.GenerateCSR(a.issuer, crt) + if err != nil { + // TODO: what errors can be produced here? some error types might + // be permenant, and we should handle that properly. + return err + } + + csrBytes, err := pki.EncodeCSR(csr, key) + if err != nil { + return err + } + + // set the CSR field on the order to be created + template.Spec.CSR = csrBytes + + _, err = a.CMClient.CertmanagerV1alpha1().Orders(template.Namespace).Create(template) + if err != nil { + return err + } + + glog.V(4).Infof("Created new Order resource named %q for Certificate %s/%s", template.Name, crt.Namespace, crt.Name) + + return nil +} + // retryOrder will delete the existing order with the foreground // deletion policy. // If delete successfully (i.e. cleaned up), the order name will be // reset to empty and a resync of the resource will begin. -func (a *Acme) retryOrder(existingOrder *v1alpha1.Order) (issuer.IssueResponse, error) { +func (a *Acme) retryOrder(crt *v1alpha1.Certificate, existingOrder *v1alpha1.Order) (issuer.IssueResponse, error) { foregroundDeletion := metav1.DeletePropagationForeground err := a.CMClient.CertmanagerV1alpha1().Orders(existingOrder.Namespace).Delete(existingOrder.Name, &metav1.DeleteOptions{ PropagationPolicy: &foregroundDeletion, @@ -286,6 +325,8 @@ func (a *Acme) retryOrder(existingOrder *v1alpha1.Order) (issuer.IssueResponse, return issuer.IssueResponse{}, err } + crt.Status.LastFailureTime = nil + // Updating the certificate status will trigger a requeue once the change // has been observed by the informer. // If we set Requeue: true here, we may cause a race where the lister has @@ -293,6 +334,31 @@ func (a *Acme) retryOrder(existingOrder *v1alpha1.Order) (issuer.IssueResponse, return issuer.IssueResponse{}, nil } +func existingOrderIsValidForKey(o *v1alpha1.Order, key crypto.Signer) (bool, error) { + // check the CSR is created by the private key that we hold + csrBytes := o.Spec.CSR + if len(csrBytes) == 0 { + // Handles a weird case where an Order exists *without* a CSR set + return false, nil + } + existingCSR, err := x509.ParseCertificateRequest(csrBytes) + if err != nil { + // Absorb invalid CSR datas as 'not valid' + return false, nil + } + + matches, err := pki.PublicKeyMatchesCSR(key.Public(), existingCSR) + if err != nil { + // If this returns an error, something bad happened parsing somewhere + return false, err + } + if !matches { + return false, nil + } + + return true, nil +} + func buildOrder(crt *v1alpha1.Certificate, csr []byte) (*v1alpha1.Order, error) { spec := v1alpha1.OrderSpec{ CSR: csr,