From f5a73a9eadcb62a72ec7315464ce2d65dbdd7c10 Mon Sep 17 00:00:00 2001 From: Tim Ramlot <42113979+inteon@users.noreply.github.com> Date: Thu, 18 Apr 2024 21:26:43 +0200 Subject: [PATCH] fix bug in dynamic source Signed-off-by: Tim Ramlot <42113979+inteon@users.noreply.github.com> --- pkg/server/tls/dynamic_source.go | 51 ++++++++++++++------------- pkg/server/tls/dynamic_source_test.go | 13 ++++--- 2 files changed, 36 insertions(+), 28 deletions(-) diff --git a/pkg/server/tls/dynamic_source.go b/pkg/server/tls/dynamic_source.go index 92db36460..34e7ffc76 100644 --- a/pkg/server/tls/dynamic_source.go +++ b/pkg/server/tls/dynamic_source.go @@ -118,44 +118,37 @@ func (f *DynamicSource) Start(ctx context.Context) error { // channel which will be notified when the leaf certificate reaches 2/3 of its lifetime // and needs to be renewed - renewalChan := make(chan struct{}) + renewalChan := make(chan struct{}, 1) group.Go(func() error { - // At this point, we expect to have one renewal moment - // in the channel, so we can start the timer with that value var renewMoment time.Time - select { - case renewMoment = <-nextRenewCh: - // We recevieved a renew moment - default: - // This should never happen - panic("Unreacheable") - } for { if done := func() bool { - timer := time.NewTimer(time.Until(renewMoment)) - defer timer.Stop() + var timerChannel <-chan time.Time + if !renewMoment.IsZero() { + timer := time.NewTimer(time.Until(renewMoment)) + defer timer.Stop() + + renewMoment = time.Time{} + timerChannel = timer.C + } // Wait for the timer to expire, or for a new renewal moment to be received select { case <-ctx.Done(): // context was cancelled, return nil return true - case <-timer.C: + case <-timerChannel: // Continue to the next select to try to send a message on renewalChan case renewMoment = <-nextRenewCh: // We recevieved a renew moment, next loop iteration will update the timer return false } - // Try to send a message on renewalChan, but also allow for the context to be - // cancelled. + // the renewal channel has a buffer of 1 - drop event if we are already issueing select { - case <-ctx.Done(): - // context was cancelled, return nil - return true case renewalChan <- struct{}{}: - // Message was sent on channel + default: } return false @@ -217,12 +210,14 @@ func (f *DynamicSource) GetCertificate(*tls.ClientHelloInfo) (*tls.Certificate, } func (f *DynamicSource) Healthy() bool { + f.lock.Lock() + defer f.lock.Unlock() return f.cachedCertificate != nil } func (f *DynamicSource) tryRegenerateCertificate(ctx context.Context, nextRenewCh chan<- time.Time) error { return wait.PollUntilContextCancel(ctx, f.RetryInterval, true, func(ctx context.Context) (done bool, err error) { - if err := f.regenerateCertificate(nextRenewCh); err != nil { + if err := f.regenerateCertificate(ctx, nextRenewCh); err != nil { f.log.Error(err, "Failed to generate serving certificate, retrying...", "interval", f.RetryInterval) return false, nil } @@ -233,7 +228,7 @@ func (f *DynamicSource) tryRegenerateCertificate(ctx context.Context, nextRenewC // regenerateCertificate will trigger the cached certificate and private key to // be regenerated by requesting a new certificate from the authority. -func (f *DynamicSource) regenerateCertificate(nextRenew chan<- time.Time) error { +func (f *DynamicSource) regenerateCertificate(ctx context.Context, nextRenew chan<- time.Time) error { f.log.V(logf.DebugLevel).Info("Generating new ECDSA private key") pk, err := pki.GenerateECPrivateKey(384) if err != nil { @@ -258,10 +253,10 @@ func (f *DynamicSource) regenerateCertificate(nextRenew chan<- time.Time) error f.log.V(logf.DebugLevel).Info("Signed new serving certificate") - return f.updateCertificate(pk, cert, nextRenew) + return f.updateCertificate(ctx, pk, cert, nextRenew) } -func (f *DynamicSource) updateCertificate(pk crypto.Signer, cert *x509.Certificate, nextRenewCh chan<- time.Time) error { +func (f *DynamicSource) updateCertificate(ctx context.Context, pk crypto.Signer, cert *x509.Certificate, nextRenewCh chan<- time.Time) error { f.lock.Lock() defer f.lock.Unlock() @@ -283,7 +278,15 @@ func (f *DynamicSource) updateCertificate(pk crypto.Signer, cert *x509.Certifica f.cachedCertificate = &bundle certDuration := cert.NotAfter.Sub(cert.NotBefore) // renew the certificate 1/3 of the time before its expiry - nextRenewCh <- cert.NotAfter.Add(certDuration / -3) + renewMoment := cert.NotAfter.Add(certDuration / -3) + + select { + case <-ctx.Done(): + return nil + + case nextRenewCh <- renewMoment: + } + f.log.V(logf.InfoLevel).Info("Updated cert-manager TLS certificate", "DNSNames", f.DNSNames) return nil diff --git a/pkg/server/tls/dynamic_source_test.go b/pkg/server/tls/dynamic_source_test.go index 20bf462d0..4f7898c7b 100644 --- a/pkg/server/tls/dynamic_source_test.go +++ b/pkg/server/tls/dynamic_source_test.go @@ -257,14 +257,19 @@ func TestDynamicSource_FailingSign(t *testing.T) { for i := 0; i < 5; i++ { // Sleep for a short time to allow the DynamicSource to generate a new certificate - time.Sleep(100 * time.Millisecond) + // The certificate should get renewed after 100ms, we wait for 200ms to allow for + // possible delays of max 100ms (based on experiments, we noticed that issuance of + // a cert takes about 30ms, so 100ms should be a large enough margin). + time.Sleep(200 * time.Millisecond) // Call the GetCertificate method, should return a NEW certificate - cert2, err := source.GetCertificate(&tls.ClientHelloInfo{}) + newCert, err := source.GetCertificate(&tls.ClientHelloInfo{}) assert.NoError(t, err) - assert.NotNil(t, cert2) + assert.NotNil(t, newCert) - assert.NotEqual(t, cert.Certificate[0], cert2.Certificate[0]) + assert.NotEqual(t, cert.Certificate[0], newCert.Certificate[0]) + + cert = newCert } }, cancelAtEnd: true,