From e4436daaf5e52ee39d5b2a3255935657be8089f8 Mon Sep 17 00:00:00 2001 From: Hunter Gregory <42728408+huntergregory@users.noreply.github.com> Date: Thu, 29 Aug 2024 07:55:19 -0700 Subject: [PATCH] [backport] fix: [NPM] [Linux] panic if applyIPSets continues to fail (#2969) [backport] fix: [NPM] [Linux] panic if applyIPSets continues to fail (#2964) Signed-off-by: Hunter Gregory <42728408+huntergregory@users.noreply.github.com> --- npm/pkg/dataplane/ipsets/ipsetmanager.go | 5 +++++ npm/pkg/dataplane/ipsets/ipsetmanager_linux.go | 13 +++++++++++++ 2 files changed, 18 insertions(+) diff --git a/npm/pkg/dataplane/ipsets/ipsetmanager.go b/npm/pkg/dataplane/ipsets/ipsetmanager.go index dc80dfae56..b57bf67e91 100644 --- a/npm/pkg/dataplane/ipsets/ipsetmanager.go +++ b/npm/pkg/dataplane/ipsets/ipsetmanager.go @@ -51,6 +51,9 @@ type IPSetManager struct { setMap map[string]*IPSet dirtyCache dirtyCacheInterface ioShim *common.IOShim + // consecutiveApplyFailures is used in Linux to count the number of consecutive failures to apply ipsets + // if this count exceeds a threshold, we will panic + consecutiveApplyFailures int sync.RWMutex } @@ -71,6 +74,8 @@ func NewIPSetManager(iMgrCfg *IPSetManagerCfg, ioShim *common.IOShim) *IPSetMana setMap: make(map[string]*IPSet), dirtyCache: newDirtyCache(), ioShim: ioShim, + // set to 0 to avoid lint error for windows + consecutiveApplyFailures: 0, } } diff --git a/npm/pkg/dataplane/ipsets/ipsetmanager_linux.go b/npm/pkg/dataplane/ipsets/ipsetmanager_linux.go index 8a7a8adbf0..3a29d14e37 100644 --- a/npm/pkg/dataplane/ipsets/ipsetmanager_linux.go +++ b/npm/pkg/dataplane/ipsets/ipsetmanager_linux.go @@ -54,6 +54,8 @@ const ( destroySectionPrefix = "delete" addOrUpdateSectionPrefix = "add/update" ipsetRestoreLineFailurePattern = "Error in line (\\d+):" + + maxConsecutiveFailures = 100 ) var ( @@ -408,8 +410,19 @@ func (iMgr *IPSetManager) applyIPSets() error { creator := iMgr.fileCreatorForApply(maxTryCount) restoreError := creator.RunCommandWithFile(ipsetCommand, ipsetRestoreFlag) if restoreError != nil { + iMgr.consecutiveApplyFailures++ + if iMgr.consecutiveApplyFailures >= maxConsecutiveFailures { + msg := fmt.Sprintf("exceeded max consecutive failures (%d) when applying ipsets. final error: %s", maxConsecutiveFailures, restoreError.Error()) + klog.Error(msg) + metrics.SendErrorLogAndMetric(util.IpsmID, msg) + panic(msg) + } + return npmerrors.SimpleErrorWrapper("ipset restore failed when applying ipsets", restoreError) } + + iMgr.consecutiveApplyFailures = 0 + return nil }