diff --git a/npm/pkg/dataplane/dataplane.go b/npm/pkg/dataplane/dataplane.go index 8958d58321..52865e46c6 100644 --- a/npm/pkg/dataplane/dataplane.go +++ b/npm/pkg/dataplane/dataplane.go @@ -18,11 +18,12 @@ import ( const ( reconcileDuration = time.Duration(5 * time.Minute) - contextBackground = "BACKGROUND" - contextApplyDP = "APPLY-DP" - contextAddNetPol = "ADD-NETPOL" - contextAddNetPolBootup = "BOOTUP-ADD-NETPOL" - contextDelNetPol = "DEL-NETPOL" + contextBackground = "BACKGROUND" + contextApplyDP = "APPLY-DP" + contextAddNetPol = "ADD-NETPOL" + contextAddNetPolBootup = "BOOTUP-ADD-NETPOL" + contextAddNetPolCIDRPrecaution = "ADD-NETPOL-CIDR-PRECAUTION" + contextDelNetPol = "DEL-NETPOL" ) var ( @@ -471,6 +472,29 @@ func (dp *DataPlane) addPolicies(netPols []*policies.NPMNetworkPolicy) error { } } + if !util.IsWindowsDP() { + for _, netPol := range netPols { + if !(netPol.HasCIDRRules() && dp.ipsetMgr.PreviousApplyFailed()) { + continue + } + + if inBootupPhase { + // this should never happen because bootup phase is for windows, but just in case, we don't want to applyDataplaneNow() or else there will be a deadlock on dp.applyInfo + msg := fmt.Sprintf("[DataPlane] [%s] at risk of improperly applying a CIDR policy which is removed then readded", contextAddNetPolCIDRPrecaution) + klog.Warning(msg) + metrics.SendErrorLogAndMetric(util.DaemonDataplaneID, msg) + break + } + + // prevent #2977 + if err := dp.applyDataPlaneNow(contextAddNetPolCIDRPrecaution); err != nil { + return err // nolint:wrapcheck // unnecessary to wrap error since the provided context is included in the error + } + + break + } + } + // 1. Add IPSets and apply for each NetPol. // Apply IPSets after each NetworkPolicy unless ApplyInBackground=true and we're in the bootup phase (only happens for Windows currently) for _, netPol := range netPols { diff --git a/npm/pkg/dataplane/dataplane_test.go b/npm/pkg/dataplane/dataplane_test.go index c5bc3c4d37..fd32c547f3 100644 --- a/npm/pkg/dataplane/dataplane_test.go +++ b/npm/pkg/dataplane/dataplane_test.go @@ -221,6 +221,33 @@ func TestRemovePolicy(t *testing.T) { require.NoError(t, err) } +func TestHandle2977(t *testing.T) { + if util.IsWindowsDP() { + return + } + + metrics.InitializeAll() + + calls := append(getBootupTestCalls(), getAddPolicyTestCallsForDP(&testPolicyobj)...) + calls = append(calls, policies.GetRemovePolicyTestCalls(&testPolicyobj)...) + calls = append(calls, ipsets.GetApplyIPSetsFailureTestCalls()...) + calls = append(calls, ipsets.GetApplyIPSetsTestCalls(nil, getAffectedIPSets(&testPolicyobj))...) + calls = append(calls, getAddPolicyTestCallsForDP(&testPolicyobj)...) + ioshim := common.NewMockIOShim(calls) + defer ioshim.VerifyCalls(t, calls) + dp, err := NewDataPlane("testnode", ioshim, dpCfg, nil) + require.NoError(t, err) + + err = dp.AddPolicy(&testPolicyobj) + require.NoError(t, err) + + err = dp.RemovePolicy(testPolicyobj.PolicyKey) + require.Error(t, err) + + err = dp.AddPolicy(&testPolicyobj) + require.NoError(t, err) +} + func TestUpdatePolicy(t *testing.T) { metrics.InitializeAll() diff --git a/npm/pkg/dataplane/ipsets/ipsetmanager.go b/npm/pkg/dataplane/ipsets/ipsetmanager.go index b57bf67e91..e68247f071 100644 --- a/npm/pkg/dataplane/ipsets/ipsetmanager.go +++ b/npm/pkg/dataplane/ipsets/ipsetmanager.go @@ -79,6 +79,13 @@ func NewIPSetManager(iMgrCfg *IPSetManagerCfg, ioShim *common.IOShim) *IPSetMana } } +// PreviousApplyFailed is only relevant for Linux right now since Windows doesn't track failures +func (iMgr *IPSetManager) PreviousApplyFailed() bool { + iMgr.Lock() + defer iMgr.Unlock() + return iMgr.consecutiveApplyFailures > 0 +} + /* Reconcile removes empty/unreferenced sets from the cache. For ApplyAllIPSets mode, those sets are added to the toDeleteCache. diff --git a/npm/pkg/dataplane/ipsets/testutils_linux.go b/npm/pkg/dataplane/ipsets/testutils_linux.go index 94a59f4f45..3f0ef0117d 100644 --- a/npm/pkg/dataplane/ipsets/testutils_linux.go +++ b/npm/pkg/dataplane/ipsets/testutils_linux.go @@ -11,6 +11,12 @@ var ( Stdout: "success", ExitCode: 0, } + + fakeRestoreFailureCommand = testutils.TestCmd{ + Cmd: ipsetRestoreStringSlice, + Stdout: "failure", + ExitCode: 1, + } ) func GetApplyIPSetsTestCalls(toAddOrUpdateIPSets, toDeleteIPSets []*IPSetMetadata) []testutils.TestCmd { @@ -20,6 +26,16 @@ func GetApplyIPSetsTestCalls(toAddOrUpdateIPSets, toDeleteIPSets []*IPSetMetadat return []testutils.TestCmd{fakeRestoreSuccessCommand} } +func GetApplyIPSetsFailureTestCalls() []testutils.TestCmd { + return []testutils.TestCmd{ + fakeRestoreFailureCommand, + fakeRestoreFailureCommand, + fakeRestoreFailureCommand, + fakeRestoreFailureCommand, + fakeRestoreFailureCommand, + } +} + func GetResetTestCalls() []testutils.TestCmd { return []testutils.TestCmd{ {Cmd: []string{"ipset", "list", "--name"}, PipedToCommand: true}, diff --git a/npm/pkg/dataplane/ipsets/testutils_windows.go b/npm/pkg/dataplane/ipsets/testutils_windows.go index f9235beffe..1a14d4667a 100644 --- a/npm/pkg/dataplane/ipsets/testutils_windows.go +++ b/npm/pkg/dataplane/ipsets/testutils_windows.go @@ -27,6 +27,10 @@ func GetApplyIPSetsTestCalls(_, _ []*IPSetMetadata) []testutils.TestCmd { return []testutils.TestCmd{} } +func GetApplyIPSetsFailureTestCalls() []testutils.TestCmd { + return []testutils.TestCmd{} +} + func GetResetTestCalls() []testutils.TestCmd { return []testutils.TestCmd{} } diff --git a/npm/pkg/dataplane/policies/policy.go b/npm/pkg/dataplane/policies/policy.go index b411a7407b..646a03633a 100644 --- a/npm/pkg/dataplane/policies/policy.go +++ b/npm/pkg/dataplane/policies/policy.go @@ -42,6 +42,15 @@ func NewNPMNetworkPolicy(netPolName, netPolNamespace string) *NPMNetworkPolicy { } } +func (netPol *NPMNetworkPolicy) HasCIDRRules() bool { + for _, set := range netPol.RuleIPSets { + if set.Metadata.Type == ipsets.CIDRBlocks { + return true + } + } + return false +} + func (netPol *NPMNetworkPolicy) AllPodSelectorIPSets() []*ipsets.TranslatedIPSet { return append(netPol.PodSelectorIPSets, netPol.ChildPodSelectorIPSets...) }