Skip to content

Commit

Permalink
fix: stateless CNI delete fix
Browse files Browse the repository at this point in the history
  • Loading branch information
behzad-mir committed Aug 28, 2024
1 parent e03eaed commit ccaa383
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 12 deletions.
17 changes: 16 additions & 1 deletion cni/network/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"os"
"regexp"
"strconv"
"strings"
"time"

"github.com/Azure/azure-container-networking/aitelemetry"
Expand All @@ -19,6 +20,7 @@ import (
"github.com/Azure/azure-container-networking/cni/util"
"github.com/Azure/azure-container-networking/cns"
cnscli "github.com/Azure/azure-container-networking/cns/client"
"github.com/Azure/azure-container-networking/cns/fsnotify"
"github.com/Azure/azure-container-networking/common"
"github.com/Azure/azure-container-networking/iptables"
"github.com/Azure/azure-container-networking/netio"
Expand Down Expand Up @@ -1125,18 +1127,31 @@ func (plugin *NetPlugin) Delete(args *cniSkel.CmdArgs) error {
// network ID is passed in and used only for migration
// otherwise, in stateless, we don't need the network id for deletion
epInfos, err = plugin.nm.GetEndpointState(networkID, args.ContainerID)
// if stateless CNI fail to get the endpoint from CNS for any reason other than Endpoint Not found
if err != nil && !strings.Contains(err.Error(), "endpoint state could not be found in the statefile") {
if strings.Contains(err.Error(), "No connection could be made") {
addErr := fsnotify.AddFile("stateless_"+args.ContainerID, args.ContainerID, watcherPath)
if addErr != nil {
logger.Error("Failed to add file to watcher", zap.String("containerID", args.ContainerID), zap.Error(addErr))
return errors.Wrap(addErr, fmt.Sprintf("failed to add file to watcher with containerID %s", args.ContainerID))
}

}
return plugin.RetriableError(fmt.Errorf("failed to delete endpoint: %w", err))
}
} else {
epInfos = plugin.nm.GetEndpointInfosFromContainerID(args.ContainerID)
}

// for when the endpoint is not created, but the ips are already allocated (only works if single network, single infra)
// stateless cni won't have this issue
// stateless cni might get into this issue if endpoint is not present in the statefile
if len(epInfos) == 0 {
endpointID := plugin.nm.GetEndpointID(args.ContainerID, args.IfName)
if !nwCfg.MultiTenancy {
logger.Error("Failed to query endpoint",
zap.String("endpoint", endpointID),
zap.Error(err))

logger.Error("Release ip by ContainerID (endpoint not found)",
zap.String("containerID", args.ContainerID))
sendEvent(plugin, fmt.Sprintf("Release ip by ContainerID (endpoint not found):%v", args.ContainerID))
Expand Down
30 changes: 29 additions & 1 deletion cns/fsnotify/fsnotify.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,14 @@ import (
"io"
"io/fs"
"os"
"runtime"
"strings"
"sync"
"time"

"github.com/Azure/azure-container-networking/cns"
"github.com/Azure/azure-container-networking/cns/hnsclient"
"github.com/Azure/azure-container-networking/cns/restserver"
"github.com/fsnotify/fsnotify"
"github.com/pkg/errors"
"go.uber.org/zap"
Expand All @@ -17,6 +21,7 @@ import (

type releaseIPsClient interface {
ReleaseIPs(ctx context.Context, ipconfig cns.IPConfigsRequest) error
GetEndpoint(ctx context.Context, endpointID string) (*restserver.GetEndpointResponse, error)
}

type watcher struct {
Expand Down Expand Up @@ -68,7 +73,16 @@ func (w *watcher) releaseAll(ctx context.Context) {
}
file.Close()
podInterfaceID := string(data)

// in case of stateless CNI, CNS needs to remove HNS endpoitns first
if strings.Contains(podInterfaceID, "stateless") && runtime.GOOS == "windows" {
podInterfaceID = podInterfaceID[9:]
w.log.Info("removinf HNS Endpoint for", zap.String("podInterfaceID", podInterfaceID))
// remove HNS endpoint
if err := w.deleteHNSEndpoint(ctx, containerID); err != nil {
w.log.Error("failed to remove HNS endpoint", zap.Error(err))
return
}
}
w.log.Info("releasing IP for missed delete", zap.String("podInterfaceID", podInterfaceID), zap.String("containerID", containerID))
if err := w.releaseIP(ctx, podInterfaceID, containerID); err != nil {
w.log.Error("failed to release IP for missed delete", zap.String("containerID", containerID), zap.Error(err))
Expand Down Expand Up @@ -206,3 +220,17 @@ func (w *watcher) releaseIP(ctx context.Context, podInterfaceID, containerID str
}
return errors.Wrap(w.cli.ReleaseIPs(ctx, *ipconfigreq), "failed to release IP from CNS")
}

// call GetEndpoint API to get the state and then remove assiciated HNS
func (w *watcher) deleteHNSEndpoint(ctx context.Context, containerid string) error {
endpointResponse, err := w.cli.GetEndpoint(ctx, containerid)
if err != nil {
return errors.Wrap(err, "failed to read the endpoint from CNS state")
}
for _, ipInfo := range endpointResponse.EndpointInfo.IfnameToIPMap {
if err := hnsclient.DeleteHNSEndpointbyID(ipInfo.HnsEndpointID); err != nil {
return errors.Wrap(err, "failed to delete HNS endpoint with id "+ipInfo.HnsEndpointID)
}
}
return nil
}
6 changes: 6 additions & 0 deletions cns/hnsclient/hnsclient_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,9 @@ func DeleteHostNCApipaEndpoint(
networkContainerID string) error {
return nil
}

// DeleteHNSEndpointbyID deletes the HNS endpoint
// created for Satateless CNI Asynch delete
func DeleteHNSEndpointbyID(_ string) error {
return nil
}
9 changes: 9 additions & 0 deletions cns/hnsclient/hnsclient_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -685,3 +685,12 @@ func DeleteHostNCApipaEndpoint(

return nil
}

// DeleteHNSEndpointbyID deletes the HNS endpoint
func DeleteHNSEndpointbyID(endpointName string) error {
if err := deleteEndpointByNameHnsV2(endpointName); err != nil {
logger.Errorf("[Azure CNS] Failed to delete HNS Endpoint: %s. Error: %v", endpointName, err)
return err
}
return nil
}
19 changes: 9 additions & 10 deletions test/validate/windows_validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ var (
hnsNetworkCmd = []string{"powershell", "-c", "Get-HnsNetwork | ConvertTo-Json"}
azureVnetCmd = []string{"powershell", "-c", "cat ../../k/azure-vnet.json"}
azureVnetIpamCmd = []string{"powershell", "-c", "cat ../../k/azure-vnet-ipam.json"}
cnsWinManagedStateFileCmd = []string{"powershell", "-c", "cat ../../k/azure-cns/azure-endpoints.json"}
cnsWinManagedStateFileCmd = []string{"powershell", "-c", "cat ../../k/azurecns/azure-endpoints.json"}
cnsWinCachedAssignedIPStateCmd = []string{
"powershell", "Invoke-WebRequest -Uri 127.0.0.1:10090/debug/ipaddresses",
"-Method Post -ContentType application/x-www-form-urlencoded",
Expand Down Expand Up @@ -80,19 +80,18 @@ var windowsChecksMap = map[string][]check{
},
},
"stateless": {
//{
// name: "hns",
// stateFileIPs: hnsStateFileIPs,
// podLabelSelector: privilegedLabelSelector,
// podNamespace: privilegedNamespace,
// cmd: hnsEndPointCmd,
//},
{
name: "hns",
stateFileIPs: hnsStateFileIPs,
podLabelSelector: privilegedLabelSelector,
podNamespace: privilegedNamespace,
cmd: hnsEndPointCmd,
},
{
name: "cns",
stateFileIPs: cnsManagedStateFileIps,
podLabelSelector: validatorPod,
podLabelSelector: privilegedLabelSelector,
podNamespace: privilegedNamespace,
containerName: "debug",
cmd: cnsWinManagedStateFileCmd,
}, // cns configmap "ManageEndpointState": true, | Endpoints managed in CNS State File
{
Expand Down

0 comments on commit ccaa383

Please sign in to comment.