From 4fe1647c5151b7c7cf9d0b00f4f8ac2a5c6f53dc Mon Sep 17 00:00:00 2001 From: Aleksandar Himel Date: Mon, 3 Nov 2025 17:27:25 +0100 Subject: [PATCH] POC CNI and CNS changes to support IPv6 Secondary IPs. Additional work needed, draft only --- cni/network/invoker_cns.go | 56 +++++++++++ cni/network/network.go | 12 ++- cns/NetworkContainerContract.go | 1 + cns/restserver/ipam.go | 3 +- cns/restserver/util.go | 166 ++++++++++++++++++++++++++++++++ network/endpoint_windows.go | 5 + network/network_windows.go | 26 ++++- 7 files changed, 263 insertions(+), 6 deletions(-) diff --git a/cni/network/invoker_cns.go b/cni/network/invoker_cns.go index 928096b361..4c26c854b6 100644 --- a/cni/network/invoker_cns.go +++ b/cni/network/invoker_cns.go @@ -57,6 +57,7 @@ type IPResultInfo struct { routes []cns.Route pnpID string endpointPolicies []policy.Policy + secondaryIPs map[string]cns.SecondaryIPConfig } func (i IPResultInfo) MarshalLogObject(encoder zapcore.ObjectEncoder) error { @@ -162,6 +163,7 @@ func (invoker *CNSIPAMInvoker) Add(addConfig IPAMAddConfig) (IPAMAddResult, erro routes: response.PodIPInfo[i].Routes, pnpID: response.PodIPInfo[i].PnPID, endpointPolicies: response.PodIPInfo[i].EndpointPolicies, + secondaryIPs: response.PodIPInfo[i].SecondaryIPConfigs, } logger.Info("Received info for pod", @@ -505,9 +507,63 @@ func configureSecondaryAddResult(info *IPResultInfo, addResult *IPAMAddResult, p SkipDefaultRoutes: info.skipDefaultRoutes, } + if len(info.secondaryIPs) > 0 { + secIPConfig, err := BuildIPConfigForV6(info.secondaryIPs) + + if err == nil { + // If BuildIPConfigForV6 returns a value, take its address + ifaceInfo := addResult.interfaceInfo[key] + ifaceInfo.IPConfigs = append(ifaceInfo.IPConfigs, &secIPConfig) + addResult.interfaceInfo[key] = ifaceInfo + } + } + return nil } +// BuildIPConfigForV6 takes SecondaryIPConfigs and returns an IPConfig. +// Assumes map has at least one element and uses the first one found. 
+func BuildIPConfigForV6(secondaryIPs map[string]cns.SecondaryIPConfig) (network.IPConfig, error) { + for _, v := range secondaryIPs { + ip, ipNet, err := net.ParseCIDR(v.IPAddress) + if err != nil { + return network.IPConfig{}, fmt.Errorf("invalid IPAddress %q: %w", v.IPAddress, err) + } + if ip.To4() != nil { + return network.IPConfig{}, fmt.Errorf("expected IPv6, got IPv4: %q", v.IPAddress) + } + + // Preserve the original address/prefix (often /128) for the endpoint. + addr := *ipNet + + // Compute the gateway from the /64 network: + // If the parsed mask is /128, swap to /64 for the base; otherwise if already <= /64, use it. + ones, bits := ipNet.Mask.Size() + gwMask := ipNet.Mask + if ones > 64 { // e.g., /128 + gwMask = net.CIDRMask(64, bits) + } + + // Base = ip masked with /64 + base := ip.Mask(gwMask).To16() + if base == nil { + return network.IPConfig{}, fmt.Errorf("failed to get 16-byte IPv6 for %q", v.IPAddress) + } + + // Set gateway to ...:...:...:1 (i.e., last byte = 1) + gw := make(net.IP, len(base)) + copy(gw, base) + gw[15] = 0x01 // ::1 within that /64 + + return network.IPConfig{ + Address: addr, // original ipNet (likely /128) + Gateway: gw, // derived from /64 base + }, nil + } + + return network.IPConfig{}, fmt.Errorf("map is empty") +} + func addBackendNICToResult(info *IPResultInfo, addResult *IPAMAddResult, key string) error { macAddress, err := net.ParseMAC(info.macAddress) if err != nil { diff --git a/cni/network/network.go b/cni/network/network.go index 1ec1666f45..9b76db5cdc 100644 --- a/cni/network/network.go +++ b/cni/network/network.go @@ -567,6 +567,8 @@ func (plugin *NetPlugin) Add(args *cniSkel.CmdArgs) error { natInfo := getNATInfo(nwCfg, options[network.SNATIPKey], enableSnatForDNS) networkID, _ := plugin.getNetworkID(args.Netns, &ifInfo, nwCfg) + isIPv6 := ipamAddResult.ipv6Enabled + createEpInfoOpt := createEpInfoOpt{ nwCfg: nwCfg, cnsNetworkConfig: ifInfo.NCResponse, @@ -582,7 +584,7 @@ func (plugin *NetPlugin) Add(args 
*cniSkel.CmdArgs) error { networkID: networkID, ifInfo: &ifInfo, ipamAddConfig: &ipamAddConfig, - ipv6Enabled: ipamAddResult.ipv6Enabled, + ipv6Enabled: isIPv6, infraSeen: &infraSeen, endpointIndex: endpointIndex, } @@ -1411,11 +1413,15 @@ func convertInterfaceInfoToCniResult(info network.InterfaceInfo, ifName string) if len(info.IPConfigs) > 0 { for _, ipconfig := range info.IPConfigs { - result.IPs = append(result.IPs, &cniTypesCurr.IPConfig{Address: ipconfig.Address, Gateway: ipconfig.Gateway}) + if ipconfig.Address.IP.To4() != nil { + result.IPs = append(result.IPs, &cniTypesCurr.IPConfig{Address: ipconfig.Address, Gateway: ipconfig.Gateway}) + } } for i := range info.Routes { - result.Routes = append(result.Routes, &cniTypes.Route{Dst: info.Routes[i].Dst, GW: info.Routes[i].Gw}) + if info.Routes[i].Gw.To4() != nil { + result.Routes = append(result.Routes, &cniTypes.Route{Dst: info.Routes[i].Dst, GW: info.Routes[i].Gw}) + } } } diff --git a/cns/NetworkContainerContract.go b/cns/NetworkContainerContract.go index 8f5939c28e..ea0b6ef7e0 100644 --- a/cns/NetworkContainerContract.go +++ b/cns/NetworkContainerContract.go @@ -503,6 +503,7 @@ type GetNetworkContainerResponse struct { type PodIpInfo struct { PodIPConfig IPSubnet + SecondaryIPConfigs map[string]SecondaryIPConfig // uuid is key NetworkContainerPrimaryIPConfig IPConfiguration HostPrimaryIPInfo HostIPInfo NICType NICType diff --git a/cns/restserver/ipam.go b/cns/restserver/ipam.go index 7c1366149d..f1d6aa5ee1 100644 --- a/cns/restserver/ipam.go +++ b/cns/restserver/ipam.go @@ -137,7 +137,7 @@ func (service *HTTPRestService) requestIPConfigHandlerHelperStandalone(ctx conte // IMPORTANT: although SwiftV2 reuses the concept of NCs, NMAgent doesn't program NCs for SwiftV2, but // instead programs NICs. When getting SwiftV2 NCs, we want the NIC type and MAC address of the NCs. // TODO: we need another way to verify and sync NMAgent's NIC programming status. 
pending new NMAgent API or NIC programming status to be passed in the SwiftV2 create NC request. - resp := service.getAllNetworkContainerResponses(cnsRequest) //nolint:contextcheck // not passed in any methods, appease linter + resp, respCreateRequest := service.getAllNetworkContainerResponsesIPv6(cnsRequest) //nolint:contextcheck // not passed in any methods, appease linter // return err if returned list has no NCs if len(resp) == 0 { return &cns.IPConfigsResponse{ @@ -156,6 +156,7 @@ func (service *HTTPRestService) requestIPConfigHandlerHelperStandalone(ctx conte MacAddress: resp[i].NetworkInterfaceInfo.MACAddress, NICType: resp[i].NetworkInterfaceInfo.NICType, NetworkContainerPrimaryIPConfig: resp[i].IPConfiguration, + SecondaryIPConfigs: respCreateRequest[i].SecondaryIPConfigs, } podIPInfoList = append(podIPInfoList, podIPInfo) } diff --git a/cns/restserver/util.go b/cns/restserver/util.go index a84eb8cef0..1de7aed61d 100644 --- a/cns/restserver/util.go +++ b/cns/restserver/util.go @@ -550,6 +550,172 @@ func (service *HTTPRestService) getAllNetworkContainerResponses( return getNetworkContainersResponse } +// Copy of above function as I can't easily change the GetNetworkContainerResponse, too many dependencies +func (service *HTTPRestService) getAllNetworkContainerResponsesIPv6( + req cns.GetNetworkContainerRequest, +) ([]cns.GetNetworkContainerResponse, []cns.CreateNetworkContainerRequest) { + var ( + getNetworkContainerResponse cns.GetNetworkContainerResponse + ncs []string + skipNCVersionCheck = false + ) + + service.Lock() + defer service.Unlock() + + switch service.state.OrchestratorType { + case cns.Kubernetes, cns.ServiceFabric, cns.Batch, cns.DBforPostgreSQL, cns.AzureFirstParty: + podInfo, err := cns.UnmarshalPodInfo(req.OrchestratorContext) + getNetworkContainersResponse := []cns.GetNetworkContainerResponse{} + + if err != nil { + response := cns.Response{ + ReturnCode: types.UnexpectedError, + Message: fmt.Sprintf("Unmarshalling orchestrator context 
failed with error %v", err), + } + + getNetworkContainerResponse.Response = response + getNetworkContainersResponse = append(getNetworkContainersResponse, getNetworkContainerResponse) + return getNetworkContainersResponse, nil + } + + // get networkContainerIDs as string, "nc1, nc2" + orchestratorContext := podInfo.Name() + podInfo.Namespace() + if service.state.ContainerIDByOrchestratorContext[orchestratorContext] != nil { + ncs = strings.Split(string(*service.state.ContainerIDByOrchestratorContext[orchestratorContext]), ",") + } + + // This indicates that there are no ncs for the given orchestrator context + if len(ncs) == 0 { + response := cns.Response{ + ReturnCode: types.UnknownContainerID, + Message: fmt.Sprintf("Failed to find networkContainerID for orchestratorContext %s", orchestratorContext), + } + + getNetworkContainerResponse.Response = response + getNetworkContainersResponse = append(getNetworkContainersResponse, getNetworkContainerResponse) + return getNetworkContainersResponse, nil + } + + ctx, cancel := context.WithTimeout(context.Background(), nmaAPICallTimeout) + defer cancel() + ncVersionListResp, err := service.nma.GetNCVersionList(ctx) + if err != nil { + skipNCVersionCheck = true + logger.Errorf("failed to get nc version list from nmagent") + // TODO: Add telemetry as this has potential to have containers in the running state w/o datapath working + } + nmaNCs := map[string]string{} + for _, nc := range ncVersionListResp.Containers { + // store nmaNCID as lower case to allow case insensitive comparison with nc stored in CNS + nmaNCs[strings.TrimPrefix(lowerCaseNCGuid(nc.NetworkContainerID), cns.SwiftPrefix)] = nc.Version + } + + if !skipNCVersionCheck { + for _, ncid := range ncs { + waitingForUpdate := false + // If the goal state is available with CNS, check if the NC is pending VFP programming + waitingForUpdate, getNetworkContainerResponse.Response.ReturnCode, getNetworkContainerResponse.Response.Message = 
service.isNCWaitingForUpdate(service.state.ContainerStatus[ncid].CreateNetworkContainerRequest.Version, ncid, nmaNCs) //nolint:lll // bad code + // If the return code is not success, return the error to the caller + if getNetworkContainerResponse.Response.ReturnCode == types.NetworkContainerVfpProgramPending { + logger.Errorf("[Azure-CNS] isNCWaitingForUpdate failed for NCID: %s", ncid) + } + + vfpUpdateComplete := !waitingForUpdate + ncstatus := service.state.ContainerStatus[ncid] + // Update the container status if- + // 1. VfpUpdateCompleted successfully + // 2. VfpUpdateComplete changed to false + if (getNetworkContainerResponse.Response.ReturnCode == types.NetworkContainerVfpProgramComplete && + vfpUpdateComplete && ncstatus.VfpUpdateComplete != vfpUpdateComplete) || + (!vfpUpdateComplete && ncstatus.VfpUpdateComplete != vfpUpdateComplete) { + logger.Printf("[Azure-CNS] Setting VfpUpdateComplete to %t for NCID: %s", vfpUpdateComplete, ncid) + ncstatus.VfpUpdateComplete = vfpUpdateComplete + service.state.ContainerStatus[ncid] = ncstatus + if err = service.saveState(); err != nil { + logger.Errorf("Failed to save goal states for nc %+v due to %s", getNetworkContainerResponse, err) + } + } + } + } + + if service.ChannelMode == cns.Managed { + // If the NC goal state doesn't exist in CNS running in managed mode, call DNC to retrieve the goal state + var ( + dncEP = service.GetOption(acn.OptPrivateEndpoint).(string) + infraVnet = service.GetOption(acn.OptInfrastructureNetworkID).(string) + nodeID = service.GetOption(acn.OptNodeID).(string) + ) + + service.Unlock() + getNetworkContainerResponse.Response.ReturnCode, getNetworkContainerResponse.Response.Message = service.SyncNodeStatus(dncEP, infraVnet, nodeID, req.OrchestratorContext) + service.Lock() + if getNetworkContainerResponse.Response.ReturnCode == types.NotFound { + getNetworkContainersResponse = append(getNetworkContainersResponse, getNetworkContainerResponse) + return getNetworkContainersResponse, nil + } 
+ } + default: + getNetworkContainersResponse := []cns.GetNetworkContainerResponse{} + response := cns.Response{ + ReturnCode: types.UnsupportedOrchestratorType, + Message: fmt.Sprintf("Invalid orchestrator type %v", service.state.OrchestratorType), + } + + getNetworkContainerResponse.Response = response + getNetworkContainersResponse = append(getNetworkContainersResponse, getNetworkContainerResponse) + return getNetworkContainersResponse, nil + } + + getNetworkContainersResponse := []cns.GetNetworkContainerResponse{} + getCreateNetworkContainersRequest := []cns.CreateNetworkContainerRequest{} + + for _, ncid := range ncs { + containerStatus := service.state.ContainerStatus + containerDetails, ok := containerStatus[ncid] + if !ok { + response := cns.Response{ + ReturnCode: types.UnknownContainerID, + Message: "NetworkContainer doesn't exist.", + } + + getNetworkContainerResponse.Response = response + getNetworkContainersResponse = append(getNetworkContainersResponse, getNetworkContainerResponse) + continue + } + + savedReq := containerDetails.CreateNetworkContainerRequest + getNetworkContainerResponse = cns.GetNetworkContainerResponse{ + NetworkContainerID: savedReq.NetworkContainerid, + IPConfiguration: savedReq.IPConfiguration, + Routes: savedReq.Routes, + CnetAddressSpace: savedReq.CnetAddressSpace, + MultiTenancyInfo: savedReq.MultiTenancyInfo, + PrimaryInterfaceIdentifier: savedReq.PrimaryInterfaceIdentifier, + LocalIPConfiguration: savedReq.LocalIPConfiguration, + AllowHostToNCCommunication: savedReq.AllowHostToNCCommunication, + AllowNCToHostCommunication: savedReq.AllowNCToHostCommunication, + NetworkInterfaceInfo: savedReq.NetworkInterfaceInfo, + } + + // If the NC version check wasn't skipped, take into account the VFP programming status when returning the response + if !skipNCVersionCheck { + if !containerDetails.VfpUpdateComplete { + getNetworkContainerResponse.Response = cns.Response{ + ReturnCode: types.NetworkContainerVfpProgramPending, + Message: 
"NetworkContainer VFP programming is pending", + } + } + } + getNetworkContainersResponse = append(getNetworkContainersResponse, getNetworkContainerResponse) + getCreateNetworkContainersRequest = append(getCreateNetworkContainersRequest, savedReq) + } + + logger.Printf("getNetworkContainersResponses are %+v", getNetworkContainersResponse) + + return getNetworkContainersResponse, getCreateNetworkContainersRequest +} + // restoreNetworkState restores Network state that existed before reboot. func (service *HTTPRestService) restoreNetworkState() error { logger.Printf("[Azure CNS] Enter Restoring Network State") diff --git a/network/endpoint_windows.go b/network/endpoint_windows.go index edd52327f2..d0478f6ff8 100644 --- a/network/endpoint_windows.go +++ b/network/endpoint_windows.go @@ -343,6 +343,11 @@ func (nw *network) configureHcnEndpoint(epInfo *EndpointInfo) (*hcn.HostComputeE for _, ipAddress := range epInfo.IPAddresses { prefixLength, _ := ipAddress.Mask.Size() + + if ipAddress.IP.To4() == nil { + prefixLength = 64 + } + ipConfiguration := hcn.IpConfig{ IpAddress: ipAddress.IP.String(), PrefixLength: uint8(prefixLength), diff --git a/network/network_windows.go b/network/network_windows.go index a467b20983..ce2f0fe045 100644 --- a/network/network_windows.go +++ b/network/network_windows.go @@ -6,6 +6,7 @@ package network import ( "encoding/json" "fmt" + "net" "strconv" "strings" "time" @@ -289,13 +290,34 @@ func (nm *networkManager) configureHcnNetwork(nwInfo *EndpointInfo, extIf *exter // Populate subnets. 
for _, subnet := range nwInfo.Subnets { + + prefix := subnet.Prefix + if prefix.IP.To4() == nil { + // IPv6: normalize to /64 + prefix.Mask = net.CIDRMask(64, 128) + prefix.IP = prefix.IP.Mask(prefix.Mask) // zero out host bits + } + prefixStr := prefix.String() // e.g., fd00:da04:74ff:0::/64 + + // Check if it's IPv6 + if subnet.Prefix.IP.To4() == nil { + // IPv6: replace /128 with /64 if present + prefixStr = strings.Replace(prefixStr, "/128", "/64", 1) + } + + // Choose route based on IP family + routeDest := defaultRouteCIDR + if subnet.Prefix.IP.To4() == nil { + routeDest = defaultIPv6Route + } + hnsSubnet := hcn.Subnet{ - IpAddressPrefix: subnet.Prefix.String(), + IpAddressPrefix: prefixStr, // Set the Gateway route Routes: []hcn.Route{ { NextHop: subnet.Gateway.String(), - DestinationPrefix: defaultRouteCIDR, + DestinationPrefix: routeDest, }, }, }