Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 56 additions & 78 deletions go.mod

Large diffs are not rendered by default.

164 changes: 60 additions & 104 deletions go.sum

Large diffs are not rendered by default.

1 change: 0 additions & 1 deletion pkg/cli/defaults/defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ func Set(_ *cli.Context, dataDir string) error {

cmds.ServerConfig.ClusterInit = true
cmds.ServerConfig.DisableNPC = true
cmds.ServerConfig.FlannelBackend = "none"
cmds.ServerConfig.AdvertisePort = 6443
cmds.ServerConfig.SupervisorPort = 9345
cmds.ServerConfig.HTTPSPort = 6443
Expand Down
47 changes: 22 additions & 25 deletions pkg/executor/pebinary/pebinary.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ type PEBinaryConfig struct {

apiServerReady <-chan struct{}
criReady chan struct{}
cniReady chan struct{}
dataReady chan struct{}
}

Expand All @@ -94,6 +95,7 @@ const (
func (p *PEBinaryConfig) Bootstrap(ctx context.Context, nodeConfig *config.Node, cfg cmds.Agent) error {
p.apiServerReady = util.APIServerReadyChan(ctx, nodeConfig.AgentConfig.KubeConfigK3sController, util.DefaultAPIServerReadyTimeout)
p.criReady = make(chan struct{})
p.cniReady = make(chan struct{})
p.dataReady = make(chan struct{})

// On servers this is set to an initial value from the CLI when the resolver is created, so that
Expand Down Expand Up @@ -145,7 +147,7 @@ func (p *PEBinaryConfig) Bootstrap(ctx context.Context, nodeConfig *config.Node,
case CNIFlannel:
p.CNIPlugin = &win.Flannel{}
case CNINone:
return nil
p.CNIPlugin = &win.None{}
default:
return fmt.Errorf("unsupported CNI %s", p.CNIName)
}
Expand Down Expand Up @@ -192,22 +194,12 @@ func (p *PEBinaryConfig) Kubelet(ctx context.Context, args []string) error {
win.ProcessWaitGroup.StartWithContext(ctx, func(ctx context.Context) {
for {
logrus.Infof("Running RKE2 kubelet %v", cleanArgs)
cniCtx, cancel := context.WithCancel(ctx)
if p.CNIName != CNINone {
go func() {
if err := p.CNIPlugin.Start(cniCtx); err != nil {
logrus.Errorf("Failed to start %s CNI: %v", p.CNIName, err)
}
}()
}

cmd := exec.CommandContext(ctx, p.KubeletPath, cleanArgs...)
cmd.Stdout = logOut
cmd.Stderr = logOut
if err := cmd.Run(); err != nil {
logrus.Errorf("Kubelet exited: %v", err)
}
cancel()

// If the rke2-uninstall.ps1 script created the lock file, we are removing rke2 and thus we don't restart kubelet
if _, err := os.Stat(lockFile); err == nil {
Expand All @@ -226,23 +218,23 @@ func (p *PEBinaryConfig) Kubelet(ctx context.Context, args []string) error {
return nil
}

// KubeProxy starts the kubeproxy in a subprocess with watching goroutine.
// KubeProxy starts the kube-proxy in a subprocess with watching goroutine.
// kube-proxy must be started after the CNI, as most CNIs reserve a VIP for it
// to use as the source for proxied traffic.
// ref: https://github.com/kubernetes/kubernetes/issues/123014
func (p *PEBinaryConfig) KubeProxy(ctx context.Context, args []string) error {
if p.CNIName == CNINone {
return nil
}
<-p.cniReady

CNIConfig := p.CNIPlugin.GetConfig()
vip, err := p.CNIPlugin.ReserveSourceVip(ctx)
if err != nil || vip == "" {
logrus.Errorf("Failed to reserve VIP for kube-proxy: %v", err)
extraArgs := map[string]string{}
config := p.CNIPlugin.GetConfig()
if config.OverlayNetName != "" {
extraArgs["network-name"] = config.OverlayNetName
}
logrus.Infof("Reserved VIP for kube-proxy: %s", vip)

extraArgs := map[string]string{
"network-name": CNIConfig.OverlayNetName,
"bind-address": CNIConfig.NodeIP,
"source-vip": vip,
if config.NodeIP != "" {
extraArgs["bind-address"] = config.NodeIP
}
if config.VIPAddress != "" {
extraArgs["source-vip"] = config.VIPAddress
}

if err := hcn.DSRSupported(); err == nil {
Expand Down Expand Up @@ -411,6 +403,11 @@ func (p *PEBinaryConfig) CRI(ctx context.Context, cfg *config.Node) error {
return executor.CloseIfNilErr(nil, p.criReady)
}

func (p *PEBinaryConfig) CNI(ctx context.Context, wg *sync.WaitGroup, cfg *config.Node) error {
logrus.Infof("Running RKE2 CNI: %s", p.CNIName)
return executor.CloseIfNilErr(p.CNIPlugin.Start(ctx), p.cniReady)
}

func (p *PEBinaryConfig) APIServerReadyChan() <-chan struct{} {
if p.apiServerReady == nil {
panic("executor not bootstrapped")
Expand Down
5 changes: 5 additions & 0 deletions pkg/executor/staticpod/staticpod.go
Original file line number Diff line number Diff line change
Expand Up @@ -505,6 +505,11 @@ func (s *StaticPodConfig) CRI(ctx context.Context, cfg *daemonconfig.Node) error
return executor.CloseIfNilErr(nil, s.criReady)
}

func (s *StaticPodConfig) CNI(ctx context.Context, wg *sync.WaitGroup, config *daemonconfig.Node) error {
// CNI on linux is run in pods deployed by charts
return nil
}

func (s *StaticPodConfig) APIServerReadyChan() <-chan struct{} {
if s.apiServerReady == nil {
panic("executor not bootstrapped")
Expand Down
2 changes: 1 addition & 1 deletion pkg/logging/logging.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ func ExtractFromArgs(args []string) ([]string, io.Writer) {
}

// GetLogger returns a new io.Writer that writes to the specified file
func GetLogger(filename string, maxSize int) io.Writer {
func GetLogger(filename string, maxSize int) io.WriteCloser {
return &lumberjack.Logger{
Filename: filename,
MaxSize: int(maxSize),
Expand Down
117 changes: 69 additions & 48 deletions pkg/windows/calico.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,9 @@ users:
`))
)

// explicit interface check
var _ CNIPlugin = &Calico{}

type Calico struct {
CNICfg *CalicoConfig
KubeClient *kubernetes.Clientset
Expand Down Expand Up @@ -270,35 +273,31 @@ func (c *Calico) Start(ctx context.Context) error {
if err := wait.PollUntilContextTimeout(ctx, 5*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) {
_, err := c.KubeClient.CoreV1().Nodes().Get(ctx, c.CNICfg.Hostname, metav1.GetOptions{})
if err != nil {
logrus.WithError(err).Warningf("Calico can't start because it can't find node, retrying %s", c.CNICfg.Hostname)
logrus.WithError(err).Warningf("Calico can't start because it can't find node %s, retrying...", c.CNICfg.Hostname)
return false, nil
}

logrus.Infof("Node %s registered. Calico can start", c.CNICfg.Hostname)

if err := startCalico(ctx, c.CNICfg, logPath); err != nil {
logrus.Errorf("Calico exited: %v. Retrying", err)
logrus.Errorf("Calico startup failed: %v, retrying...", err)
return false, nil
}
return true, nil
}); err != nil {
return err
}

go startFelix(ctx, c.CNICfg, logPath)
if c.CNICfg.OverlayEncap == "windows-bgp" {
go startConfd(ctx, c.CNICfg, logPath)
}
go runFelix(ctx, c.CNICfg, logPath)
go runConfd(ctx, c.CNICfg, logPath)

// Delete policies in case calico network is being reused
policies, _ := hcsshim.HNSListPolicyListRequest()
for _, policy := range policies {
policy.Delete()
}

logrus.Info("Calico started correctly")

return nil
return reserveCalicoVIP(ctx, c.CNICfg)
}

// generateCalicoNetworks creates the overlay networks for internode networking
Expand Down Expand Up @@ -418,9 +417,12 @@ func findCalicoInterface(nodeV4 *opv1.NodeAddressAutodetection) (IPAutoDetection
return
}

// startConfd starts the confd service (for BGP)
func startConfd(ctx context.Context, config *CalicoConfig, logPath string) {
outputFile := logging.GetLogger(filepath.Join(logPath, "confd.log"), 50)
// runConfd starts the confd service (for BGP)
func runConfd(ctx context.Context, config *CalicoConfig, logPath string) {
logPath = filepath.Join(logPath, "confd.log")
if config.OverlayEncap != "windows-bgp" {
return
}

specificEnvs := []string{
fmt.Sprintf("PATH=%s", os.Getenv("PATH")),
Expand All @@ -430,21 +432,32 @@ func startConfd(ctx context.Context, config *CalicoConfig, logPath string) {
"-confd",
fmt.Sprintf("-confd-confdir=%s", filepath.Join(config.CNIBinDir, "confd")),
}

logrus.Infof("Confd Envs: %s", append(generateGeneralCalicoEnvs(config), specificEnvs...))
cmd := exec.CommandContext(ctx, "calico-node.exe", args...)
cmd.Env = append(generateGeneralCalicoEnvs(config), specificEnvs...)
cmd.Stdout = outputFile
cmd.Stderr = outputFile
_ = os.Chdir(filepath.Join(config.CNIBinDir, "confd"))
_ = cmd.Run()
logrus.Error("Confd exited")
}

// startFelix starts the felix service
func startFelix(ctx context.Context, config *CalicoConfig, logPath string) {
outputFile := logging.GetLogger(filepath.Join(logPath, "felix.log"), 50)
for {
logrus.Infof("Confd logging to %s", logPath)
outputFile := logging.GetLogger(logPath, 50)
cmd := exec.CommandContext(ctx, "calico-node.exe", args...)
cmd.Env = append(generateGeneralCalicoEnvs(config), specificEnvs...)
cmd.Stdout = outputFile
cmd.Stderr = outputFile
os.Chdir(filepath.Join(config.CNIBinDir, "confd"))
err := cmd.Run()
if ctx.Err() != nil {
return
}
if eerr, ok := err.(*exec.ExitError); ok && eerr.ProcessState.ExitCode() == 129 {
logrus.Infof("Confd exited for config reload; restarting...")
}
logrus.WithError(err).Info("Confd exited, restarting...")
outputFile.Close()
time.Sleep(time.Second)
}
}

// runFelix starts the felix service
func runFelix(ctx context.Context, config *CalicoConfig, logPath string) {
logPath = filepath.Join(logPath, "felix.log")
specificEnvs := []string{
fmt.Sprintf("FELIX_FELIXHOSTNAME=%s", config.Hostname),
fmt.Sprintf("FELIX_VXLANVNI=%s", config.VxlanVNI),
Expand All @@ -463,17 +476,32 @@ func startFelix(ctx context.Context, config *CalicoConfig, logPath string) {
}

logrus.Infof("Felix Envs: %s", append(generateGeneralCalicoEnvs(config), specificEnvs...))
cmd := exec.CommandContext(ctx, "calico-node.exe", args...)
cmd.Env = append(generateGeneralCalicoEnvs(config), specificEnvs...)
cmd.Stdout = outputFile
cmd.Stderr = outputFile
_ = cmd.Run()
logrus.Error("Felix exited")

for {
logrus.Infof("Felix logging to %s", logPath)
outputFile := logging.GetLogger(logPath, 50)
cmd := exec.CommandContext(ctx, "calico-node.exe", args...)
cmd.Env = append(generateGeneralCalicoEnvs(config), specificEnvs...)
cmd.Stdout = outputFile
cmd.Stderr = outputFile
err := cmd.Run()
if ctx.Err() != nil {
return
}
if eerr, ok := err.(*exec.ExitError); ok && eerr.ProcessState.ExitCode() == 129 {
logrus.Infof("Felix exited for config reload; restarting...")
}
logrus.WithError(err).Info("Felix exited, restarting...")
outputFile.Close()
}
}

// startCalico starts the calico service
// startCalico runs the calico non-privileged start-up routine.
// This is expected to exit with 0 exit code when the CNI is ready to operate.
func startCalico(ctx context.Context, config *CalicoConfig, logPath string) error {
outputFile := logging.GetLogger(filepath.Join(logPath, "calico-node.log"), 50)
logPath = filepath.Join(logPath, "calico-node.log")
outputFile := logging.GetLogger(logPath, 50)
defer outputFile.Close()

specificEnvs := []string{
fmt.Sprintf("CALICO_NODENAME_FILE=%s", config.NodeNameFile),
Expand All @@ -494,14 +522,12 @@ func startCalico(ctx context.Context, config *CalicoConfig, logPath string) erro
"-startup",
}
logrus.Infof("Calico Envs: %s", append(generateGeneralCalicoEnvs(config), specificEnvs...))
logrus.Infof("Calico logging to %s", logPath)
cmd := exec.CommandContext(ctx, "calico-node.exe", args...)
cmd.Env = append(generateGeneralCalicoEnvs(config), specificEnvs...)
cmd.Stdout = outputFile
cmd.Stderr = outputFile
if err := cmd.Run(); err != nil {
return err
}
return nil
return cmd.Run()
}

func generateGeneralCalicoEnvs(config *CalicoConfig) []string {
Expand All @@ -516,24 +542,19 @@ func generateGeneralCalicoEnvs(config *CalicoConfig) []string {
}
}

// ReserveSourceVip reserves a source VIP for kube-proxy
func (c *Calico) ReserveSourceVip(ctx context.Context) (string, error) {
var vip string

if err := wait.PollUntilContextTimeout(ctx, 5*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) {
// reserveCalicoVIP reserves a source VIP for kube-proxy
// If successful, the VIP is stored to config.CNICommonConfig.VIPAddress.
func reserveCalicoVIP(ctx context.Context, config *CalicoConfig) error {
return wait.PollUntilContextTimeout(ctx, 5*time.Second, 5*time.Minute, true, func(ctx context.Context) (bool, error) {
// calico-node is creating an endpoint named Calico_ep for this purpose
endpoint, err := hcsshim.GetHNSEndpointByName("Calico_ep")
if err != nil {
logrus.WithError(err).Warning("can't find Calico_ep HNS endpoint, retrying")
logrus.WithError(err).Warning("Can't find Calico_ep HNS endpoint, retrying")
return false, nil
}
vip = endpoint.IPAddress.String()
config.CNICommonConfig.VIPAddress = endpoint.IPAddress.String()
return true, nil
}); err != nil {
return "", err
}

return vip, nil
})
}

// Get latest stored reboot
Expand Down
Loading
Loading