Skip to content

Commit c93d867

Browse files
dsseng and smira committed
fix: fix reverse routing for KubeSpan
This keeps KubeSpan from going down when rp_filter is enabled.

Fixes #9814

Co-authored-by: Andrey Smirnov <[email protected]>
Signed-off-by: Dmitry Sharshakov <[email protected]>
1 parent 19040ff commit c93d867

File tree

6 files changed

+94
-4
lines changed

6 files changed

+94
-4
lines changed

.github/workflows/ci.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
22
#
3-
# Generated on 2025-01-24T14:30:35Z by kres 3075de9.
3+
# Generated on 2025-01-28T16:44:01Z by kres 987bf4d.
44

55
name: default
66
concurrency:
@@ -2070,6 +2070,7 @@ jobs:
20702070
IMAGE_REGISTRY: registry.dev.siderolabs.io\
20712071
SHORT_INTEGRATION_TEST: "yes"
20722072
WITH_CLUSTER_DISCOVERY: "true"
2073+
WITH_CONFIG_PATCH: '@hack/test/patches/kubespan-rp_filter.yaml'
20732074
WITH_KUBESPAN: "true"
20742075
run: |
20752076
sudo -E make e2e-qemu

.github/workflows/integration-misc-1-cron.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# THIS FILE WAS AUTOMATICALLY GENERATED, PLEASE DO NOT EDIT.
22
#
3-
# Generated on 2024-12-24T15:00:58Z by kres fcff05e.
3+
# Generated on 2025-01-15T12:49:27Z by kres 3b3f992.
44

55
name: integration-misc-1-cron
66
concurrency:
@@ -92,6 +92,7 @@ jobs:
9292
IMAGE_REGISTRY: registry.dev.siderolabs.io\
9393
SHORT_INTEGRATION_TEST: "yes"
9494
WITH_CLUSTER_DISCOVERY: "true"
95+
WITH_CONFIG_PATCH: '@hack/test/patches/kubespan-rp_filter.yaml'
9596
WITH_KUBESPAN: "true"
9697
run: |
9798
sudo -E make e2e-qemu

.kres.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -754,6 +754,7 @@ spec:
754754
SHORT_INTEGRATION_TEST: yes
755755
WITH_CLUSTER_DISCOVERY: true
756756
WITH_KUBESPAN: true
757+
WITH_CONFIG_PATCH: "@hack/test/patches/kubespan-rp_filter.yaml"
757758
IMAGE_REGISTRY: registry.dev.siderolabs.io\
758759
- name: e2e-default-hostname
759760
command: e2e-qemu
hack/test/patches/kubespan-rp_filter.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
machine:
2+
sysctls:
3+
net.ipv4.conf.all.rp_filter: "1"

internal/app/machined/pkg/controllers/kubespan/manager.go

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,12 @@ import (
2727

2828
kubespanadapter "github.com/siderolabs/talos/internal/app/machined/pkg/adapters/kubespan"
2929
"github.com/siderolabs/talos/pkg/machinery/constants"
30+
"github.com/siderolabs/talos/pkg/machinery/kernel"
3031
"github.com/siderolabs/talos/pkg/machinery/nethelpers"
3132
"github.com/siderolabs/talos/pkg/machinery/resources/config"
3233
"github.com/siderolabs/talos/pkg/machinery/resources/kubespan"
3334
"github.com/siderolabs/talos/pkg/machinery/resources/network"
35+
"github.com/siderolabs/talos/pkg/machinery/resources/runtime"
3436
)
3537

3638
// DefaultPeerReconcileInterval is interval between peer status reconciliation on timer.
@@ -108,6 +110,10 @@ func (ctrl *ManagerController) Outputs() []controller.Output {
108110
Type: kubespan.PeerStatusType,
109111
Kind: controller.OutputExclusive,
110112
},
113+
{
114+
Type: runtime.KernelParamSpecType,
115+
Kind: controller.OutputShared,
116+
},
111117
}
112118
}
113119

@@ -371,13 +377,15 @@ func (ctrl *ManagerController) Run(ctx context.Context, r controller.Runtime, lo
371377
spec.Policy = nethelpers.VerdictAccept
372378

373379
spec.Rules = []network.NfTablesRule{
380+
// Accept outgoing WireGuard packets.
374381
{
375382
MatchMark: &network.NfTablesMark{
376383
Mask: constants.KubeSpanDefaultFirewallMask,
377384
Value: constants.KubeSpanDefaultFirewallMark,
378385
},
379386
Verdict: pointer.To(nethelpers.VerdictAccept),
380387
},
388+
// Mark packets to be sent over the KubeSpan link.
381389
{
382390
MatchDestinationAddress: &network.NfTablesAddressMatch{
383391
IncludeSubnets: allowedIPsSet.Prefixes(),
@@ -388,6 +396,35 @@ func (ctrl *ManagerController) Run(ctx context.Context, r controller.Runtime, lo
388396
},
389397
Verdict: pointer.To(nethelpers.VerdictAccept),
390398
},
399+
// Remove KubeSpan mark from packets not sent to KubeSpan peers or received from them.
400+
// This is typically the case when deencapsulated VXLAN packets retain envelope's fwmark, thus causing a routing loop.
401+
{
402+
MatchSourceAddress: &network.NfTablesAddressMatch{
403+
Invert: true,
404+
IncludeSubnets: allowedIPsSet.Prefixes(),
405+
},
406+
MatchMark: &network.NfTablesMark{
407+
Mask: constants.KubeSpanDefaultForceFirewallMark,
408+
Value: constants.KubeSpanDefaultForceFirewallMark,
409+
},
410+
SetMark: &network.NfTablesMark{
411+
Mask: 0xffffffff,
412+
Xor: constants.KubeSpanDefaultForceFirewallMark,
413+
},
414+
Verdict: pointer.To(nethelpers.VerdictAccept),
415+
},
416+
// Mark incoming packets from the KubeSpan link for rp_filter to find the correct routing table.
417+
{
418+
MatchIIfName: &network.NfTablesIfNameMatch{
419+
InterfaceNames: []string{constants.KubeSpanLinkName},
420+
Operator: nethelpers.OperatorEqual,
421+
},
422+
SetMark: &network.NfTablesMark{
423+
Mask: ^uint32(constants.KubeSpanDefaultFirewallMask),
424+
Xor: constants.KubeSpanDefaultForceFirewallMark,
425+
},
426+
Verdict: pointer.To(nethelpers.VerdictAccept),
427+
},
391428
}
392429

393430
return nil
@@ -554,6 +591,17 @@ func (ctrl *ManagerController) Run(ctx context.Context, r controller.Runtime, lo
554591
return fmt.Errorf("error modifying link spec: %w", err)
555592
}
556593

594+
if err = safe.WriterModify(ctx, r, runtime.NewKernelParamSpec(
595+
runtime.NamespaceName,
596+
kernel.Sysctl+".net.ipv4.conf."+constants.KubeSpanLinkName+".src_valid_mark",
597+
), func(res *runtime.KernelParamSpec) error {
598+
res.TypedSpec().Value = "1"
599+
600+
return nil
601+
}); err != nil {
602+
return err
603+
}
604+
557605
if rulesMgr == nil {
558606
rulesMgr = ctrl.RulesManagerFactory(constants.KubeSpanDefaultRoutingTable, constants.KubeSpanDefaultForceFirewallMark, constants.KubeSpanDefaultFirewallMask)
559607

@@ -591,6 +639,10 @@ func (ctrl *ManagerController) cleanup(ctx context.Context, r controller.Runtime
591639
namespace: kubespan.NamespaceName,
592640
typ: kubespan.PeerStatusType,
593641
},
642+
{
643+
namespace: runtime.NamespaceName,
644+
typ: runtime.KernelParamSpecType,
645+
},
594646
} {
595647
// list keys for cleanup
596648
list, err := r.List(ctx, resource.NewMetadata(item.namespace, item.typ, "", resource.VersionUndefined))

internal/app/machined/pkg/controllers/kubespan/manager_test.go

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"github.com/siderolabs/talos/pkg/machinery/resources/config"
2626
"github.com/siderolabs/talos/pkg/machinery/resources/kubespan"
2727
"github.com/siderolabs/talos/pkg/machinery/resources/network"
28+
"github.com/siderolabs/talos/pkg/machinery/resources/runtime"
2829
)
2930

3031
type ManagerSuite struct {
@@ -40,6 +41,10 @@ func (suite *ManagerSuite) TestDisabled() {
4041
suite.Require().NoError(suite.State().Create(suite.Ctx(), cfg))
4142

4243
ctest.AssertNoResource[*network.NfTablesChain](suite, "kubespan_outgoing")
44+
ctest.AssertNoResource[*runtime.KernelParamSpec](
45+
suite,
46+
"proc.sys.net.ipv4.conf.kubespan.src_valid_mark",
47+
)
4348
}
4449

4550
type mockWireguardClient struct {
@@ -244,9 +249,9 @@ func (suite *ManagerSuite) TestReconcile() {
244249
asrt.Equal(nethelpers.ChainPriorityFilter, spec.Priority)
245250
asrt.Equal(nethelpers.VerdictAccept, spec.Policy)
246251

247-
asrt.Len(spec.Rules, 2)
252+
asrt.Len(spec.Rules, 3)
248253

249-
if len(spec.Rules) != 2 {
254+
if len(spec.Rules) != 3 {
250255
return
251256
}
252257

@@ -277,6 +282,21 @@ func (suite *ManagerSuite) TestReconcile() {
277282
},
278283
spec.Rules[1],
279284
)
285+
286+
asrt.Equal(
287+
network.NfTablesRule{
288+
MatchIIfName: &network.NfTablesIfNameMatch{
289+
InterfaceNames: []string{constants.KubeSpanLinkName},
290+
Operator: nethelpers.OperatorEqual,
291+
},
292+
SetMark: &network.NfTablesMark{
293+
Mask: ^uint32(constants.KubeSpanDefaultFirewallMask),
294+
Xor: constants.KubeSpanDefaultForceFirewallMark,
295+
},
296+
Verdict: pointer.To(nethelpers.VerdictAccept),
297+
},
298+
spec.Rules[2],
299+
)
280300
},
281301
)
282302

@@ -358,6 +378,14 @@ func (suite *ManagerSuite) TestReconcile() {
358378
},
359379
)
360380

381+
ctest.AssertResource(
382+
suite,
383+
"proc.sys.net.ipv4.conf.kubespan.src_valid_mark",
384+
func(res *runtime.KernelParamSpec, asrt *assert.Assertions) {
385+
asrt.Equal(res.TypedSpec().Value, "1")
386+
},
387+
)
388+
361389
// update config and disable wireguard, everything should be cleaned up
362390
cfg.TypedSpec().Enabled = false
363391
suite.Require().NoError(suite.State().Update(suite.Ctx(), cfg))
@@ -371,6 +399,10 @@ func (suite *ManagerSuite) TestReconcile() {
371399
suite,
372400
"kubespan_prerouting",
373401
)
402+
ctest.AssertNoResource[*runtime.KernelParamSpec](
403+
suite,
404+
"proc.sys.net.ipv4.conf.kubespan.src_valid_mark",
405+
)
374406
}
375407

376408
func asUDP(addr netip.AddrPort) *net.UDPAddr {

0 commit comments

Comments
 (0)