Skip to content

Fix Linux Node Exporter #3305

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion clusterloader2/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ require (
github.com/stretchr/testify v1.10.0
golang.org/x/crypto v0.37.0
golang.org/x/oauth2 v0.29.0
golang.org/x/sync v0.13.0
golang.org/x/time v0.11.0
gopkg.in/yaml.v2 v2.4.0
k8s.io/api v0.32.3
Expand Down Expand Up @@ -113,6 +112,7 @@ require (
go.uber.org/atomic v1.10.0 // indirect
go.uber.org/goleak v1.3.0 // indirect
golang.org/x/net v0.38.0 // indirect
golang.org/x/sync v0.13.0 // indirect
golang.org/x/sys v0.32.0 // indirect
golang.org/x/term v0.31.0 // indirect
golang.org/x/text v0.24.0 // indirect
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
apiVersion: monitoring.coreos.com/v1
kind: PodMonitor
metadata:
name: node-exporter
namespace: monitoring
labels:
app: node-exporter # must match your Prometheus CR's `podMonitorSelector`
spec:
selector:
matchLabels:
app: node-exporter
namespaceSelector:
matchNames:
- monitoring
podMetricsEndpoints:
- port: metrics
path: /metrics
interval: 5s

Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
apiVersion: apps/v1
kind: DaemonSet
metadata:
name: node-exporter
namespace: monitoring
labels:
app: node-exporter
spec:
selector:
matchLabels:
app: node-exporter
template:
metadata:
labels:
app: node-exporter
spec:
hostNetwork: true
hostPID: true
containers:
- name: node-exporter
image: quay.io/prometheus/node-exporter:latest
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO: pin a specific release tag instead of using `latest`.

args:
- "--path.procfs=/host/proc"
- "--path.sysfs=/host/sys"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

TODO: figure out which collectors people actually want to enable.

- "--collector.disable-defaults"
- "--collector.cpu"
- "--collector.meminfo"
- "--collector.loadavg"
- "--collector.stat"
ports:
- containerPort: 9100
hostPort: 9100
name: metrics
resources:
limits:
memory: 100Mi
requests:
cpu: 100m
memory: 50Mi
volumeMounts:
- name: proc
mountPath: /host/proc
readOnly: true
- name: sys
mountPath: /host/sys
readOnly: true
nodeSelector:
kubernetes.io/os: linux
volumes:
- name: proc
hostPath:
path: /proc
- name: sys
hostPath:
path: /sys
terminationGracePeriodSeconds: 30

This file was deleted.

45 changes: 2 additions & 43 deletions clusterloader2/pkg/prometheus/prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ import (
"sync"
"time"

"golang.org/x/sync/errgroup"
authenticationv1 "k8s.io/api/authentication/v1"
corev1 "k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
Expand Down Expand Up @@ -59,7 +58,7 @@ const (
kubeStateMetricsManifests = "exporters/kube-state-metrics/*.yaml"
masterIPServiceMonitors = "master-ip/*.yaml"
metricsServerManifests = "exporters/metrics-server/*.yaml"
nodeExporterPod = "exporters/node_exporter/node-exporter.yaml"
nodeExporterManifests = "exporters/node_exporter/*.yaml"
windowsNodeExporterManifests = "exporters/windows_node_exporter/*.yaml"
pushgatewayManifests = "pushgateway/*.yaml"
)
Expand Down Expand Up @@ -260,7 +259,7 @@ func (pc *Controller) SetUpPrometheusStack() error {
return err
}
if pc.clusterLoaderConfig.PrometheusConfig.ScrapeNodeExporter {
if err := pc.runNodeExporter(); err != nil {
if err := pc.applyDefaultManifests(nodeExporterManifests); err != nil {
return err
}
}
Expand Down Expand Up @@ -520,46 +519,6 @@ func (pc *Controller) configureRBACForMetrics(testClusterClientSet kubernetes.In
return nil
}

// runNodeExporter adds node-exporter as master's static manifest pod.
// TODO(mborsz): Consider migrating to something less ugly, e.g. daemonset-based approach,
// when master nodes have configured networking.
func (pc *Controller) runNodeExporter() error {
klog.V(2).Infof("Starting node-exporter on master nodes.")
kubemarkFramework, err := framework.NewFramework(&pc.clusterLoaderConfig.ClusterConfig, numK8sClients)
if err != nil {
return err
}

// Validate masters first
nodes, err := client.ListNodes(kubemarkFramework.GetClientSets().GetClient())
if err != nil {
return err
}

var g errgroup.Group
numMasters := 0
for _, node := range nodes {
node := node
if util.LegacyIsMasterNode(&node) || util.IsControlPlaneNode(&node) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do cloud environments even give access to the control plane nodes?

Copy link
Contributor Author

@MikeZappa87 MikeZappa87 Apr 27, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can understand the motivation for making this a static pod; however, should we maintain this requirement? It's not hard to keep both the daemonset and the static pods, but they should be enabled with separate flags. What do we gain from these being static pods if the rest of the pods, such as Prometheus, haven't been scheduled yet? It's probably safe to just use the daemonset.

numMasters++
g.Go(func() error {
f, err := manifestsFS.Open(nodeExporterPod)
if err != nil {
return fmt.Errorf("unable to open manifest file: %v", err)
}
defer f.Close()
return pc.ssh.Exec("sudo tee /etc/kubernetes/manifests/node-exporter.yaml > /dev/null", &node, f)
})
}
}

if numMasters == 0 {
return fmt.Errorf("node-exporter requires master to be registered nodes")
}

return g.Wait()
}

func (pc *Controller) waitForPrometheusToBeHealthy() error {
klog.V(2).Info("Waiting for Prometheus stack to become healthy...")
return wait.PollImmediate(
Expand Down