-
Notifications
You must be signed in to change notification settings - Fork 76
feat: merge two metric servers #728
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,26 +18,22 @@ package main | |
|
||
import ( | ||
"flag" | ||
"net" | ||
"net/http" | ||
"os" | ||
"strconv" | ||
|
||
"github.com/go-logr/logr" | ||
"github.com/prometheus/client_golang/prometheus/promhttp" | ||
uberzap "go.uber.org/zap" | ||
"go.uber.org/zap/zapcore" | ||
"google.golang.org/grpc" | ||
healthPb "google.golang.org/grpc/health/grpc_health_v1" | ||
"k8s.io/client-go/rest" | ||
"k8s.io/component-base/metrics/legacyregistry" | ||
ctrl "sigs.k8s.io/controller-runtime" | ||
"sigs.k8s.io/controller-runtime/pkg/log/zap" | ||
"sigs.k8s.io/controller-runtime/pkg/manager" | ||
"sigs.k8s.io/controller-runtime/pkg/metrics/filters" | ||
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" | ||
|
||
"sigs.k8s.io/gateway-api-inference-extension/internal/runnable" | ||
"sigs.k8s.io/gateway-api-inference-extension/pkg/bbr/metrics" | ||
runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/bbr/server" | ||
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics" | ||
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" | ||
) | ||
|
||
|
@@ -50,9 +46,8 @@ var ( | |
"grpcHealthPort", | ||
9005, | ||
"The port used for gRPC liveness and readiness probes") | ||
metricsPort = flag.Int( | ||
"metricsPort", 9090, "The metrics port") | ||
streaming = flag.Bool( | ||
metricsAddr = flag.String("metrics-bind-address", ":9090", "The address the metric endpoint binds to.") | ||
streaming = flag.Bool( | ||
"streaming", false, "Enables streaming support for Envoy full-duplex streaming mode") | ||
logVerbosity = flag.Int("v", logging.DEFAULT, "number for the log level verbosity") | ||
|
||
|
@@ -85,7 +80,18 @@ func run() error { | |
return err | ||
} | ||
|
||
mgr, err := ctrl.NewManager(cfg, ctrl.Options{}) | ||
metrics.Register() | ||
|
||
// Register metrics handler. | ||
// Metrics endpoint is enabled in 'config/default/kustomization.yaml'. The Metrics options configure the server. | ||
// More info: | ||
// - https://pkg.go.dev/sigs.k8s.io/[email protected]/pkg/metrics/server | ||
// - https://book.kubebuilder.io/reference/metrics.html | ||
metricsServerOptions := metricsserver.Options{ | ||
BindAddress: *metricsAddr, | ||
FilterProvider: filters.WithAuthenticationAndAuthorization, | ||
} | ||
mgr, err := ctrl.NewManager(cfg, ctrl.Options{Metrics: metricsServerOptions}) | ||
if err != nil { | ||
setupLog.Error(err, "Failed to create manager", "config", cfg) | ||
return err | ||
|
@@ -107,11 +113,6 @@ func run() error { | |
return err | ||
} | ||
|
||
// Register metrics handler. | ||
if err := registerMetricsHandler(mgr, *metricsPort, cfg); err != nil { | ||
return err | ||
} | ||
|
||
// Start the manager. This blocks until a signal is received. | ||
setupLog.Info("Manager starting") | ||
if err := mgr.Start(ctx); err != nil { | ||
|
@@ -152,58 +153,3 @@ func initLogging(opts *zap.Options) { | |
logger := zap.New(zap.UseFlagOptions(opts), zap.RawZapOpts(uberzap.AddCaller())) | ||
ctrl.SetLogger(logger) | ||
} | ||
|
||
const metricsEndpoint = "/metrics" | ||
|
||
// registerMetricsHandler adds the metrics HTTP handler as a Runnable to the given manager. | ||
func registerMetricsHandler(mgr manager.Manager, port int, cfg *rest.Config) error { | ||
metrics.Register() | ||
|
||
// Init HTTP server. | ||
h, err := metricsHandlerWithAuthenticationAndAuthorization(cfg) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
mux := http.NewServeMux() | ||
mux.Handle(metricsEndpoint, h) | ||
|
||
srv := &http.Server{ | ||
Addr: net.JoinHostPort("", strconv.Itoa(port)), | ||
Handler: mux, | ||
} | ||
|
||
if err := mgr.Add(&manager.Server{ | ||
Name: "metrics", | ||
Server: srv, | ||
}); err != nil { | ||
setupLog.Error(err, "Failed to register metrics HTTP handler") | ||
return err | ||
} | ||
return nil | ||
} | ||
|
||
func metricsHandlerWithAuthenticationAndAuthorization(cfg *rest.Config) (http.Handler, error) { | ||
h := promhttp.HandlerFor( | ||
legacyregistry.DefaultGatherer, | ||
promhttp.HandlerOpts{}, | ||
) | ||
httpClient, err := rest.HTTPClientFor(cfg) | ||
if err != nil { | ||
setupLog.Error(err, "Failed to create http client for metrics auth") | ||
return nil, err | ||
} | ||
|
||
filter, err := filters.WithAuthenticationAndAuthorization(cfg, httpClient) | ||
if err != nil { | ||
setupLog.Error(err, "Failed to create metrics filter for auth") | ||
return nil, err | ||
} | ||
metricsLogger := ctrl.Log.WithName("metrics").WithValues("path", metricsEndpoint) | ||
metricsAuthHandler, err := filter(metricsLogger, h) | ||
if err != nil { | ||
setupLog.Error(err, "Failed to create metrics auth handler") | ||
return nil, err | ||
} | ||
return metricsAuthHandler, nil | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,38 +19,29 @@ package main | |
import ( | ||
"flag" | ||
"fmt" | ||
"net" | ||
"net/http" | ||
"os" | ||
"strconv" | ||
|
||
"github.com/go-logr/logr" | ||
"github.com/prometheus/client_golang/prometheus/promhttp" | ||
uberzap "go.uber.org/zap" | ||
"go.uber.org/zap/zapcore" | ||
"google.golang.org/grpc" | ||
healthPb "google.golang.org/grpc/health/grpc_health_v1" | ||
"k8s.io/apimachinery/pkg/types" | ||
"k8s.io/client-go/rest" | ||
"k8s.io/component-base/metrics/legacyregistry" | ||
ctrl "sigs.k8s.io/controller-runtime" | ||
"sigs.k8s.io/controller-runtime/pkg/log/zap" | ||
"sigs.k8s.io/controller-runtime/pkg/manager" | ||
"sigs.k8s.io/controller-runtime/pkg/metrics/filters" | ||
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" | ||
|
||
"sigs.k8s.io/gateway-api-inference-extension/internal/runnable" | ||
backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics" | ||
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore" | ||
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics" | ||
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics/collectors" | ||
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling" | ||
runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/server" | ||
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging" | ||
) | ||
|
||
const ( | ||
defaultMetricsEndpoint = "/metrics" | ||
) | ||
|
||
var ( | ||
grpcPort = flag.Int( | ||
"grpcPort", | ||
|
@@ -60,8 +51,7 @@ var ( | |
"grpcHealthPort", | ||
9003, | ||
"The port used for gRPC liveness and readiness probes") | ||
metricsPort = flag.Int( | ||
"metricsPort", 9090, "The metrics port") | ||
metricsAddr = flag.String("metrics-bind-address", ":9090", "The address the metric endpoint binds to.") | ||
destinationEndpointHintKey = flag.String( | ||
"destinationEndpointHintKey", | ||
runserver.DefaultDestinationEndpointHintKey, | ||
|
@@ -143,11 +133,22 @@ func run() error { | |
return err | ||
} | ||
|
||
metrics.Register() | ||
// Register metrics handler. | ||
// Metrics endpoint is enabled in 'config/default/kustomization.yaml'. The Metrics options configure the server. | ||
// More info: | ||
// - https://pkg.go.dev/sigs.k8s.io/[email protected]/pkg/metrics/server | ||
// - https://book.kubebuilder.io/reference/metrics.html | ||
metricsServerOptions := metricsserver.Options{ | ||
BindAddress: *metricsAddr, | ||
FilterProvider: filters.WithAuthenticationAndAuthorization, | ||
} | ||
|
||
poolNamespacedName := types.NamespacedName{ | ||
Name: *poolName, | ||
Namespace: *poolNamespace, | ||
} | ||
mgr, err := runserver.NewDefaultManager(poolNamespacedName, cfg) | ||
mgr, err := runserver.NewDefaultManager(poolNamespacedName, cfg, metricsServerOptions) | ||
if err != nil { | ||
setupLog.Error(err, "Failed to create controller manager") | ||
return err | ||
|
@@ -199,11 +200,6 @@ func run() error { | |
return err | ||
} | ||
|
||
// Register metrics handler. | ||
if err := registerMetricsHandler(mgr, *metricsPort, cfg, datastore); err != nil { | ||
return err | ||
} | ||
|
||
// Start the manager. This blocks until a signal is received. | ||
setupLog.Info("Controller manager starting") | ||
if err := mgr.Start(ctx); err != nil { | ||
|
@@ -247,62 +243,6 @@ func registerHealthServer(mgr manager.Manager, logger logr.Logger, ds datastore. | |
return nil | ||
} | ||
|
||
// registerMetricsHandler adds the metrics HTTP handler as a Runnable to the given manager. | ||
func registerMetricsHandler(mgr manager.Manager, port int, cfg *rest.Config, ds datastore.Datastore) error { | ||
metrics.Register() | ||
legacyregistry.CustomMustRegister(collectors.NewInferencePoolMetricsCollector(ds)) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How will the EPP expose InferencePool metrics with There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. +1 that we need to keep the collector because the per pod queue size metric uses pod name as metric labels which will blow up the cardinality if it is not exported from a collector: https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/pkg/epp/metrics/collectors/inference_pool.go#L26 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for catching this. I need to figure out how to refactor it using |
||
|
||
metrics.RecordInferenceExtensionInfo() | ||
|
||
// Init HTTP server. | ||
h, err := metricsHandlerWithAuthenticationAndAuthorization(cfg) | ||
if err != nil { | ||
return err | ||
} | ||
|
||
mux := http.NewServeMux() | ||
mux.Handle(defaultMetricsEndpoint, h) | ||
|
||
srv := &http.Server{ | ||
Addr: net.JoinHostPort("", strconv.Itoa(port)), | ||
Handler: mux, | ||
} | ||
|
||
if err := mgr.Add(&manager.Server{ | ||
Name: "metrics", | ||
Server: srv, | ||
}); err != nil { | ||
setupLog.Error(err, "Failed to register metrics HTTP handler") | ||
return err | ||
} | ||
return nil | ||
} | ||
|
||
func metricsHandlerWithAuthenticationAndAuthorization(cfg *rest.Config) (http.Handler, error) { | ||
h := promhttp.HandlerFor( | ||
legacyregistry.DefaultGatherer, | ||
promhttp.HandlerOpts{}, | ||
) | ||
httpClient, err := rest.HTTPClientFor(cfg) | ||
if err != nil { | ||
setupLog.Error(err, "Failed to create http client for metrics auth") | ||
return nil, err | ||
} | ||
|
||
filter, err := filters.WithAuthenticationAndAuthorization(cfg, httpClient) | ||
if err != nil { | ||
setupLog.Error(err, "Failed to create metrics filter for auth") | ||
return nil, err | ||
} | ||
metricsLogger := ctrl.Log.WithName("metrics").WithValues("path", defaultMetricsEndpoint) | ||
metricsAuthHandler, err := filter(metricsLogger, h) | ||
if err != nil { | ||
setupLog.Error(err, "Failed to create metrics auth handler") | ||
return nil, err | ||
} | ||
return metricsAuthHandler, nil | ||
} | ||
|
||
func validateFlags() error { | ||
if *poolName == "" { | ||
return fmt.Errorf("required %q flag not set", "poolName") | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
.Values.inferenceExtension.metricsPort
in./config/charts/inferencepool/templates/epp-service.yaml
should be updated to match this flag since an address can now be specified. However, implementations may be setting this flag so it's safer to keep the flag as-is and concatenate ":" withmetricsPort
formetricsserver.Options
. @kfswain @ahg-g are you OK with changing this flag? If the flag is changed, consider adding to the usage stating "" or "0" disables the metrics endpoint.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agreed that this may break implementations setting this flag on their own. Unless there is a strong reason to change it. I would prefer not to.