From e6f6f9d5b1abbe6bc630393696de232d6b68f28b Mon Sep 17 00:00:00 2001 From: Ryan Anderson Date: Wed, 27 Aug 2025 17:46:18 -0700 Subject: [PATCH 1/9] Introduce host simplification rules Today, Envoy only supports a single wildcard ("*") in virtual host domain entries, which must be either a prefix or suffix. This limitation greatly simplifies the virtual host matching logic, but it is also inherently limiting. This change introduces a repeated list of "host simplification rules" at the RouteConfiguration level, that provide a way to substitute away other dynamic portions of the domain without changing what is sent upstream. For example, to match something like `*.foo.*.example.org` you might write a simplification rule like: `([^.]+[.]foo[.])([^.]+)([.]example[.]org)` with a substitution of `\1bar\3`. This then allows a virtual host domain entry of `*.foo.bar.example.org` to match `baz.foo.bar.example.org` or `wowza.foo.qux.example.org`. Host simplification rules are processed in the order they are defined. Signed-off-by: Ryan Anderson --- api/envoy/config/route/v3/route.proto | 9 +++++ changelogs/current.yaml | 5 +++ source/common/router/config_impl.cc | 21 ++++++++++ source/common/router/config_impl.h | 9 +++++ test/common/router/config_impl_test.cc | 56 ++++++++++++++++++++++++++ 5 files changed, 100 insertions(+) diff --git a/api/envoy/config/route/v3/route.proto b/api/envoy/config/route/v3/route.proto index c4d507d22b016..3d35a49d1cda8 100644 --- a/api/envoy/config/route/v3/route.proto +++ b/api/envoy/config/route/v3/route.proto @@ -5,6 +5,7 @@ package envoy.config.route.v3; import "envoy/config/core/v3/base.proto"; import "envoy/config/core/v3/config_source.proto"; import "envoy/config/route/v3/route_components.proto"; +import "envoy/type/matcher/v3/regex.proto"; import "google/protobuf/any.proto"; import "google/protobuf/wrappers.proto"; @@ -155,6 +156,14 @@ message RouteConfiguration { // For instance, if the metadata is intended for the Router filter, // the filter name should be specified as ``envoy.filters.http.router``. core.v3.Metadata metadata = 17; + + // The host simplification rules are a set of regex substitutions + // that can modify the :authority used when matching + // VirtualHosts. It will not change what is sent upstream. This can + // be used to implement multiple-wildcard matching, by converting + // all but one of the wildcards into a static string. + // This is similar to ignore_port_in_host_matching (above), but more flexible. + repeated type.matcher.v3.RegexMatchAndSubstitute host_simplification_rules = 18; } message Vhds { diff --git a/changelogs/current.yaml b/changelogs/current.yaml index 316b2373f7e1c..726d8b1847c63 100644 --- a/changelogs/current.yaml +++ b/changelogs/current.yaml @@ -359,5 +359,10 @@ new_features: Added a new metric ``db_build_epoch`` to track the build timestamp of the MaxMind geolocation database files. This can be used to monitor the freshness of the databases currently in use by the filter. See `MaxMind DB build_epoch `_ for more details. +- area: http + change: | + Added support for :ref:`host_simplification_rules ` to allow for + regular expression substitutions to "simplify" a host before doing + virtual host matching. deprecated: diff --git a/source/common/router/config_impl.cc b/source/common/router/config_impl.cc index 7059d996bea7e..f7d16eeb77c2c 100644 --- a/source/common/router/config_impl.cc +++ b/source/common/router/config_impl.cc @@ -1929,6 +1929,17 @@ RouteMatcher::RouteMatcher(const envoy::config::route::v3::RouteConfiguration& r } } } + for (const auto& simplification_rule : route_config.host_simplification_rules()) { + auto result = + Regex::Utility::parseRegex(simplification_rule.pattern(), factory_context.regexEngine()); + + SET_AND_RETURN_IF_NOT_OK(result.status(), creation_status); + + std::unique_ptr rule = std::make_unique( + std::move(*result), simplification_rule.substitution()); + + host_simplification_rules_.push_back(std::move(rule)); + } } const VirtualHostImpl* RouteMatcher::findVirtualHost(const Http::RequestHeaderMap& headers) const { @@ -1952,6 +1963,16 @@ const VirtualHostImpl* RouteMatcher::findVirtualHost(const Http::RequestHeaderMa host_header_value = host_header_value.substr(0, port_start); } } + + // If any host simplification rules exist, process them in order to + // rewrite the host header used when looking up virtual hosts. (This + // is notionally similar to the handling of + // `ignore_port_in_host_matching`, but more flexible.) + for (const auto& simplifier : host_simplification_rules_) { + host_header_value = + simplifier->matcher->replaceAll(host_header_value, simplifier->substitution); + } + // TODO (@rshriram) Match Origin header in WebSocket // request with VHost, using wildcard match // Lower-case the value of the host header, as hostnames are case insensitive. diff --git a/source/common/router/config_impl.h b/source/common/router/config_impl.h index 5122a0fe1e3c6..2a54639ee8586 100644 --- a/source/common/router/config_impl.h +++ b/source/common/router/config_impl.h @@ -9,6 +9,7 @@ #include #include +#include "envoy/config/common/matcher/v3/matcher.pb.h" #include "envoy/config/core/v3/base.pb.h" #include "envoy/config/route/v3/route.pb.h" #include "envoy/config/route/v3/route_components.pb.h" @@ -1255,6 +1256,12 @@ class RouteListMatchActionFactory : public Matcher::ActionFactory> host_simplification_rules_; }; /** diff --git a/test/common/router/config_impl_test.cc b/test/common/router/config_impl_test.cc index 438646cbf22be..9e5f9e9adc9c9 100644 --- a/test/common/router/config_impl_test.cc +++ b/test/common/router/config_impl_test.cc @@ -2495,6 +2495,62 @@ TEST_F(RouteMatcherTest, IgnorePortInHostMatching) { } } +// Tests that host_simplification_rules mutate/simplify the host used +// for picking the virtualhost when matching +TEST_F(RouteMatcherTest, HostSimplificationRules) { + const std::string yaml = R"EOF( +host_simplification_rules: +- pattern: + regex: "^(foo[.])([^.]+)([.]example[.]org)$" + substitution: \1bar\3 +virtual_hosts: +- name: local_service + domains: ["foo.bar.example.org"] + routes: + - match: + prefix: "" + name: "business-specific-route" + route: + cluster: local_service_grpc +- name: catchall_host + domains: + - "*" + routes: + - match: + prefix: "" + name: "default-route" + route: + cluster: default_catch_all_service + )EOF"; + auto route_configuration = parseRouteConfigurationFromYaml(yaml); + + factory_context_.cluster_manager_.initializeClusters( + {"local_service_grpc", "default_catch_all_service"}, {}); + { + TestConfigImpl config(route_configuration, factory_context_, true, creation_status_); + // First, the trivial, no substitution needed, but should happen anyway: + EXPECT_EQ(config.route(genHeaders("foo.bar.example.org", "/foo", "GET"), 0)->routeName(), + "business-specific-route"); + // Matches, but requires the substitution to happen: + EXPECT_EQ(config.route(genHeaders("foo.baz.example.org", "/foo", "GET"), 0)->routeName(), + "business-specific-route"); + // Matches, require substitution, longer replaceable section + EXPECT_EQ( + config.route(genHeaders("foo.barbazquxfoobang.example.org", "/foo", "GET"), 0)->routeName(), + "business-specific-route"); + // Shouldn't match, but has a related substring: + EXPECT_EQ(config.route(genHeaders("qux.foo.baz.example.org", "/foo", "GET"), 0)->routeName(), + "default-route"); + // Shouldn't match (trivial) + EXPECT_EQ(config.route(genHeaders("12.34.56.78:1234", "/foo", "GET"), 0)->routeName(), + "default-route"); + EXPECT_EQ(config.route(genHeaders("www.foo.com:8090", "/foo", "GET"), 0)->routeName(), + "default-route"); + EXPECT_EQ(config.route(genHeaders("[12:34:56:7890::]:8090", "/foo", "GET"), 0)->routeName(), + "default-route"); + } +} + TEST_F(RouteMatcherTest, Priority) { const std::string yaml = R"EOF( virtual_hosts: From 3bd0d8210b9bcf2700871f191dc5b629da78806d Mon Sep 17 00:00:00 2001 From: Ryan Anderson Date: Fri, 5 Sep 2025 18:15:39 -0700 Subject: [PATCH 2/9] Add an example to the proto definition Along with some more words of explanation on how this all works. Signed-off-by: Ryan Anderson --- api/envoy/config/route/v3/route.proto | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/api/envoy/config/route/v3/route.proto b/api/envoy/config/route/v3/route.proto index 3d35a49d1cda8..fe60272d75994 100644 --- a/api/envoy/config/route/v3/route.proto +++ b/api/envoy/config/route/v3/route.proto @@ -163,6 +163,22 @@ message RouteConfiguration { // be used to implement multiple-wildcard matching, by converting // all but one of the wildcards into a static string. // This is similar to ignore_port_in_host_matching (above), but more flexible. + // To use this, at least one simplification rule must be configured, + // and then a + // :ref:`envoy_v3_api_msg_config.route.v3.VirtualHost`.domains field + // must be set to match the results of the simplification rule. + // An example may help: + // + // host_simplification_rules: + // - pattern: + // regex: "^(foo)[.]([^.]+)[.](example[.]org)$" + // substitution: \1.bar.\3 + // + // will allow a HTTP request with an :authority header of + // 'foo.anything.example.org' or 'foo.something.example.org' to both + // be matched by a VirtualHost with a domain entry of + // 'foo.bar.example.org', due to the second label in the domain + // being replaced by the simplification rule to 'bar'. repeated type.matcher.v3.RegexMatchAndSubstitute host_simplification_rules = 18; } From 024a9e2d59e57dbdbacb6e82b647d7967b2a6a92 Mon Sep 17 00:00:00 2001 From: Ryan Anderson Date: Fri, 5 Sep 2025 20:46:14 -0700 Subject: [PATCH 3/9] Fix the next-free-field Signed-off-by: Ryan Anderson --- api/envoy/config/route/v3/route.proto | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/envoy/config/route/v3/route.proto b/api/envoy/config/route/v3/route.proto index fe60272d75994..f32b6f062c4f5 100644 --- a/api/envoy/config/route/v3/route.proto +++ b/api/envoy/config/route/v3/route.proto @@ -24,7 +24,7 @@ option (udpa.annotations.file_status).package_version_status = ACTIVE; // * Routing :ref:`architecture overview ` // * HTTP :ref:`router filter ` -// [#next-free-field: 18] +// [#next-free-field: 19] message RouteConfiguration { option (udpa.annotations.versioning).previous_message_type = "envoy.api.v2.RouteConfiguration"; From 1cae50f7fb4dd81de1e413103420e6f4ce6c8e90 Mon Sep 17 00:00:00 2001 From: Ryan Anderson Date: Sat, 6 Sep 2025 14:50:29 -0700 Subject: [PATCH 4/9] docs? I have no idea Signed-off-by: Ryan Anderson --- api/envoy/config/route/v3/route.proto | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/api/envoy/config/route/v3/route.proto b/api/envoy/config/route/v3/route.proto index f32b6f062c4f5..6d644f2f64ed5 100644 --- a/api/envoy/config/route/v3/route.proto +++ b/api/envoy/config/route/v3/route.proto @@ -169,10 +169,10 @@ message RouteConfiguration { // must be set to match the results of the simplification rule. // An example may help: // - // host_simplification_rules: - // - pattern: - // regex: "^(foo)[.]([^.]+)[.](example[.]org)$" - // substitution: \1.bar.\3 + // > host_simplification_rules: + // > - pattern: + // > regex: "^(foo)[.]([^.]+)[.](example[.]org)$" + // > substitution: \1.bar.\3 // // will allow a HTTP request with an :authority header of // 'foo.anything.example.org' or 'foo.something.example.org' to both From 880b39c7a3e2c2f3cd9007ed5810486f19897cba Mon Sep 17 00:00:00 2001 From: Ryan Anderson Date: Sat, 6 Sep 2025 14:51:53 -0700 Subject: [PATCH 5/9] docs2? I have no idea Signed-off-by: Ryan Anderson --- api/envoy/config/route/v3/route.proto | 2 -- 1 file changed, 2 deletions(-) diff --git a/api/envoy/config/route/v3/route.proto b/api/envoy/config/route/v3/route.proto index 6d644f2f64ed5..09df125b781ab 100644 --- a/api/envoy/config/route/v3/route.proto +++ b/api/envoy/config/route/v3/route.proto @@ -168,12 +168,10 @@ message RouteConfiguration { // :ref:`envoy_v3_api_msg_config.route.v3.VirtualHost`.domains field // must be set to match the results of the simplification rule. // An example may help: - // // > host_simplification_rules: // > - pattern: // > regex: "^(foo)[.]([^.]+)[.](example[.]org)$" // > substitution: \1.bar.\3 - // // will allow a HTTP request with an :authority header of // 'foo.anything.example.org' or 'foo.something.example.org' to both // be matched by a VirtualHost with a domain entry of From f6b40c39ab2dd8d3c8e13ed7f90854a83be5f6a6 Mon Sep 17 00:00:00 2001 From: Ryan Anderson Date: Thu, 11 Sep 2025 13:23:23 -0700 Subject: [PATCH 6/9] wrap changelog properly Signed-off-by: Ryan Anderson --- changelogs/current.yaml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/changelogs/current.yaml b/changelogs/current.yaml index 726d8b1847c63..e5e362233f210 100644 --- a/changelogs/current.yaml +++ b/changelogs/current.yaml @@ -361,8 +361,9 @@ new_features: See `MaxMind DB build_epoch `_ for more details. - area: http change: | - Added support for :ref:`host_simplification_rules ` to allow for - regular expression substitutions to "simplify" a host before doing - virtual host matching. + Added support for :ref:`host_simplification_rules + ` + to allow for regular expression substitutions to "simplify" a host + before doing virtual host matching. deprecated: From 4a37f220336ae7577bfca3147160091af20d244f Mon Sep 17 00:00:00 2001 From: Ryan Anderson Date: Tue, 16 Sep 2025 07:10:28 +0000 Subject: [PATCH 7/9] Try to make the tests pass under gcc I don't understand why moving the lower casing to earlier makes the test pass, but it seems to, and, well, it makes as much sense as why they were failing in the first place. Signed-off-by: Ryan Anderson --- source/common/router/config_impl.cc | 12 ++++++------ test/common/router/config_impl_test.cc | 10 ++++++++-- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/source/common/router/config_impl.cc b/source/common/router/config_impl.cc index f7d16eeb77c2c..2dc520cfa5b18 100644 --- a/source/common/router/config_impl.cc +++ b/source/common/router/config_impl.cc @@ -1954,8 +1954,10 @@ const VirtualHostImpl* RouteMatcher::findVirtualHost(const Http::RequestHeaderMa return nullptr; } + // Lower-case the value of the host header, as hostnames are case insensitive. + absl::string_view host_header_value = absl::AsciiStrToLower(headers.getHostValue()); + // If 'ignore_port_in_host_matching' is set, ignore the port number in the host header(if any). - absl::string_view host_header_value = headers.getHostValue(); if (ignorePortInHostMatching()) { if (const absl::string_view::size_type port_start = Http::HeaderUtility::getPortStart(host_header_value); @@ -1975,15 +1977,13 @@ const VirtualHostImpl* RouteMatcher::findVirtualHost(const Http::RequestHeaderMa // TODO (@rshriram) Match Origin header in WebSocket // request with VHost, using wildcard match - // Lower-case the value of the host header, as hostnames are case insensitive. - const std::string host = absl::AsciiStrToLower(host_header_value); - const auto iter = virtual_hosts_.find(host); + const auto iter = virtual_hosts_.find(host_header_value); if (iter != virtual_hosts_.end()) { return iter->second.get(); } if (!wildcard_virtual_host_suffixes_.empty()) { const VirtualHostImpl* vhost = findWildcardVirtualHost( - host, wildcard_virtual_host_suffixes_, + host_header_value, wildcard_virtual_host_suffixes_, [](absl::string_view h, int l) -> absl::string_view { return h.substr(h.size() - l); }); if (vhost != nullptr) { return vhost; @@ -1991,7 +1991,7 @@ const VirtualHostImpl* RouteMatcher::findVirtualHost(const Http::RequestHeaderMa } if (!wildcard_virtual_host_prefixes_.empty()) { const VirtualHostImpl* vhost = findWildcardVirtualHost( - host, wildcard_virtual_host_prefixes_, + host_header_value, wildcard_virtual_host_prefixes_, [](absl::string_view h, int l) -> absl::string_view { return h.substr(0, l); }); if (vhost != nullptr) { return vhost; diff --git a/test/common/router/config_impl_test.cc b/test/common/router/config_impl_test.cc index 9e5f9e9adc9c9..762dfc2543a8d 100644 --- a/test/common/router/config_impl_test.cc +++ b/test/common/router/config_impl_test.cc @@ -2498,11 +2498,12 @@ TEST_F(RouteMatcherTest, IgnorePortInHostMatching) { // Tests that host_simplification_rules mutate/simplify the host used // for picking the virtualhost when matching TEST_F(RouteMatcherTest, HostSimplificationRules) { + const std::string yaml = R"EOF( host_simplification_rules: - pattern: - regex: "^(foo[.])([^.]+)([.]example[.]org)$" - substitution: \1bar\3 + regex: "^(foo)[.]([^.]+)[.](example[.]org)$" + substitution: "\\1.bar.\\3" virtual_hosts: - name: local_service domains: ["foo.bar.example.org"] @@ -2528,19 +2529,24 @@ TEST_F(RouteMatcherTest, HostSimplificationRules) { {"local_service_grpc", "default_catch_all_service"}, {}); { TestConfigImpl config(route_configuration, factory_context_, true, creation_status_); + // First, the trivial, no substitution needed, but should happen anyway: EXPECT_EQ(config.route(genHeaders("foo.bar.example.org", "/foo", "GET"), 0)->routeName(), "business-specific-route"); + // Matches, but requires the substitution to happen: EXPECT_EQ(config.route(genHeaders("foo.baz.example.org", "/foo", "GET"), 0)->routeName(), "business-specific-route"); + // Matches, require substitution, longer replaceable section EXPECT_EQ( config.route(genHeaders("foo.barbazquxfoobang.example.org", "/foo", "GET"), 0)->routeName(), "business-specific-route"); + // Shouldn't match, but has a related substring: EXPECT_EQ(config.route(genHeaders("qux.foo.baz.example.org", "/foo", "GET"), 0)->routeName(), "default-route"); + // Shouldn't match (trivial) EXPECT_EQ(config.route(genHeaders("12.34.56.78:1234", "/foo", "GET"), 0)->routeName(), "default-route"); From 0926cca260b56a537668f831d3a7c1d39af6e21a Mon Sep 17 00:00:00 2001 From: Ryan Anderson Date: Wed, 17 Sep 2025 20:56:00 +0000 Subject: [PATCH 8/9] Keeping references to disappearing variables is bad, mmm k? Signed-off-by: Ryan Anderson --- source/common/router/config_impl.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/common/router/config_impl.cc b/source/common/router/config_impl.cc index 2dc520cfa5b18..cf685e683c673 100644 --- a/source/common/router/config_impl.cc +++ b/source/common/router/config_impl.cc @@ -1955,7 +1955,7 @@ const VirtualHostImpl* RouteMatcher::findVirtualHost(const Http::RequestHeaderMa } // Lower-case the value of the host header, as hostnames are case insensitive. - absl::string_view host_header_value = absl::AsciiStrToLower(headers.getHostValue()); + std::string host_header_value = absl::AsciiStrToLower(headers.getHostValue()); // If 'ignore_port_in_host_matching' is set, ignore the port number in the host header(if any). if (ignorePortInHostMatching()) { From 6eb0a2d3826c466cc775c89d2ee8449346de3154 Mon Sep 17 00:00:00 2001 From: Ryan Anderson Date: Thu, 18 Sep 2025 01:56:06 +0000 Subject: [PATCH 9/9] Update comments about multiple rules Signed-off-by: Ryan Anderson --- api/envoy/config/route/v3/route.proto | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/api/envoy/config/route/v3/route.proto b/api/envoy/config/route/v3/route.proto index 09df125b781ab..53db0ae36a2fa 100644 --- a/api/envoy/config/route/v3/route.proto +++ b/api/envoy/config/route/v3/route.proto @@ -177,6 +177,12 @@ message RouteConfiguration { // be matched by a VirtualHost with a domain entry of // 'foo.bar.example.org', due to the second label in the domain // being replaced by the simplification rule to 'bar'. + // If multiple rules are provided, they are processed in order. The + // results of the first rule will be used by the second rule, and so + // on. It is unlikely that you want to depend on this behavior, + // however, due to the potential for confusion. It is recommended + // that, if you need multiple simplification rules, they should be + // as independent of each other as possible. repeated type.matcher.v3.RegexMatchAndSubstitute host_simplification_rules = 18; }