fix: use compressed event size to close chunk #7517
Encoder changes:
@@ -28,14 +28,18 @@ const (

The chunkEncoder struct as it reads after this change:

```go
// written to the encoder and the encoder outputs chunks that are fit to the
// configured limit.
type chunkEncoder struct {
	limit int64

	// The soft limit is a dynamic limit that will maximize the amount of events that fit in each chunk.
	// After creating a chunk it will determine if it should scale up and down based on the chunk size vs the limit.
	// If the chunk didn't reach the limit perhaps future events could have been added.
	softLimit                  int64
	softLimitScaleUpExponent   float64
	softLimitScaleDownExponent float64
	bytesWritten               int
	buf                        *bytes.Buffer
	w                          *gzip.Writer
	metrics                    metrics.Metrics
}

func newChunkEncoder(limit int64) *chunkEncoder {
```
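The comments in this struct describe a soft limit that adapts to how full each finished chunk turned out to be. As a rough, self-contained sketch of that idea (not the code in this PR; the function name, the fixed doubling factor, and the scale-down rule are invented for illustration):

```go
package main

import "fmt"

// adjustSoftLimit is a simplified, hypothetical take on the scaling policy
// described in the struct comments: grow the soft limit when a chunk closes
// well under the configured limit, shrink it when the chunk overshoots, and
// leave it alone when the chunk lands between 90% and 100% of the limit.
func adjustSoftLimit(softLimit, limit int64, chunkSize int) int64 {
	switch {
	case chunkSize < int(float64(limit)*0.9):
		// Chunk had spare room: scale up so the next chunk can hold more events.
		return softLimit * 2
	case chunkSize > int(limit):
		// Chunk overshot the hard limit: scale back down, but never below the limit.
		if half := softLimit / 2; half > limit {
			return half
		}
		return limit
	default:
		// Equilibrium: keep the current soft limit.
		return softLimit
	}
}

func main() {
	limit := int64(32768)
	soft := limit
	for _, chunkSize := range []int{10000, 20000, 31000, 40000} {
		soft = adjustSoftLimit(soft, limit, chunkSize)
		fmt.Println("chunk:", chunkSize, "-> soft limit:", soft)
	}
}
```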
@@ -141,18 +145,30 @@ func (enc *chunkEncoder) reset() ([][]byte, error) {

```diff
 	// decisions in the last chunk.
 	// 3) Equilibrium: If the chunk size is between 90% and 100% of the user-configured limit, maintain soft limit value.

-	if enc.buf.Len() < int(float64(enc.limit)*encHardLimitThreshold) {
+	if enc.bytesWritten < int(float64(enc.limit)*encHardLimitThreshold) {
 		if enc.metrics != nil {
 			enc.metrics.Counter(encSoftLimitScaleUpCounterName).Incr()
 		}

 		mul := int64(math.Pow(float64(softLimitBaseFactor), float64(enc.softLimitScaleUpExponent+1)))
+		// this can cause enc.softLimit to overflow into a negative value
 		enc.softLimit *= mul
 		enc.softLimitScaleUpExponent += softLimitExponentScaleFactor

+		// In Go an overflow wraps around using modulo arithmetic, so it could be negative.
+		// enc.limit*2 is the ceiling for the soft limit, unless that also overflows then it will be (math.MaxInt64 - 1).
+		if enc.softLimit < 0 || enc.softLimit > enc.limit*2 {
+			limit := enc.limit * 2
+			if limit < 0 {
+				limit = math.MaxInt64 - 1
+			}
+			enc.softLimit = limit
```

Inline review exchange on the clamp above:

> As always when it comes to math, I'm a bit confused 😅.

> Deleted the math, it won't hurt us anymore 😜
The hunk continues below the comment thread:

```diff
+			enc.softLimitScaleUpExponent = 0
+		}
 		return enc.update(), nil
 	}

-	if int(enc.limit) > enc.buf.Len() && enc.buf.Len() >= int(float64(enc.limit)*encHardLimitThreshold) {
+	if int(enc.limit) > enc.bytesWritten && enc.bytesWritten >= int(float64(enc.limit)*encHardLimitThreshold) {
 		if enc.metrics != nil {
 			enc.metrics.Counter(encSoftLimitStableCounterName).Incr()
 		}
```
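Background that may help when reading the buf.Len() to bytesWritten change (this is a property of Go's compress/gzip standard library, not a rationale stated in the diff): a gzip.Writer buffers compressed output internally, so the length of the underlying bytes.Buffer at any given moment only reflects data that has already been flushed. A small standalone illustration:

```go
package main

import (
	"bytes"
	"compress/gzip"
	"fmt"
)

func main() {
	var buf bytes.Buffer
	w := gzip.NewWriter(&buf)

	// Write a payload roughly the size of a batch of decision log events.
	payload := bytes.Repeat([]byte(`{"input":{"method":"GET"}}`), 100)
	if _, err := w.Write(payload); err != nil {
		panic(err)
	}

	// Most of the compressed data is still in the writer's internal buffers,
	// so the underlying buffer understates the chunk size at this point.
	fmt.Println("buf.Len() before Flush:", buf.Len())

	// Only after Flush (or Close) does the underlying buffer reflect the
	// compressed bytes produced so far.
	if err := w.Flush(); err != nil {
		panic(err)
	}
	fmt.Println("buf.Len() after Flush:", buf.Len())
}
```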
Plugin test changes:
@@ -408,7 +408,7 @@ func TestPluginStartSameInput(t *testing.T) {

```diff
 	fixture := newTestFixture(t)
 	defer fixture.server.stop()

-	fixture.server.ch = make(chan []EventV1, 3)
+	fixture.server.ch = make(chan []EventV1, 4)
 	var result interface{} = false

 	ts, err := time.Parse(time.RFC3339Nano, "2018-01-01T12:00:00.123456Z")
```
@@ -441,12 +441,15 @@ func TestPluginStartSameInput(t *testing.T) {

```diff
 	chunk1 := <-fixture.server.ch
 	chunk2 := <-fixture.server.ch
 	chunk3 := <-fixture.server.ch
+	chunk4 := <-fixture.server.ch
 	expLen1 := 122
-	expLen2 := 242
-	expLen3 := 36
+	expLen2 := 121
+	expLen3 := 121
+	expLen4 := 36

-	if len(chunk1) != expLen1 || len(chunk2) != expLen2 || len(chunk3) != expLen3 {
-		t.Fatalf("Expected chunk lens %v, %v, and %v but got: %v, %v, and %v", expLen1, expLen2, expLen3, len(chunk1), len(chunk2), len(chunk3))
+	if len(chunk1) != expLen1 || len(chunk2) != expLen2 || len(chunk3) != expLen3 || len(chunk4) != expLen4 {
+		t.Fatalf("Expected chunk lens %v, %v, %v and %v but got: %v, %v, %v and %v",
+			expLen1, expLen2, expLen3, expLen4, len(chunk1), len(chunk2), len(chunk3), len(chunk4))
 	}

 	var expInput interface{} = map[string]interface{}{"method": "GET"}
```

A review comment on the new expected chunk lengths:

> For this test to remain equivalent, maybe we should raise the number of events fired, or tweak the upload limit, so that we keep producing multiple chunks here?
@@ -472,8 +475,8 @@ func TestPluginStartSameInput(t *testing.T) {

```diff
 		Metrics: msAsFloat64,
 	}

-	if !reflect.DeepEqual(chunk3[expLen3-1], exp) {
-		t.Fatalf("Expected %+v but got %+v", exp, chunk3[expLen3-1])
+	if !reflect.DeepEqual(chunk4[expLen4-1], exp) {
+		t.Fatalf("Expected %+v but got %+v", exp, chunk4[expLen4-1])
 	}

 	if fixture.plugin.status.Code != "" {
```
@@ -489,7 +492,7 @@ func TestPluginStartChangingInputValues(t *testing.T) {

```diff
 	fixture := newTestFixture(t)
 	defer fixture.server.stop()

-	fixture.server.ch = make(chan []EventV1, 3)
+	fixture.server.ch = make(chan []EventV1, 4)
 	var result interface{} = false

 	ts, err := time.Parse(time.RFC3339Nano, "2018-01-01T12:00:00.123456Z")
```
@@ -521,12 +524,15 @@ func TestPluginStartChangingInputValues(t *testing.T) {

```diff
 	chunk1 := <-fixture.server.ch
 	chunk2 := <-fixture.server.ch
 	chunk3 := <-fixture.server.ch
+	chunk4 := <-fixture.server.ch
 	expLen1 := 124
-	expLen2 := 247
-	expLen3 := 29
+	expLen2 := 123
+	expLen3 := 123
+	expLen4 := 30

-	if len(chunk1) != expLen1 || len(chunk2) != expLen2 || len((chunk3)) != expLen3 {
-		t.Fatalf("Expected chunk lens %v, %v and %v but got: %v, %v and %v", expLen1, expLen2, expLen3, len(chunk1), len(chunk2), len(chunk3))
+	if len(chunk1) != expLen1 || len(chunk2) != expLen2 || len(chunk3) != expLen3 || len(chunk4) != expLen4 {
+		t.Fatalf("Expected chunk lens %v, %v, %v and %v but got: %v, %v, %v and %v",
+			expLen1, expLen2, expLen3, expLen4, len(chunk1), len(chunk2), len(chunk3), len(chunk4))
 	}

 	exp := EventV1{
```
@@ -544,8 +550,8 @@ func TestPluginStartChangingInputValues(t *testing.T) {

```diff
 		Timestamp: ts,
 	}

-	if !reflect.DeepEqual(chunk3[expLen3-1], exp) {
-		t.Fatalf("Expected %+v but got %+v", exp, chunk3[expLen3-1])
+	if !reflect.DeepEqual(chunk4[expLen4-1], exp) {
+		t.Fatalf("Expected %+v but got %+v", exp, chunk4[expLen4-1])
 	}
 }
```
@@ -3564,6 +3570,8 @@ type testServer struct {

```diff
 }

 func (t *testServer) handle(w http.ResponseWriter, r *http.Request) {
+	t.t.Helper()
+
 	gr, err := gzip.NewReader(r.Body)
 	if err != nil {
 		t.t.Fatal(err)
```
Review discussion on the overflow handling:

> What are the circumstances for a scenario where we reach an overflow? Since the thing we're exponentially increasing is upload bytes, for us to overflow, wouldn't the previous successful reset need to have had a soft limit already terabytes in size?
>
> This is intuition talking, and not me doing actual calculations, though, so I may be way off in my estimates. It's very likely I'm missing something here, since you've encountered this in your work and had to fix it.

> As we discussed, I updated the PR to now enforce a maximum configurable limit of 4294967296 instead, removing the need to check if the soft limit will ever overflow.
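To make the wrap-around risk and the effect of the cap concrete (illustrative arithmetic, not code from this PR; the constant name below is made up): Go's signed integers silently wrap on overflow, but with the configured limit capped at 4294967296 (2^32), the soft-limit ceiling of 2 * limit is only 2^33, roughly a factor of 2^30 below math.MaxInt64.

```go
package main

import (
	"fmt"
	"math"
)

func main() {
	// Signed overflow in Go wraps around (two's complement); it does not panic.
	softLimit := int64(math.MaxInt64/2 + 1)
	softLimit *= 2
	fmt.Println(softLimit) // -9223372036854775808: the negative value the clamp guards against

	// With the configurable limit capped at 2^32, the soft-limit ceiling (2 * limit)
	// is nowhere near overflow territory.
	const cappedLimit = int64(4294967296) // 2^32
	ceiling := cappedLimit * 2            // 2^33
	fmt.Println(math.MaxInt64 / ceiling)  // 1073741823 (~2^30): headroom before an int64 would wrap
}
```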