Skip to content

Commit ea667f2

Browse files
committed
init
0 parents  commit ea667f2

File tree

10 files changed

+920
-0
lines changed

10 files changed

+920
-0
lines changed

.github/workflows/main.yml

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
on:
2+
push:
3+
branches: [main]
4+
env:
5+
GO_VERSION: 1.22.2
6+
DOCKER_TAG_OUR_VERSION: "latest"
7+
jobs:
8+
main:
9+
runs-on: ubuntu-latest
10+
permissions:
11+
packages: write
12+
contents: read
13+
14+
steps:
15+
- uses: actions/checkout@v4
16+
- name: Build and publish to GHCR
17+
env:
18+
DOCKER_TAG_OUR_VERSION: "latest"
19+
run: |
20+
docker buildx create \
21+
--name container \
22+
--driver=docker-container \
23+
--use
24+
docker login ghcr.io -u ${{ github.actor }} -p ${{ secrets.GITHUB_TOKEN }}
25+
docker buildx build \
26+
--cache-from=ghcr.io/${{ github.repository }}:_cache \
27+
--cache-to=ghcr.io/${{ github.repository }}:_cache \
28+
--build-arg GO_VERSION=${GO_VERSION} \
29+
-t ghcr.io/${{ github.repository }}:go${GO_VERSION}-${DOCKER_TAG_OUR_VERSION} \
30+
--push \
31+
--target=gocacheprog-s3 \
32+
-f Dockerfile .
33+
34+
outputs:
35+
image: ghcr.io/${{ github.repository }}

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
gocacheprog-s3

Dockerfile

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
ARG GO_VERSION
2+
FROM golang:${GO_VERSION} as golang-cacheprog
3+
4+
# copy out of /usr/local/go; can't use GOROOT
5+
# TODO: there's probably something we can do with GOTOOLCHAIN, but I don't understand it well enough
6+
RUN cp -r /usr/local/go /gocacheprog
7+
RUN cd /gocacheprog/src && GOEXPERIMENT=cacheprog ./make.bash
8+
RUN cp -r /gocacheprog/* /usr/local/go/
9+
RUN go version | grep cacheprog
10+
RUN rm -rf /gocacheprog
11+
12+
FROM golang-cacheprog as gocacheprog-s3
13+
ADD . /workdir
14+
RUN cd /workdir && go install . && rm -rf /workdir
15+
16+
# TODO: not sure this affects the EXEC
17+
ENV GOCACHEPROG="gocacheprog-s3 -v=1"

README.md

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# gocacheprog-s3
2+
3+
`gocacheprog-s3` is a "cacheprog" for Go that uses S3.
4+
5+
# Usage
6+
7+
[As of writing (Apr 2024), `GOCACHEPROG` support requires a custom build of the Go toolchain](https://github.com/golang/go/issues/64876). As such, we provide a Docker image with both a custom build of the Go toolchain and the `gocacheprog-s3` binary baked in.
8+
9+
```
10+
ghcr.io/nfi-hashicorp/gocacheprog-s3:latest-go1.22.2
11+
```
12+
13+
## Docker example
14+
15+
```console
16+
% docker run --platform=linux/amd64 -e AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY -e AWS_SESSION_TOKEN -e AWS_REGION -e GOCACHEPROG="gocacheprog-s3 -v=2 mycoolbucket" ghcr.io/nfi-hashicorp/gocacheprog-s3:go1.22.2-latest go install github.com/nfi-hashicorp/gocacheprog-s3@latest
17+
...
18+
disk stats:
19+
413 gets: 0 hits, 413 misses, 0 errors, 0s total dur
20+
901 puts: 0 errors, 0s total dur
21+
s3 stats:
22+
414 gets: 1 hits, 413 misses, 0 errors, 0s total dur; total 0.00 MB; avg 0.00 MB/s
23+
902 puts: 0 errors, 1m17.3s total dur; total 127.59 MB; avg 1.65 MB/s
24+
```
25+
26+
And then again to see the speed up.
27+
28+
```console
29+
% docker run --platform=linux/amd64 -e AWS_ACCESS_KEY_ID -e AWS_SECRET_ACCESS_KEY -e AWS_SESSION_TOKEN -e AWS_REGION -e GOCACHEPROG="gocacheprog-s3 -v=2 mycoolbucket" ghcr.io/nfi-hashicorp/gocacheprog-s3:go1.22.2-latest go install github.com/nfi-hashicorp/gocacheprog-s3@latest
30+
...
31+
disk stats:
32+
642 gets: 0 hits, 642 misses, 0 errors, 0s total dur
33+
642 puts: 0 errors, 0s total dur
34+
s3 stats:
35+
643 gets: 642 hits, 1 misses, 0 errors, 34.8s total dur; total 127.53 MB; avg 3.66 MB/s
36+
2 puts: 0 errors, 300ms total dur; total 0.00 MB; avg 0.00 MB/s
37+
```
38+
39+
## Local dev example
40+
41+
```console
42+
% GOCACHEPROG="./gocacheprog-s3 -v=2 -local-cache-dir=go-cache $BUCKET"
43+
% aws s3 rm --recursive s3://$BUCKET/go-cache > /dev/null 2>&1
44+
% rm -rf go-cache
45+
% go build .
46+
disk stats:
47+
413 gets: 0 hits, 413 misses, 0 errors, 0s total dur
48+
893 puts: 0 errors, 0s total dur
49+
s3 stats:
50+
414 gets: 1 hits, 413 misses, 0 errors, 0s total dur; total 0.00 MB; avg 0.00 MB/s
51+
894 puts: 0 errors, 1m18.7s total dur; total 140.99 MB; avg 1.79 MB/s
52+
total time: 1m21s
53+
```
54+
55+
And then it goes *much* faster the second time.
56+
57+
```console
58+
% rm -rf go-cache
59+
% go build .
60+
disk stats:
61+
641 gets: 0 hits, 641 misses, 0 errors, 0s total dur
62+
641 puts: 0 errors, 0s total dur
63+
s3 stats:
64+
642 gets: 641 hits, 1 misses, 0 errors, 35.8s total dur; total 140.42 MB; avg 3.92 MB/s
65+
2 puts: 0 errors, 300ms total dur; total 0.00 MB; avg 0.01 MB/s
66+
total time: 12s
67+
```
68+
69+
# Credits
70+
71+
Derived from https://github.com/bradfitz/go-tool-cache and [or-shachar/go-tool-cache](https://github.com/or-shachar/go-tool-cache/commit/cc47faab56325a022ff59cd7277abbf99ff4f8ff).

counts.go

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
package main
2+
3+
import (
4+
"encoding/csv"
5+
"fmt"
6+
"io"
7+
"strconv"
8+
"sync/atomic"
9+
"time"
10+
11+
uberatomic "go.uber.org/atomic"
12+
)
13+
14+
// Counts keeps counts cache events
15+
type Counts struct {
16+
gets atomic.Int64
17+
hits atomic.Int64
18+
misses atomic.Int64
19+
puts atomic.Int64
20+
getErrors atomic.Int64
21+
putErrors atomic.Int64
22+
totalGetBytes atomic.Int64
23+
totalGetDur uberatomic.Duration
24+
totalPutBytes atomic.Int64
25+
totalPutDur uberatomic.Duration
26+
}
27+
28+
func (c *Counts) Summary() string {
29+
getsLine := fmt.Sprintf("%d gets: %d hits, %d misses, %d errors, %s total dur",
30+
c.gets.Load(), c.hits.Load(), c.misses.Load(), c.getErrors.Load(), c.totalGetDur.Load().Round(100*time.Millisecond))
31+
if c.totalGetBytes.Load() > 0 {
32+
getsLine += fmt.Sprintf("; total %.2f MB; avg %.2f MB/s",
33+
float64(c.totalGetBytes.Load())/1_000_000.0, float64(c.totalGetBytes.Load())/1_000_000.0/c.totalGetDur.Load().Seconds())
34+
}
35+
putsLine := fmt.Sprintf("%d puts: %d errors, %s total dur",
36+
c.puts.Load(), c.putErrors.Load(), c.totalPutDur.Load().Round(100*time.Millisecond))
37+
if c.totalPutBytes.Load() > 0 {
38+
putsLine += fmt.Sprintf("; total %.2f MB; avg %.2f MB/s",
39+
float64(c.totalPutBytes.Load())/1_000_000.0, float64(c.totalPutBytes.Load())/1_000_000.0/c.totalPutDur.Load().Seconds())
40+
}
41+
return fmt.Sprintf("%s\n%s", getsLine, putsLine)
42+
}
43+
44+
// TODO: maybe there's a way to do this in stdlib, but I couldn't find it
45+
// this should give us a timestamp that at the very least Google Sheets supports,
46+
// like
47+
func csvDuration(d time.Duration) string {
48+
return time.Unix(0, 0).UTC().Add(d).Format("15:04:05.000")
49+
}
50+
51+
func (c *Counts) CSV(f io.Writer, header bool) error {
52+
w := csv.NewWriter(f)
53+
if header {
54+
err := w.Write([]string{"gets", "hits", "misses", "puts", "getErrors", "putErrors", "totalGetBytes", "totalGetDur", "totalPutBytes", "totalPutDur"})
55+
if err != nil {
56+
return err
57+
}
58+
}
59+
err := w.Write([]string{
60+
strconv.Itoa(int(c.gets.Load())),
61+
strconv.Itoa(int(c.hits.Load())),
62+
strconv.Itoa(int(c.misses.Load())),
63+
strconv.Itoa(int(c.puts.Load())),
64+
strconv.Itoa(int(c.getErrors.Load())),
65+
strconv.Itoa(int(c.putErrors.Load())),
66+
strconv.Itoa(int(c.totalGetBytes.Load())),
67+
csvDuration(c.totalGetDur.Load()),
68+
strconv.Itoa(int(c.totalPutBytes.Load())),
69+
csvDuration(c.totalPutDur.Load()),
70+
})
71+
if err != nil {
72+
return err
73+
}
74+
w.Flush()
75+
return w.Error()
76+
}

disk.go

Lines changed: 171 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,171 @@
1+
package main
2+
3+
import (
4+
"bytes"
5+
"context"
6+
"encoding/hex"
7+
"encoding/json"
8+
"fmt"
9+
"io"
10+
"log"
11+
"log/slog"
12+
"os"
13+
"path/filepath"
14+
"time"
15+
)
16+
17+
// indexEntry is the metadata that SimpleDiskCache stores on disk for an ActionID.
18+
type indexEntry struct {
19+
Version int `json:"v"`
20+
OutputID string `json:"o"`
21+
Size int64 `json:"n"`
22+
TimeNanos int64 `json:"t"`
23+
}
24+
25+
// DiskCache is a cache that stores objects as files on disk.
26+
//
27+
// It is a fork of [github.com/bradfitz/go-tool-cache/blob/main/cachers/disk.go#DiskCache] that adds counters and more logging
28+
type DiskCache struct {
29+
Counts
30+
dir string
31+
started bool
32+
log *slog.Logger
33+
}
34+
35+
func NewDiskCache(dir string) *DiskCache {
36+
return &DiskCache{
37+
dir: dir,
38+
log: slog.Default().WithGroup("disk"),
39+
}
40+
}
41+
42+
func (c *DiskCache) Start(context.Context) error {
43+
c.log.Debug("start", "dir", c.dir)
44+
err := os.MkdirAll(c.dir, 0755)
45+
if err != nil {
46+
return err
47+
}
48+
c.started = true
49+
return nil
50+
}
51+
52+
func (c *DiskCache) Get(_ context.Context, actionID string) (outputID, diskPath string, err error) {
53+
if !c.started {
54+
log.Fatal("not started")
55+
}
56+
c.Counts.gets.Add(1)
57+
c.log.Debug("get", "actionID", actionID)
58+
actionFile := filepath.Join(c.dir, fmt.Sprintf("a-%s", actionID))
59+
ij, err := os.ReadFile(actionFile)
60+
if err != nil {
61+
if os.IsNotExist(err) {
62+
c.Counts.misses.Add(1)
63+
return "", "", nil
64+
}
65+
c.Counts.getErrors.Add(1)
66+
return "", "", err
67+
}
68+
var ie indexEntry
69+
if err := json.Unmarshal(ij, &ie); err != nil {
70+
c.log.Error("json error", "actionID", actionID, "err", err)
71+
c.Counts.getErrors.Add(1)
72+
return "", "", nil
73+
}
74+
if _, err := hex.DecodeString(ie.OutputID); err != nil {
75+
c.Counts.getErrors.Add(1)
76+
// Protect against malicious non-hex OutputID on disk
77+
return "", "", nil
78+
}
79+
c.Counts.hits.Add(1)
80+
return ie.OutputID, filepath.Join(c.dir, fmt.Sprintf("o-%v", ie.OutputID)), nil
81+
}
82+
83+
func (c *DiskCache) Put(_ context.Context, actionID, objectID string, size int64, body io.Reader) (diskPath string, _ error) {
84+
if !c.started {
85+
log.Fatal("not started")
86+
}
87+
c.Counts.puts.Add(1)
88+
c.log.Debug("put", "actionID", actionID, "objectID", objectID, "size", size)
89+
file := filepath.Join(c.dir, fmt.Sprintf("o-%s", objectID))
90+
91+
// Special case empty files; they're both common and easier to do race-free.
92+
if size == 0 {
93+
zf, err := os.OpenFile(file, os.O_CREATE|os.O_TRUNC|os.O_RDWR, 0644)
94+
if err != nil {
95+
c.Counts.putErrors.Add(1)
96+
return "", err
97+
}
98+
_ = zf.Close()
99+
} else {
100+
wrote, err := writeAtomic(file, body)
101+
if err != nil {
102+
c.Counts.putErrors.Add(1)
103+
return "", err
104+
}
105+
if wrote != size {
106+
c.Counts.putErrors.Add(1)
107+
return "", fmt.Errorf("wrote %d bytes, expected %d", wrote, size)
108+
}
109+
}
110+
111+
ij, err := json.Marshal(indexEntry{
112+
Version: 1,
113+
OutputID: objectID,
114+
Size: size,
115+
TimeNanos: time.Now().UnixNano(),
116+
})
117+
if err != nil {
118+
c.Counts.putErrors.Add(1)
119+
return "", err
120+
}
121+
actionFile := filepath.Join(c.dir, fmt.Sprintf("a-%s", actionID))
122+
if _, err := writeAtomic(actionFile, bytes.NewReader(ij)); err != nil {
123+
c.Counts.putErrors.Add(1)
124+
return "", err
125+
}
126+
return file, nil
127+
}
128+
129+
func (c *DiskCache) Close() error {
130+
if !c.started {
131+
log.Fatal("not started")
132+
}
133+
c.started = false
134+
c.log.Debug("close")
135+
return nil
136+
}
137+
138+
func writeTempFile(dest string, r io.Reader) (string, int64, error) {
139+
tf, err := os.CreateTemp(filepath.Dir(dest), filepath.Base(dest)+".*")
140+
if err != nil {
141+
return "", 0, err
142+
}
143+
fileName := tf.Name()
144+
defer func() {
145+
_ = tf.Close()
146+
if err != nil {
147+
_ = os.Remove(fileName)
148+
}
149+
}()
150+
size, err := io.Copy(tf, r)
151+
if err != nil {
152+
return "", 0, err
153+
}
154+
return fileName, size, nil
155+
}
156+
157+
func writeAtomic(dest string, r io.Reader) (int64, error) {
158+
tempFile, size, err := writeTempFile(dest, r)
159+
if err != nil {
160+
return 0, err
161+
}
162+
defer func() {
163+
if err != nil {
164+
_ = os.Remove(tempFile)
165+
}
166+
}()
167+
if err = os.Rename(tempFile, dest); err != nil {
168+
return 0, err
169+
}
170+
return size, nil
171+
}

0 commit comments

Comments
 (0)