Skip to content

Commit 80d5785

Browse files
authored
Merge pull request #24 from intel/2024Q4
Release 24Q4
2 parents d220e31 + d83b49d commit 80d5785

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

69 files changed

+3369
-447
lines changed

Dockerfile.gaudi

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
FROM golang:1.22@sha256:a66eda637829ce891e9cf61ff1ee0edf544e1f6c5b0e666c7310dce231a66f28 as build
15+
FROM golang:1.23.4@sha256:70031844b8c225351d0bb63e2c383f80db85d92ba894e3da7e13bcf80efa9a37 as build
1616
ARG LOCAL_LICENSES
1717
WORKDIR /build
1818
COPY . .

Dockerfile.gpu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
FROM golang:1.22@sha256:a66eda637829ce891e9cf61ff1ee0edf544e1f6c5b0e666c7310dce231a66f28 as build
15+
FROM golang:1.23.4@sha256:70031844b8c225351d0bb63e2c383f80db85d92ba894e3da7e13bcf80efa9a37 as build
1616
ARG LOCAL_LICENSES
1717
WORKDIR /build
1818
COPY . .

Dockerfile.qat

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
FROM golang:1.22@sha256:a66eda637829ce891e9cf61ff1ee0edf544e1f6c5b0e666c7310dce231a66f28 as build
15+
FROM golang:1.23.4@sha256:70031844b8c225351d0bb63e2c383f80db85d92ba894e3da7e13bcf80efa9a37 as build
1616
ARG LOCAL_LICENSES
1717
WORKDIR /build
1818
COPY . .

Makefile

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ include $(CURDIR)/qat.mk
6666

6767

6868
.PHONY: build
69-
build: gpu gaudi qat bin/intel-cdi-specs-generator
69+
build: gpu gaudi qat bin/intel-cdi-specs-generator bin/device-faker
7070

7171

7272
bin/intel-cdi-specs-generator: cmd/cdi-specs-generator/*.go $(GPU_COMMON_SRC)
@@ -135,18 +135,24 @@ clean-licenses:
135135
licenses: clean-licenses
136136
GO111MODULE=on go run github.com/google/go-licenses@$(GOLICENSES_VERSION) \
137137
save \
138-
"./cmd/cdi-specs-generator" \
139138
"./cmd/kubelet-gaudi-plugin" \
140139
"./cmd/kubelet-gpu-plugin" \
141-
"./cmd/qat-showdevice" \
142140
"./cmd/kubelet-qat-plugin" \
143-
"./pkg/version" \
144-
"./pkg/gpu/cdihelpers" \
145-
"./pkg/gpu/device" \
146-
"./pkg/gpu/discovery" \
141+
"./cmd/cdi-specs-generator" \
142+
"./cmd/device-faker" \
143+
"./cmd/qat-showdevice" \
147144
"./pkg/gaudi/cdihelpers" \
148145
"./pkg/gaudi/device" \
149146
"./pkg/gaudi/discovery" \
147+
"./pkg/gpu/cdihelpers" \
148+
"./pkg/gpu/device" \
149+
"./pkg/gpu/discovery" \
150+
"./pkg/qat/cdi" \
151+
"./pkg/qat/device" \
152+
"./pkg/helpers" \
153+
"./pkg/fakesysfs" \
154+
"./pkg/plugintesthelpers" \
155+
"./pkg/version" \
150156
--save_path licenses
151157

152158

@@ -159,7 +165,7 @@ format:
159165
gofmt -w -s -l ./
160166

161167
cilint:
162-
golangci-lint --max-same-issues 0 --max-issues-per-linter 0 run ./...
168+
golangci-lint --max-same-issues 0 --max-issues-per-linter 0 run --timeout 2m0s ./...
163169

164170
vet:
165171
go vet $(PKG)/...
@@ -185,7 +191,14 @@ yamllint:
185191
.PHONY: test coverage
186192
COVERAGE_FILE := coverage.out
187193
test:
188-
go test -v -coverprofile=$(COVERAGE_FILE) $(MODULE)/...
194+
go test -v -coverprofile=$(COVERAGE_FILE) $(shell go list ./... | grep -v "test/e2e")
189195

190196
coverage: test
191197
go tool cover -html=$(COVERAGE_FILE) -o coverage.html
198+
@echo coverage file: coverage.html
199+
@echo "average coverage (except main.go files)"
200+
grep '<option value=' coverage.html | grep -v 'main.go' | grep -o '(.*)' | tr -d '()%' | awk 'BEGIN{s=0;}{s+=$$1;}END{print s/NR;}'
201+
202+
.PHONY: e2e-qat
203+
e2e-qat:
204+
go test -v ./test/e2e/... --clean-start=true -ginkgo.v -ginkgo.trace -ginkgo.show-node-events

cmd/cdi-specs-generator/main.go

Lines changed: 81 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ var (
3838
"gpu": true,
3939
"gaudi": true,
4040
}
41-
version = "v0.2.0"
41+
version = "v0.3.0"
4242
)
4343

4444
func main() {
@@ -50,6 +50,60 @@ func main() {
5050
}
5151
}
5252

53+
func cobraRunFunc(cmd *cobra.Command, args []string) error {
54+
cdiDir := cmd.Flag("cdi-dir").Value.String()
55+
namingStyle := cmd.Flag("naming").Value.String()
56+
57+
fmt.Println("Refreshing CDI registry")
58+
if err := cdiapi.Configure(cdiapi.WithSpecDirs(cdiDir)); err != nil {
59+
fmt.Printf("unable to refresh the CDI registry: %v", err)
60+
return err
61+
}
62+
63+
cdiCache, err := cdiapi.NewCache(cdiapi.WithAutoRefresh(false), cdiapi.WithSpecDirs(cdiDir))
64+
if err != nil {
65+
return err
66+
}
67+
68+
dryRun := false
69+
if cmd.Flag("dry-run").Value.String() == "true" {
70+
dryRun = true
71+
}
72+
73+
for _, argx := range args {
74+
switch strings.ToLower(argx) {
75+
case "gpu":
76+
if err := handleGPUDevices(cdiCache, namingStyle, dryRun); err != nil {
77+
return err
78+
}
79+
case "gaudi":
80+
if err := handleGaudiDevices(cdiCache, namingStyle, dryRun); err != nil {
81+
return err
82+
}
83+
}
84+
}
85+
86+
if dryRun {
87+
return nil
88+
}
89+
90+
if err := cdiCache.Refresh(); err != nil {
91+
return err
92+
}
93+
94+
// Fix CDI spec permissions as the default permission (600) prevents
95+
// use without root or sudo:
96+
// https://github.com/cncf-tags/container-device-interface/issues/224
97+
specs := cdiCache.GetVendorSpecs(gpuDevice.CDIVendor) // Vendor is same for both gpu and gaudi
98+
for _, spec := range specs {
99+
if err := os.Chmod(spec.GetPath(), 0o644); err != nil {
100+
return err
101+
}
102+
}
103+
104+
return nil
105+
}
106+
53107
func newCommand() *cobra.Command {
54108
cmd := &cobra.Command{
55109
Use: "intel-cdi-specs-generator [--cdi-dir=<cdi directory>] [--naming=<style>] <gpu | gaudi>",
@@ -69,69 +123,38 @@ func newCommand() *cobra.Command {
69123

70124
return nil
71125
},
72-
RunE: func(cmd *cobra.Command, args []string) error {
73-
cdiDir := cmd.Flag("cdi-dir").Value.String()
74-
namingStyle := cmd.Flag("naming").Value.String()
75-
76-
fmt.Println("Refreshing CDI registry")
77-
if err := cdiapi.Configure(cdiapi.WithSpecDirs(cdiDir)); err != nil {
78-
fmt.Printf("unable to refresh the CDI registry: %v", err)
79-
return err
80-
}
81-
82-
cdiCache, err := cdiapi.NewCache(cdiapi.WithAutoRefresh(false), cdiapi.WithSpecDirs(cdiDir))
83-
if err != nil {
84-
return err
85-
}
86-
87-
for _, argx := range args {
88-
switch strings.ToLower(argx) {
89-
case "gpu":
90-
if err := handleGPUDevices(cdiCache, namingStyle); err != nil {
91-
return err
92-
}
93-
case "gaudi":
94-
if err := handleGaudiDevices(cdiCache, namingStyle); err != nil {
95-
return err
96-
}
97-
}
98-
}
99-
100-
if err := cdiCache.Refresh(); err != nil {
101-
return err
102-
}
103-
104-
// Fix CDI spec permissions as the default permission (600) prevents
105-
// use without root or sudo:
106-
// https://github.com/cncf-tags/container-device-interface/issues/224
107-
specs := cdiCache.GetVendorSpecs(gpuDevice.CDIVendor) // Vendor is same for both gpu and gaudi
108-
for _, spec := range specs {
109-
if err := os.Chmod(spec.GetPath(), 0o644); err != nil {
110-
return err
111-
}
112-
}
113-
114-
return nil
115-
},
126+
RunE: cobraRunFunc,
116127
}
117128

118129
cmd.Version = version
119130
cmd.Flags().BoolP("version", "v", false, "Show the version of the binary")
120131
cmd.Flags().String("cdi-dir", "/etc/cdi", "CDI spec directory")
121132
cmd.Flags().String("naming", "classic", "Naming of CDI devices. Options: classic, machine")
133+
cmd.Flags().BoolP("dry-run", "n", false, "Dry-run, do not create CDI manifests")
122134
cmd.SetVersionTemplate("Intel CDI Specs Generator Version: {{.Version}}\n")
123135

124136
return cmd
125137
}
126138

127-
func handleGPUDevices(cdiCache *cdiapi.Cache, namingStyle string) error {
139+
func handleGPUDevices(cdiCache *cdiapi.Cache, namingStyle string, dryRun bool) error {
128140
sysfsDir := gpuDevice.GetSysfsRoot()
129141

142+
fmt.Println("Scanning for GPUs")
143+
130144
detectedDevices := gpuDiscovery.DiscoverDevices(sysfsDir, namingStyle)
131145
if len(detectedDevices) == 0 {
132146
fmt.Println("No supported devices detected")
133147
}
134148

149+
fmt.Println("Detected supported devices")
150+
for gpuName, gpu := range detectedDevices {
151+
fmt.Printf("GPU: %v=%v (%v)\n", gpuDevice.CDIKind, gpuName, gpu.ModelName)
152+
}
153+
154+
if dryRun {
155+
return nil
156+
}
157+
135158
// syncDetectedDevicesWithCdiRegistry overrides uid in detecteddevices from existing cdi spec
136159
if err := gpuCdihelpers.SyncDetectedDevicesWithRegistry(cdiCache, detectedDevices, true); err != nil {
137160
fmt.Printf("unable to sync detected devices to CDI registry: %v", err)
@@ -141,14 +164,25 @@ func handleGPUDevices(cdiCache *cdiapi.Cache, namingStyle string) error {
141164
return nil
142165
}
143166

144-
func handleGaudiDevices(cdiCache *cdiapi.Cache, namingStyle string) error {
167+
func handleGaudiDevices(cdiCache *cdiapi.Cache, namingStyle string, dryRun bool) error {
145168
sysfsDir := gaudiDevice.GetSysfsRoot()
146169

170+
fmt.Println("Scanning for Gaudi accelerators")
171+
147172
detectedDevices := gaudiDiscovery.DiscoverDevices(sysfsDir, namingStyle)
148173
if len(detectedDevices) == 0 {
149174
fmt.Println("No supported devices detected")
150175
}
151176

177+
fmt.Println("Detected supported devices")
178+
for gaudiName, gaudi := range detectedDevices {
179+
fmt.Printf("Gaudi: %v=%v (%v)\n", gaudiDevice.CDIKind, gaudiName, gaudi.ModelName)
180+
}
181+
182+
if dryRun {
183+
return nil
184+
}
185+
152186
// syncDetectedDevicesWithCdiRegistry overrides uid in detecteddevices from existing cdi spec
153187
if err := gaudiCdihelpers.SyncDetectedDevicesWithRegistry(cdiCache, detectedDevices, true); err != nil {
154188
fmt.Printf("unable to sync detected devices to CDI registry: %v", err)

0 commit comments

Comments
 (0)