27
27
28
28
from aiohttp_client_cache import CachedSession , SQLiteBackend
29
29
import aiohttp
30
+ import pandas as pd
30
31
import tqdm
31
32
import tqdm .asyncio
32
33
import yaml
@@ -150,6 +151,7 @@ async def delete_image(session, image, digest, tags, dry_run=False):
150
151
if dry_run :
151
152
fetch = session .get
152
153
verb = "Checking"
154
+ return
153
155
else :
154
156
fetch = session .delete
155
157
verb = "Deleting"
@@ -246,7 +248,7 @@ async def bounded(f, *args, **kwargs):
246
248
247
249
async with CachedSession (
248
250
connector = aiohttp .TCPConnector (limit = 2 * concurrency ),
249
- cache = SQLiteBackend (expire_after = 24 * 3600 ),
251
+ cache = SQLiteBackend (expire_after = 72 * 3600 ),
250
252
** auth_kwargs ,
251
253
) as session :
252
254
@@ -262,6 +264,8 @@ def should_delete_repository(image):
262
264
ci_string in image for ci_string in CI_STRINGS
263
265
):
264
266
return False
267
+ else :
268
+ return True
265
269
266
270
def should_fetch_repository (image ):
267
271
if not any (substring in image for substring in R2D_STRINGS ):
@@ -294,11 +298,11 @@ def should_fetch_repository(image):
294
298
raise RuntimeError (
295
299
f"No images matching prefix { prefix } . Would delete all images!"
296
300
)
297
- print (f"Not deleting { repos_to_keep } images starting with { prefix } " )
301
+ print (f"Not deleting { repos_to_keep } repos starting with { prefix } " )
298
302
if not tag_futures :
299
303
print ("Nothing to delete" )
300
304
return
301
- print (f"{ len (tag_futures )} images to delete (not counting tags)" )
305
+ print (f"{ len (tag_futures )} repos to consider for deletion (not counting tags)" )
302
306
303
307
delete_futures = set ()
304
308
done = set ()
@@ -325,41 +329,64 @@ def should_delete_tag(image, info):
325
329
return False
326
330
327
331
# check cutoff
328
- image_ms = int (info ["timeCreatedMs " ])
332
+ image_ms = int (info ["timeUploadedMs " ])
329
333
image_datetime = datetime .fromtimestamp (image_ms / 1e3 )
330
334
# sanity check timestamps
331
335
if image_datetime < FIVE_YEARS_AGO or image_datetime > TOMORROW :
332
336
raise RuntimeError (
333
337
f"Not deleting image with weird date: { image } , { info } , { image_datetime } "
334
338
)
335
339
if delete_before_ms > image_ms :
336
- # if dry_run:
337
- # print(
338
- # f"\nWould delete {image}:{','.join(info['tag'])} {image_datetime.isoformat()}"
339
- # )
340
340
return True
341
341
else :
342
342
return False
343
343
344
+ def save_stats ():
345
+ df = pd .DataFrame (rows , columns = [
346
+ "image" ,
347
+ "digest" ,
348
+ "tags" ,
349
+ "size" ,
350
+ "date" ,
351
+ ])
352
+ today = datetime .today ()
353
+ fname = f"registry-{ release } -{ today .strftime ('%Y-%m-%d' )} .pkl"
354
+ df .to_pickle (fname )
355
+
356
+ rows = []
344
357
try :
345
358
for f in tqdm .tqdm (
346
359
asyncio .as_completed (tag_futures ),
347
360
total = len (tag_futures ),
348
361
position = 1 ,
349
- desc = "images retrieved" ,
362
+ desc = "repos retrieved" ,
350
363
):
351
364
manifest = await f
352
365
image = manifest ["name" ]
353
366
delete_whole_repo = should_delete_repository (image )
354
367
if delete_whole_repo and len (manifest ["manifest" ]) > 1 :
355
368
delete_progress .total += len (manifest ["manifest" ]) - 1
369
+
356
370
for digest , info in manifest ["manifest" ].items ():
371
+ image_ms = int (info ["timeUploadedMs" ])
372
+ image_datetime = datetime .fromtimestamp (image_ms / 1e3 )
373
+ nbytes = int (info ["imageSizeBytes" ])
374
+ rows .append (
375
+ (
376
+ image ,
377
+ digest ,
378
+ "," .join (info ["tag" ]),
379
+ nbytes ,
380
+ image_datetime ,
381
+ )
382
+ )
383
+ if len (rows ) % 100 == 0 :
384
+ save_stats ()
357
385
if not should_delete_tag (image , info ):
358
386
continue
359
387
if not delete_whole_repo :
360
388
# not counted yet
361
389
delete_progress .total += 1
362
- nbytes = int (info ["imageSizeBytes" ])
363
390
delete_byte_progress .total += nbytes
364
391
f = asyncio .ensure_future (
365
392
bounded (
@@ -389,6 +416,8 @@ def should_delete_tag(image, info):
389
416
if delete_futures :
390
417
await asyncio .gather (* delete_futures )
391
418
finally :
419
+ save_stats ()
420
+
392
421
delete_progress .close ()
393
422
delete_byte_progress .close ()
394
423
print ("\n \n \n \n " )
0 commit comments