From fa48d71223e19a2693fea531ea26257a221d48b2 Mon Sep 17 00:00:00 2001
From: Stuart MacDonald
Date: Wed, 22 Oct 2025 13:19:22 -0400
Subject: [PATCH] Add rough cut of 'blobbase'

Like interactive rebase for blobs

Signed-off-by: Stuart MacDonald
---
Review note: --blobbase now compares the command token exactly (b's' or
b'strip') on raw bytes. The earlier draft decoded the first token and tested
only its first character, so any token starting with "s" selected a blob for
stripping, and a first token that was not valid UTF-8 raised
UnicodeDecodeError.

NOTE(review): context-line indentation below was restored from a
whitespace-collapsed copy of this patch; if it does not match the target
file, apply with --ignore-whitespace.

 git-filter-repo | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/git-filter-repo b/git-filter-repo
index fb3de42e..9bb66b0f 100755
--- a/git-filter-repo
+++ b/git-filter-repo
@@ -2029,6 +2029,12 @@ EXAMPLES
     contents.add_argument('--strip-blobs-with-ids', metavar='BLOB-ID-FILENAME',
         help=_("Read git object ids from each line of the given file, and "
                "strip all of them from history"))
+    contents.add_argument('--blobbase', metavar='BLOB-SHAS-AND-PATHS',
+        help=_("Process an annotated blob-shas-and-paths.txt file. 'blobbase' "
+               "because it uses a command system similar to git's interactive "
+               "rebase. Commands: k, keep = keep blob; s, strip = "
+               "strip blob. Keep is the default if no command has been "
+               "specified."))
 
     refrename = parser.add_argument_group(title=_("Renaming of refs "
                                               "(see also --refname-callback)"))
@@ -2444,6 +2450,16 @@ EXAMPLES
         args.strip_blobs_with_ids = set(f.read().split())
     else:
       args.strip_blobs_with_ids = set()
+    # blobbase leverages args.strip_blobs_with_ids
+    if args.blobbase:
+      # Commands: k, keep = keep blob; s, strip = strip blob. No command means keep.
+      with open(args.blobbase, 'br') as f:
+        # Header lines start with "===" or "Format:"
+        # Blank lines are blank, comments start with "#"
+        # Not annotating a blob is interpreted as "keep", keep is keep
+        # Only a first token of exactly "s" or "strip" marks the blob id after it for stripping; compare bytes so nothing needs decoding
+        # git.git:sequencer.c:parse_insn_line() does not require the command to be in the first column
+        args.strip_blobs_with_ids.update(parts[1] for parts in (line.split() for line in f) if len(parts) >= 2 and parts[0] in (b's', b'strip'))
     if (args.partial or args.refs) and not args.replace_refs:
       args.replace_refs = 'update-no-add'
     args.repack = not (args.partial or args.refs or args.no_gc)
@@ -2874,6 +2890,7 @@ class RepoAnalyze(object):
     # List of filenames and sizes in descending order
     with open(os.path.join(reportdir, b"blob-shas-and-paths.txt"), 'bw') as f:
       f.write(("=== %s ===\n" % _("Files by sha and associated pathnames in reverse size")).encode())
+      f.write(("=== %s ===\n" % _("To use blobbase, prefix lines with commands: k, keep = keep blob; s, strip = strip blob. No command means keep.")).encode())
       f.write(_("Format: sha, unpacked size, packed size, filename(s) object stored as\n").encode())
       for sha, size in sorted(stats['packed_size'].items(),
                               key=lambda x:(x[1],x[0]), reverse=True):