From f65b07726ddf4f7459101a9c8eed072b0a7d7a41 Mon Sep 17 00:00:00 2001 From: Cass Fino-Radin Date: Wed, 18 Sep 2024 13:01:08 -0400 Subject: [PATCH 1/2] Bag to destination MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Could use some more testing, but I think it works! Added new arguments "destination" and "name". If destination is used, the contents of the source directory will all be copied into the "destination", and the bag will be saved there rather than bagging in place. If no "name" is supplied, the bag directory will be named whatever the name of the source directory is. If "name" is supplied, it will be named that. Most importantly — the manifest checksums are generated from the source files, not the copied files, so this provides perfect chain of custody. We do this by modifying make_manifests to check if we are bagging to a destination, and if so, it generates them from the source, and handles the paths accordingly so that they are correct in the manifest. --- bagit.py | 62 ++++++++++++++++++++++++++++++-- locale/en/LC_MESSAGES/.DS_Store | Bin 0 -> 6148 bytes 2 files changed, 59 insertions(+), 3 deletions(-) create mode 100644 locale/en/LC_MESSAGES/.DS_Store diff --git a/bagit.py b/bagit.py index 458fba8..b310dd5 100755 --- a/bagit.py +++ b/bagit.py @@ -20,6 +20,7 @@ from datetime import date from functools import partial from os.path import abspath, isdir, isfile, join +import shutil try: from importlib.metadata import version @@ -144,7 +145,7 @@ def find_locale_dir(): def make_bag( - bag_dir, bag_info=None, processes=1, checksums=None, checksum=None, encoding="utf-8" + bag_dir, bag_info=None, processes=1, checksums=None, checksum=None, encoding="utf-8", destination=None, name=None ): """ Convert a given directory into a bag. You can pass in arbitrary @@ -165,6 +166,35 @@ def make_bag( checksums = DEFAULT_CHECKSUMS bag_dir = os.path.abspath(bag_dir) + + source_dir = None + + # If a destination is provided, copy the directory to the new destination + if destination: + destination = os.path.abspath(destination) + + if name != None: + bagdir = name + else: + bagdir = os.path.basename(bag_dir) + + if not os.path.exists(destination): + os.makedirs(destination) + + # Get the final path where the contents of bag_dir will be copied + destination_subdir = os.path.join(destination, bagdir) + + + # Check if the destination subdirectory already exists + if not os.path.exists(destination_subdir): + # Copy the contents of the source bag_dir to the destination subdirectory + shutil.copytree(bag_dir, destination_subdir) + else: + raise FileExistsError(f"The directory '{destination_subdir}' already exists. Choose a different destination or delete the existing folder.") + + source_dir = bag_dir + bag_dir = destination_subdir # Update bag_dir to point to the new location + cwd = os.path.abspath(os.path.curdir) if cwd.startswith(bag_dir) and cwd != bag_dir: @@ -240,9 +270,14 @@ def make_bag( # original directory os.chmod("data", os.stat(cwd).st_mode) - total_bytes, total_files = make_manifests( - "data", processes, algorithms=checksums, encoding=encoding + if source_dir != None: + total_bytes, total_files = make_manifests( + source_dir, processes, algorithms=checksums, encoding=encoding ) + else: + total_bytes, total_files = make_manifests( + "data", processes, algorithms=checksums, encoding=encoding + ) LOGGER.info(_("Creating bagit.txt")) txt = """BagIt-Version: 0.97\nTag-File-Character-Encoding: UTF-8\n""" @@ -1273,6 +1308,9 @@ def make_manifests(data_dir, processes, algorithms=DEFAULT_CHECKSUMS, encoding=" with open_text_file(manifest_filename, "w", encoding=encoding) as manifest: for digest, filename, byte_count in values: + if data_dir != "data": + relative_path = os.path.relpath(filename, data_dir) + filename = os.path.join("data", relative_path) manifest.write("%s %s\n" % (digest, _encode_filename(filename))) num_files[algorithm] += 1 total_bytes[algorithm] += byte_count @@ -1509,6 +1547,21 @@ def _make_parser(): ), ) + parser.add_argument( + "--destination", + help=_( + "The directory where the bag will be created (default: the same as the" + " source directory)" + ), + ) + + parser.add_argument( + "--name", + help=_( + "this is what the encolsing directory will be named. If not provided it will use tha name of the source directory" + ), + ) + checksum_args = parser.add_argument_group( _("Checksum Algorithms"), _( @@ -1544,6 +1597,7 @@ def _make_parser(): ), ) + return parser @@ -1610,6 +1664,8 @@ def main(): bag_info=args.bag_info, processes=args.processes, checksums=args.checksums, + destination=args.destination, + name=args.name, ) except Exception as exc: LOGGER.error( diff --git a/locale/en/LC_MESSAGES/.DS_Store b/locale/en/LC_MESSAGES/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..e63ab86219d5caf9e2ae0ec10cef6c137392ee91 GIT binary patch literal 6148 zcmeHKO-sW-5Pe&FsCckPkNE={{0Fh7=%IQkdex=|6|^Z$dgv`b-8ZvCrPQJzNRgQ` z`*vq%_Tvp?x&Xpp`E(BS0raSXokKQXh|G&_NyjXLLDy(-iaAzj@UrFYhJ9o}a(9dZ z6FlRUdHc*?g&EG6&1&8hqh;MJ=sD(Zg_?dP-jU)S72dF9zoa!q&UG4UiaU-=EYv!s zrMCGBQ}dtEQ(~@k-)mY6j&_|S<9 zVP@DykK#pn+zPp zVPES1aQFLvGss8IfHUx~7zo{boDX; Date: Wed, 18 Sep 2024 13:44:52 -0400 Subject: [PATCH 2/2] single file support and bug fix - i think my previous commit accidentally broke bag in place --- bagit.py | 41 ++++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/bagit.py b/bagit.py index b310dd5..8813570 100755 --- a/bagit.py +++ b/bagit.py @@ -185,12 +185,19 @@ def make_bag( destination_subdir = os.path.join(destination, bagdir) - # Check if the destination subdirectory already exists - if not os.path.exists(destination_subdir): - # Copy the contents of the source bag_dir to the destination subdirectory - shutil.copytree(bag_dir, destination_subdir) + if os.path.isfile(bag_dir): + # If it's a file, copy it directly to the destination + if not os.path.exists(destination_subdir): + os.makedirs(destination_subdir) + destination_file = os.path.join(destination_subdir, os.path.basename(bag_dir)) + shutil.copy2(bag_dir, destination_file) # copy2 preserves metadata else: - raise FileExistsError(f"The directory '{destination_subdir}' already exists. Choose a different destination or delete the existing folder.") + # Check if the destination subdirectory already exists + if not os.path.exists(destination_subdir): + # Copy the contents of the source bag_dir to the destination subdirectory + shutil.copytree(bag_dir, destination_subdir) + else: + raise FileExistsError(f"The directory '{destination_subdir}' already exists. Choose a different destination or delete the existing folder.") source_dir = bag_dir bag_dir = destination_subdir # Update bag_dir to point to the new location @@ -1284,13 +1291,22 @@ def make_manifests(data_dir, processes, algorithms=DEFAULT_CHECKSUMS, encoding=" manifest_line_generator = partial(generate_manifest_lines, algorithms=algorithms) + # check if data_dir is a directory or a file + if os.path.isfile(data_dir): + # handle solitary file: create a single entry list + file_entries = [data_dir] + is_single_file = True + else: + file_entries = _walk(data_dir) + is_single_file = False + if processes > 1: pool = multiprocessing.Pool(processes=processes) - checksums = pool.map(manifest_line_generator, _walk(data_dir)) + checksums = pool.map(manifest_line_generator, file_entries) pool.close() pool.join() else: - checksums = [manifest_line_generator(i) for i in _walk(data_dir)] + checksums = [manifest_line_generator(i) for i in file_entries] # At this point we have a list of tuples which start with the algorithm name: manifest_data = {} @@ -1308,9 +1324,16 @@ def make_manifests(data_dir, processes, algorithms=DEFAULT_CHECKSUMS, encoding=" with open_text_file(manifest_filename, "w", encoding=encoding) as manifest: for digest, filename, byte_count in values: - if data_dir != "data": + if is_single_file: + + if data_dir != "data": + relative_path = os.path.basename(filename) + filename = os.path.join("data", relative_path) + else: relative_path = os.path.relpath(filename, data_dir) - filename = os.path.join("data", relative_path) + if data_dir != "data": + relative_path = os.path.relpath(filename, data_dir) + filename = os.path.join("data", relative_path) manifest.write("%s %s\n" % (digest, _encode_filename(filename))) num_files[algorithm] += 1 total_bytes[algorithm] += byte_count