Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions src/main/java/org/duraspace/bagit/BagWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import java.security.DigestOutputStream;
import java.security.MessageDigest;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
Expand All @@ -30,8 +31,8 @@ public class BagWriter {
private final Set<BagItDigest> tagAlgorithms;
private final Set<BagItDigest> payloadAlgorithms;

private final Map<BagItDigest, Map<File, String>> payloadRegistry;
private final Map<BagItDigest, Map<File, String>> tagFileRegistry;
private final Map<BagItDigest, LinkedHashMap<File, String>> payloadRegistry;
private final Map<BagItDigest, LinkedHashMap<File, String>> tagFileRegistry;
private final Map<String, Map<String, String>> tagRegistry;

/**
Expand Down Expand Up @@ -97,7 +98,7 @@ public File getRootDir() {
* @param algorithm Checksum digest algorithm name (e.g., "SHA-1")
* @param filemap Map of Files to checksum values
*/
public void registerChecksums(final BagItDigest algorithm, final Map<File, String> filemap) {
public void registerChecksums(final BagItDigest algorithm, final LinkedHashMap<File, String> filemap) {
if (!payloadAlgorithms.contains(algorithm)) {
throw new IllegalArgumentException("Invalid algorithm: " + algorithm);
}
Expand All @@ -112,7 +113,8 @@ public void registerChecksums(final BagItDigest algorithm, final Map<File, Strin
* @param values Map containing field/value pairs
*/
public void addTags(final String key, final Map<String, String> values) {
final Map<String, String> tagValues = tagRegistry.computeIfAbsent(key, k -> new HashMap<>());
final Map<String, String> tagValues =
tagRegistry.computeIfAbsent(key, k -> new LinkedHashMap<>());
tagValues.putAll(values);
}

Expand Down Expand Up @@ -146,7 +148,7 @@ public void write() throws IOException {
* @param registerToTags flag to check if the hash of the output should be stored in the {@code tagFileRegistry}
* @throws IOException if there's an error writing to the OutputStream
*/
private void writeManifests(final String prefix, final Map<BagItDigest, Map<File, String>> registry,
private void writeManifests(final String prefix, final Map<BagItDigest, LinkedHashMap<File, String>> registry,
final boolean registerToTags) throws IOException {
final String delimiter = " ";
final char backslash = '\\';
Expand Down Expand Up @@ -224,7 +226,8 @@ private OutputStream streamFor(final Path file) throws IOException {

private void addTagChecksum(final BagItDigest algorithm, final File f, final MessageDigest digest) {
if (digest != null) {
final Map<File, String> m = tagFileRegistry.computeIfAbsent(algorithm, key -> new HashMap<>());
final LinkedHashMap<File, String> m =
tagFileRegistry.computeIfAbsent(algorithm, key -> new LinkedHashMap<>());
m.put(f, HexEncoder.toString(digest.digest()));
}
}
Expand Down
26 changes: 25 additions & 1 deletion src/main/java/org/duraspace/bagit/serialize/BagSerializer.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,18 @@
* @author mikejritter
* @since 2020-02-24
*/
@FunctionalInterface
public interface BagSerializer {

/**
* Default date/time (in milliseconds since epoch) to set for Zip Entries
* that do not have a last modified date. If the date/time is not set
* then it will default to current system date/time.
* This is less than ideal, as it causes the MD5 checksum of the Zip file to
* change whenever the Zip file is regenerated (even if the compressed files are unchanged).
* 1589346000 seconds * 1000 = May 13, 2020 05:00:00 GMT (the date BagIt-Support 1.0.0 was released)
*/
long DEFAULT_MODIFIED_DATE = 1589346000L * 1000;

/**
* Serialize a BagIt bag depending on the format defined by the implementing class. This only puts the files into
* an archive, with the name of the {@code root} directory serving as the name of the final file.
Expand All @@ -26,4 +35,19 @@ public interface BagSerializer {
*/
Path serialize(Path root) throws IOException;

/**
* Serialize a BagIt bag and set file creation, last modified, and access times for each zip entry.
* Setting these times is required to ensure that MD5 checksums of identical bags created at
* different times will match.
*
* This only puts the files into an archive, with the name of the {@code root} directory serving
* as the name of the final file.
*
* @param root the {@link Path} which is the top level directory of the BagIt bag
* @param lastModifiedTime the time (in milliseconds since the epoch) to set time fields in file metadata
* @return the {@link Path} to the serialized BagIt bag
* @throws IOException if there is an error writing to the archive
* @throws UnsupportedOperationException if the child class does not implement this method
*/
Path serializeWithTimestamp(Path root, Long lastModifiedTime) throws IOException;
}
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,8 @@ public Path serialize(final Path root) throws IOException {
return serializedBag;
}

/**
 * Timestamped serialization is not available for this serializer.
 *
 * @param root the top level directory of the BagIt bag (unused)
 * @param lastModifiedTime the requested timestamp in milliseconds (unused)
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public Path serializeWithTimestamp(final Path root, final Long lastModifiedTime) throws IOException {
    final String reason = "This method is not supported.";
    throw new UnsupportedOperationException(reason);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,9 @@ public Path serialize(final Path root) throws IOException {

return serializedBag;
}

/**
 * Timestamped serialization is not available for this serializer.
 *
 * @param root the top level directory of the BagIt bag (unused)
 * @param lastModifiedTime the requested timestamp in milliseconds (unused)
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public Path serializeWithTimestamp(final Path root, final Long lastModifiedTime) throws IOException {
    final String reason = "This method is not supported.";
    throw new UnsupportedOperationException(reason);
}
}
67 changes: 67 additions & 0 deletions src/main/java/org/duraspace/bagit/serialize/ZipBagSerializer.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,19 @@
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.attribute.FileTime;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Iterator;
import java.util.stream.Stream;

import org.apache.commons.compress.archivers.zip.X000A_NTFS;
import org.apache.commons.compress.archivers.zip.X5455_ExtendedTimestamp;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveOutputStream;
import org.apache.commons.io.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Serialize a BagIt bag into a zip archive without compression
Expand All @@ -24,6 +31,8 @@
public class ZipBagSerializer implements BagSerializer {
private final String extension = ".zip";

private final Logger logger = LoggerFactory.getLogger(ZipBagSerializer.class);

@Override
public Path serialize(final Path root) throws IOException {
final Path parent = root.getParent().toAbsolutePath();
Expand Down Expand Up @@ -52,4 +61,62 @@ public Path serialize(final Path root) throws IOException {

return serializedBag;
}

/**
 * Serialize a BagIt bag into a zip archive in which every entry carries the same fixed timestamp.
 * <p>
 * The archive is written next to {@code root} as {@code <root name>.zip}. Each entry receives an
 * extended-timestamp and an NTFS extra field whose creation, modification and access times are all set to
 * the chosen time. As a side effect, the last modified time of every file and directory under {@code root}
 * is also updated on disk.
 *
 * @param root the top level directory of the BagIt bag; a relative path is resolved against the current
 *        working directory
 * @param lastModifiedTime the time in milliseconds since the epoch to stamp on each entry; when {@code null}
 *        or not positive, {@code DEFAULT_MODIFIED_DATE} is used instead
 * @return the {@link Path} to the zip archive
 * @throws IOException if the bag cannot be walked or the archive cannot be written
 */
@Override
public Path serializeWithTimestamp(final Path root, final Long lastModifiedTime) throws IOException {
    logger.info("Serializing bag with timestamp: {}", root.getFileName());

    // Work from an absolute root: a bare directory name has no parent, and the walked paths must be of the
    // same kind (absolute) as the parent they are relativized against.
    final Path absoluteRoot = root.toAbsolutePath();
    final Path parent = absoluteRoot.getParent();
    final String bagName = absoluteRoot.getFileName().toString();
    final Path serializedBag = parent.resolve(bagName + extension);

    // Use given last modified time or the default value; avoid an invalid timestamp
    final FileTime time;
    if ((lastModifiedTime != null) && (lastModifiedTime > 0)) {
        time = FileTime.fromMillis(lastModifiedTime);
    } else {
        time = FileTime.fromMillis(DEFAULT_MODIFIED_DATE);
    }

    // The timestamp is identical for every entry, so format it for the debug log only once
    final DateFormat df = new SimpleDateFormat("MM/dd/yyyy");
    final String formattedTime = df.format(time.toMillis());

    try (final OutputStream os = Files.newOutputStream(serializedBag);
         final ZipArchiveOutputStream zip = new ZipArchiveOutputStream(os);
         final Stream<Path> files = Files.walk(absoluteRoot)) {

        // Iterate the walk directly instead of using forEach: the body throws IOException, which would
        // otherwise have to be caught and wrapped inside a lambda.
        final Iterator<Path> itr = files.iterator();
        while (itr.hasNext()) {
            final Path bagEntry = itr.next();
            final String name = parent.relativize(bagEntry).toString();
            final ZipArchiveEntry entry = zip.createArchiveEntry(bagEntry.toFile(), name);

            logger.debug("Setting ZipArchiveEntry creation, last modified and last access times to: {}",
                         formattedTime);

            Files.setLastModifiedTime(bagEntry, time);

            final X5455_ExtendedTimestamp extendedTimestamp = new X5455_ExtendedTimestamp();
            extendedTimestamp.setCreateFileTime(time);
            extendedTimestamp.setModifyFileTime(time);
            extendedTimestamp.setAccessFileTime(time);
            entry.addExtraField(extendedTimestamp);

            final X000A_NTFS ntfsTimestamp = new X000A_NTFS();
            ntfsTimestamp.setCreateFileTime(time);
            ntfsTimestamp.setModifyFileTime(time);
            ntfsTimestamp.setAccessFileTime(time);
            entry.addExtraField(ntfsTimestamp);

            zip.putArchiveEntry(entry);
            // Directories get an entry but have no content to copy
            if (Files.isRegularFile(bagEntry)) {
                FileUtils.copyFile(bagEntry.toFile(), zip);
            }
            zip.closeArchiveEntry();
        }
    }

    return serializedBag;
}
}
26 changes: 18 additions & 8 deletions src/test/java/org/duraspace/bagit/BagWriterTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
import java.nio.file.Paths;
import java.security.MessageDigest;
import java.time.LocalDate;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
Expand Down Expand Up @@ -96,12 +96,18 @@ public void write() throws IOException {
Files.createDirectories(bag);
final BagWriter writer = new BagWriter(bag.toFile(), Sets.newHashSet(sha1, sha256, sha512));

// Setup the data files
// Set up the data files
final Path data = bag.resolve("data");
final Path file = Files.createFile(data.resolve(filename));
final Map<File, String> sha1Sums = Maps.newHashMap(file.toFile(), HexEncoder.toString(sha1MD.digest()));
final Map<File, String> sha256Sums = Maps.newHashMap(file.toFile(), HexEncoder.toString(sha256MD.digest()));
final Map<File, String> sha512Sums = Maps.newHashMap(file.toFile(), HexEncoder.toString(sha512MD.digest()));

final LinkedHashMap<File, String> sha1Sums = new LinkedHashMap<>();
sha1Sums.put(file.toFile(), HexEncoder.toString(sha1MD.digest()));

final LinkedHashMap<File, String> sha256Sums = new LinkedHashMap<>();
sha256Sums.put(file.toFile(), HexEncoder.toString(sha256MD.digest()));

final LinkedHashMap<File, String> sha512Sums = new LinkedHashMap<>();
sha512Sums.put(file.toFile(), HexEncoder.toString(sha512MD.digest()));

// second file
final Path file2 = Files.createFile(data.resolve(filename + "2"));
Expand Down Expand Up @@ -170,8 +176,12 @@ public void testWriteDistinctManifests() throws Exception {
// Setup the data files
final Path data = bag.resolve("data");
final Path file = Files.createFile(data.resolve(filename));
final Map<File, String> sha1Sums = Maps.newHashMap(file.toFile(), HexEncoder.toString(sha1MD.digest()));
final Map<File, String> sha256Sums = Maps.newHashMap(file.toFile(), HexEncoder.toString(sha256MD.digest()));

final LinkedHashMap<File, String> sha1Sums = new LinkedHashMap<>();
sha1Sums.put(file.toFile(), HexEncoder.toString(sha1MD.digest()));

final LinkedHashMap<File, String> sha256Sums = new LinkedHashMap<>();
sha256Sums.put(file.toFile(), HexEncoder.toString(sha256MD.digest()));

// second file
final Path file2 = Files.createFile(data.resolve(filename + "2"));
Expand Down Expand Up @@ -274,7 +284,7 @@ public void testAddInvalidAlgorithm() throws IOException {
final BagWriter writer = new BagWriter(bag.toFile(), Sets.newHashSet(sha1));

// we don't need to pass any files, just the errant BagItDigest
writer.registerChecksums(sha256, Collections.emptyMap());
writer.registerChecksums(sha256, new LinkedHashMap<>());
});
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,24 @@
package org.duraspace.bagit.serialize;

import static org.assertj.core.api.Assertions.assertThat;
import static org.duraspace.bagit.serialize.BagSerializer.DEFAULT_MODIFIED_DATE;
import static org.junit.jupiter.api.Assertions.assertEquals;

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.attribute.FileTime;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.zip.GZIPInputStream;

import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
import org.duraspace.bagit.profile.BagProfile;
import org.junit.jupiter.api.BeforeEach;
Expand Down Expand Up @@ -69,6 +73,28 @@ public void testZipSerializer() throws Exception {
Files.delete(writtenBag);
}

/**
 * Serializing with an explicit timestamp must produce a readable zip whose entries are all known bag files
 * and whose creation, last modified and last access times all equal the requested timestamp.
 */
@Test
public void testZipSerializerWithTimestamp() throws IOException {
    final BagSerializer zipper = SerializationSupport.serializerFor("zip", profile);
    final Path writtenBag = zipper.serializeWithTimestamp(bag, DEFAULT_MODIFIED_DATE);
    final FileTime expectedTime = FileTime.fromMillis(DEFAULT_MODIFIED_DATE);

    assertThat(writtenBag).exists();
    assertThat(writtenBag).isRegularFile();

    // read the archive back and check each entry's name and its three time fields
    try (ZipArchiveInputStream zipIn = new ZipArchiveInputStream(Files.newInputStream(writtenBag))) {
        ZipArchiveEntry entry;
        while ((entry = zipIn.getNextEntry()) != null) {
            assertThat(bagFiles).contains(Paths.get(entry.getName()));
            // JUnit's assertEquals takes (expected, actual); keeping that order makes failure messages correct
            assertEquals(expectedTime, entry.getCreationTime());
            assertEquals(expectedTime, entry.getLastModifiedTime());
            assertEquals(expectedTime, entry.getLastAccessTime());
        }
    }

    Files.delete(writtenBag);
}

@Test
public void testTarSerializer() throws Exception {
final BagSerializer serializer = SerializationSupport.serializerFor("tar", profile);
Expand Down Expand Up @@ -107,5 +133,4 @@ public void testGZipSerializer() throws Exception {

Files.delete(writtenBag);
}

}