Skip to content

Commit 5bc2feb

Browse files
committed
[Bugfix] Handle archive entry with UTF-8 properly
Handle UTF-8 archive entries properly by parsing them with QString and then updating the archive entry with wide characters via 'archive_entry_copy_pathname_w'. This fixes garbled extracted filenames, mostly on Windows; Unix-based OSes seem to be unaffected, but we apply this fix regardless of the OS just to be safe. Fixes #49 Signed-off-by: Divya Antony J.R <[email protected]>
1 parent fb634e1 commit 5bc2feb

9 files changed

+116
-28
lines changed

Diff for: .github/workflows/tests.yml

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
name: Tests
22

33
on:
4+
push:
5+
branches:
6+
- "*"
47
pull_request:
58
branches:
69
- master
@@ -66,7 +69,7 @@ jobs:
6669
strategy:
6770
fail-fast: false
6871
matrix:
69-
macos_version: [11, 12]
72+
macos_version: [12]
7073
qt6: ['enabled', 'disabled']
7174
shared: ['shared', 'static']
7275

Diff for: include/qarchiveutils_p.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,5 @@ int archiveWriteOpenQIODevice(struct archive* archive, QIODevice* device);
2929
/* Basic string manipulators. */
3030
char* concat(const char*, const char*);
3131
QString getDirectoryFileName(const QString&);
32+
bool isUTF8(const char*);
3233
#endif // QARCHIVE_UTILS_PRIVATE_HPP_INCLUDED

Diff for: src/qarchiveextractor_p.cc

+45-27
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ QJsonObject getArchiveEntryInformation(archive_entry* entry, bool bExcluded) {
177177
#if (QT_VERSION >= QT_VERSION_CHECK(5, 8, 0))
178178
QDateTime::fromSecsSinceEpoch(lastAccessT)
179179
#else
180-
QDateTime::fromTime_t(lastAccessT)
180+
QDateTime::fromTime_t(lastAccessT)
181181
#endif
182182
)
183183
.toString(Qt::ISODate)));
@@ -759,32 +759,33 @@ short ExtractorPrivate::extract() {
759759
return ArchiveWriteError;
760760
}
761761
}
762-
for (;;) {
763-
if (m_CurrentArchiveEntry) {
764-
err = writeData(m_CurrentArchiveEntry);
765-
if (err == OperationPaused) {
766-
return err;
767-
}
768-
if (err) { // NoError = 0
769-
m_ArchiveRead.clear();
770-
m_ArchiveWrite.clear();
771-
return err;
772-
}
773-
++n_ProcessedEntries;
774-
775-
// Report final progress signal after extracting the file fully.
776-
if (n_BytesTotal > 0 && n_TotalEntries > 0) {
777-
emit progress(archive_entry_pathname(m_CurrentArchiveEntry),
778-
n_ProcessedEntries, n_TotalEntries, n_BytesProcessed,
779-
n_BytesTotal);
780-
} else {
781-
emit progress(archive_entry_pathname(m_CurrentArchiveEntry), 1, 1, 1,
782-
1);
783-
}
784762

785-
archive_entry_clear(m_CurrentArchiveEntry);
786-
m_CurrentArchiveEntry = nullptr;
763+
if (m_CurrentArchiveEntry) {
764+
err = writeData(m_CurrentArchiveEntry);
765+
if (err == OperationPaused) {
766+
return err;
767+
}
768+
if (err) { // NoError = 0
769+
m_ArchiveRead.clear();
770+
m_ArchiveWrite.clear();
771+
return err;
772+
}
773+
++n_ProcessedEntries;
774+
775+
// Report final progress signal after extracting the file fully.
776+
if (n_BytesTotal > 0 && n_TotalEntries > 0) {
777+
emit progress(archive_entry_pathname(m_CurrentArchiveEntry),
778+
n_ProcessedEntries, n_TotalEntries, n_BytesProcessed,
779+
n_BytesTotal);
780+
} else {
781+
emit progress(archive_entry_pathname(m_CurrentArchiveEntry), 1, 1, 1, 1);
787782
}
783+
784+
archive_entry_clear(m_CurrentArchiveEntry);
785+
m_CurrentArchiveEntry = nullptr;
786+
}
787+
788+
for (;;) {
788789
ret = archive_read_next_header(m_ArchiveRead.data(), &entry);
789790
if (ret == ARCHIVE_EOF) {
790791
break;
@@ -853,14 +854,14 @@ short ExtractorPrivate::writeData(struct archive_entry* entry) {
853854

854855
if (!b_MemoryMode && b_RawMode && !m_RawOutputFilename.isEmpty()) {
855856
const auto& path = (QFileInfo(archive_entry_pathname(entry)).path() +
856-
QString::fromLatin1("/") + m_RawOutputFilename)
857+
QString::fromUtf8("/") + m_RawOutputFilename)
857858
.toStdWString();
858859
archive_entry_copy_pathname_w(entry, path.c_str());
859860
}
860861
if (b_hasBasePath) {
861862
const auto& relativePath =
862863
m_basePath
863-
.relativeFilePath(QString::fromLatin1("/") +
864+
.relativeFilePath(QString::fromUtf8("/") +
864865
archive_entry_pathname(entry))
865866
.toStdWString();
866867
if (relativePath == L".") { // Root directory
@@ -885,6 +886,23 @@ short ExtractorPrivate::writeData(struct archive_entry* entry) {
885886
#endif
886887
if (m_CurrentArchiveEntry != entry) {
887888
if (!b_MemoryMode) {
889+
// UTF-8 in archive entry messes up when extracting under Windows
890+
// when UTF-8 is not set, to fix this we first get the archive
891+
// entry pathname in raw bytes then convert it to wide characters
892+
// and set it as the new pathname, which should make libarchive
893+
// handle it better.
894+
895+
// Get current pathname
896+
auto ptname_cstr = archive_entry_pathname(entry);
897+
898+
// Check if UTF-8
899+
if (isUTF8(ptname_cstr)) {
900+
auto ptname = QString::fromUtf8(ptname_cstr);
901+
902+
auto wstr = ptname.toStdWString();
903+
archive_entry_copy_pathname_w(entry, wstr.c_str());
904+
}
905+
888906
ret = archive_write_header(m_ArchiveWrite.data(), entry);
889907
} else {
890908
currentNode.setFileInformation(getArchiveEntryInformation(entry, false));

Diff for: src/qarchiveutils_p.cc

+10
Original file line numberDiff line numberDiff line change
@@ -206,3 +206,13 @@ QString getDirectoryFileName(const QString& dir) {
206206
}
207207
return dir;
208208
}
209+
210+
// Reports whether `src` is well-formed UTF-8 that contains at least one
// multi-byte (non-ASCII) sequence.
//
// @param src NUL-terminated byte string; may be null.
// @return true only if every byte belongs to a well-formed UTF-8 sequence
//         and at least one sequence is longer than one byte. Null, empty
//         and pure-ASCII input yield false.
//
// Comparing QString::fromUtf8() with QString::fromLatin1() is not a valid
// UTF-8 test: fromLatin1() maps every byte to one code point (it does not
// produce '?'), so the two results differ for ANY input that has a byte
// >= 0x80, including names stored in a legacy 8-bit code page. Such names
// would then be decoded as UTF-8 and filled with replacement characters.
// The bytes are therefore validated directly against the well-formed
// ranges (no overlong forms, no surrogates, nothing above U+10FFFF).
bool isUTF8(const char* src) {
  if (src == nullptr) {
    return false;
  }

  const auto* cursor = reinterpret_cast<const unsigned char*>(src);
  bool hasMultiByte = false;

  while (*cursor != 0) {
    const unsigned char lead = *cursor++;
    if (lead < 0x80) {
      continue;  // Plain ASCII byte.
    }

    // Number of continuation bytes, and the allowed range of the FIRST
    // continuation byte (it is narrower for some lead bytes).
    int continuation = 0;
    unsigned char low = 0x80;
    unsigned char high = 0xBF;

    if (lead >= 0xC2 && lead <= 0xDF) {
      continuation = 1;
    } else if (lead >= 0xE0 && lead <= 0xEF) {
      continuation = 2;
      if (lead == 0xE0) {
        low = 0xA0;  // Reject overlong 3-byte forms.
      } else if (lead == 0xED) {
        high = 0x9F;  // Reject UTF-16 surrogates (U+D800..U+DFFF).
      }
    } else if (lead >= 0xF0 && lead <= 0xF4) {
      continuation = 3;
      if (lead == 0xF0) {
        low = 0x90;  // Reject overlong 4-byte forms.
      } else if (lead == 0xF4) {
        high = 0x8F;  // Reject code points above U+10FFFF.
      }
    } else {
      // Stray continuation byte, overlong lead (C0/C1) or F5..FF.
      return false;
    }

    for (int i = 0; i < continuation; ++i) {
      const unsigned char next = *cursor;
      // The NUL terminator is below `low`, so a truncated sequence is
      // rejected here without reading past the end of the string.
      if (next < low || next > high) {
        return false;
      }
      ++cursor;
      low = 0x80;
      high = 0xBF;
    }
    hasMultiByte = true;
  }

  return hasMultiByte;
}

Diff for: tests/QArchiveDiskCompressorTests.cc

+24
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ void QArchiveDiskCompressorTests::initTestCase() {
1414
dir.mkpath(TestCase7OutputDir);
1515
dir.mkpath(TestCase8OutputDir);
1616
dir.mkpath(TestCase9OutputDir);
17+
dir.mkpath(TestCase10OutputDir);
1718
}
1819

1920
void QArchiveDiskCompressorTests::simpleCompression() {
@@ -256,6 +257,29 @@ void QArchiveDiskCompressorTests::compressingSpecialCharacterFiles() {
256257
QVERIFY(QFileInfo::exists(TestCase9ArchivePath));
257258
}
258259

260+
/* Compresses one temporary file into TestCase10ArchivePath under the entry
 * name Test10EntryName (a nested path with non-ASCII directory and file
 * names) and verifies that the archive file exists afterwards. */
void QArchiveDiskCompressorTests::compressMultiLevelSpecialCharacterFiles() {
261+
QArchive::DiskCompressor e(TestCase10ArchivePath);
262+
263+
/* Write the contents to a temporary file, then add that file to the
 * archive under the multi-level entry name. */
264+
QFile TestOutput(TemporaryFilePath);
265+
QVERIFY((TestOutput.open(QIODevice::WriteOnly)) == true);
266+
TestOutput.write(Test10OutputContents.toUtf8());
267+
TestOutput.close();
268+
269+
e.addFiles(/*entry name(optional)=*/Test10EntryName, TemporaryFilePath);
270+
271+
/* Route compressor errors to the shared error handler. */
QObject::connect(&e, &QArchive::DiskCompressor::error, this,
272+
&QArchiveDiskCompressorTests::defaultErrorHandler);
273+
QSignalSpy spyInfo(&e, SIGNAL(finished()));
274+
e.start();
275+
276+
/* Passes if finished() arrives during the wait or has already been
 * recorded by the spy (at least one emission; the count is not checked
 * for being exactly one). */
277+
QVERIFY(spyInfo.wait() || spyInfo.count());
278+
279+
/* The archive should also exist. */
280+
QVERIFY(QFileInfo::exists(TestCase10ArchivePath));
281+
}
282+
259283
void QArchiveDiskCompressorTests::defaultErrorHandler(short code,
260284
const QString& file) {
261285
auto scode = QString::number(code);

Diff for: tests/QArchiveDiskCompressorTests.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ class QArchiveDiskCompressorTests : public QObject, private QArchiveTestCases {
2929
void compressingTarArchiveWithZSTD();
3030
void compressEmptyFiles();
3131
void compressingSpecialCharacterFiles();
32+
void compressMultiLevelSpecialCharacterFiles();
3233

3334
protected slots:
3435
static void defaultErrorHandler(short code, const QString& file);

Diff for: tests/QArchiveDiskExtractorTests.cc

+19
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,25 @@ void QArchiveDiskExtractorTests::extractSpecialCharacterFiles() {
245245
TestOutput.close();
246246
}
247247

248+
/* Extracts TestCase10ArchivePath into TestCase10OutputDir and verifies that
 * the nested, non-ASCII-named file Test10OutputFile exists and contains
 * Test10OutputContents.
 * NOTE(review): the archive is presumably the one written by the disk
 * compressor test for test case 10 -- confirm the test ordering. */
void QArchiveDiskExtractorTests::extractMultiLevelSpecialCharacterFiles() {
249+
QArchive::DiskExtractor e(TestCase10ArchivePath, TestCase10OutputDir);
250+
/* Route extractor errors to the shared error handler. */
QObject::connect(&e, &QArchive::DiskExtractor::error, this,
251+
&QArchiveDiskExtractorTests::defaultErrorHandler);
252+
253+
QFile TestOutput;
254+
QSignalSpy spyInfo(&e, SIGNAL(finished()));
255+
e.start();
256+
257+
/* Passes if finished() arrives during the wait, or exactly one emission
 * has already been recorded by the spy. */
QVERIFY(spyInfo.wait() || spyInfo.count() == 1);
258+
259+
TestOutput.setFileName(Test10OutputFile);
260+
261+
/* The extracted file must exist, be readable, and decode (as UTF-8) to
 * the expected contents. */
QVERIFY(TestOutput.exists() == true);
262+
QVERIFY((TestOutput.open(QIODevice::ReadOnly)) == true);
263+
QVERIFY(Test10OutputContents == QString::fromUtf8(TestOutput.readAll()));
264+
TestOutput.close();
265+
}
266+
248267
void QArchiveDiskExtractorTests::isExtractorObjectReuseable() {
249268
QArchive::DiskExtractor e(TestCase5ArchivePath, TestCase5OutputDir);
250269
QObject::connect(&e, &QArchive::DiskExtractor::error, this,

Diff for: tests/QArchiveDiskExtractorTests.hpp

+1
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ class QArchiveDiskExtractorTests : public QObject, private QArchiveTestCases {
3030
void extractTarArchiveWithNoFilters();
3131
void extractTarArchiveWithZSTD();
3232
void extractSpecialCharacterFiles();
33+
void extractMultiLevelSpecialCharacterFiles();
3334
void isExtractorObjectReuseable();
3435
void testProgress();
3536
protected slots:

Diff for: tests/QArchiveTestCases.hpp

+11
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,12 @@ class QArchiveTestCases {
5757
TestCase9ArchivePath = TestCasesDir + "Test9.zip";
5858
TestCase9OutputDir = TestOutputDir + "Test9";
5959
Test9OutputFile = TestCase9OutputDir + "/新建文件.txt";
60+
61+
TestCase10ArchivePath = TestCasesDir + "Test10.zip";
62+
TestCase10OutputDir = TestOutputDir + "Test10";
63+
Test10EntryName = "Дирек1/Другойрежиссер/Тест10.txt";
64+
Test10OutputFile =
65+
TestCase10OutputDir + "/Дирек1/Другойрежиссер/Тест10.txt";
6066
}
6167

6268
~QArchiveTestCases() = default;
@@ -91,6 +97,10 @@ class QArchiveTestCases {
9197
QString TestCase9ArchivePath;
9298
QString TestCase9OutputDir;
9399
QString Test9OutputFile;
100+
QString TestCase10ArchivePath;
101+
QString TestCase10OutputDir;
102+
QString Test10EntryName;
103+
QString Test10OutputFile;
94104
QString TemporaryFilePath;
95105

96106
protected:
@@ -105,6 +115,7 @@ class QArchiveTestCases {
105115
const QString Test6OutputContents = "TEST6SUCCESS!";
106116
const QString Test7OutputContents = "TEST7SUCCESS!";
107117
const QString Test9OutputContents = "测试9成功!";
118+
const QString Test10OutputContents = "Успешное выполнение теста 10!";
108119
};
109120

110121
#endif // QARCHIVE_TEST_CASES_HPP_INCLUDED

0 commit comments

Comments
 (0)