Skip to content

Commit d0d2ee4

Browse files
committed
Support unicode dirname/filename extended header
1 parent 5160e71 commit d0d2ee4

File tree

5 files changed

+141
-9
lines changed

5 files changed

+141
-9
lines changed

config.h.in

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,9 @@
266266
/* Define to 1 if your <sys/time.h> declares `struct tm'. */
267267
#undef TM_IN_SYS_TIME
268268

269+
/* Define to 1 if you want to support unicode filename */
270+
#undef UNICODE_FILENAME
271+
269272
/* Enable extensions on AIX 3, Interix. */
270273
#ifndef _ALL_SOURCE
271274
# undef _ALL_SOURCE

configure.ac

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,21 @@ if test "x$enable_indicator" = xyes; then
245245
[Define to 1 if you want to use the incremental indicator])
246246
fi
247247

248+
# support unicode filename
249+
AC_MSG_CHECKING(whether unicode filename is supported)
250+
AC_ARG_ENABLE(unicode-filename,
251+
AC_HELP_STRING([--enable-unicode-filename],
252+
[support unicode filename [[default=yes]]]),
253+
,
254+
# default
255+
enable_unicode_filename=yes)
256+
257+
AC_MSG_RESULT($enable_unicode_filename)
258+
if test "x$enable_unicode_filename" = xyes; then
259+
AC_DEFINE(UNICODE_FILENAME, 1,
260+
[Define to 1 if you want to support unicode filename])
261+
fi
262+
248263
# support multibyte filename
249264
AC_MSG_CHECKING(kanji code of filename)
250265
AC_ARG_ENABLE(multibyte-filename,

src/header.c

Lines changed: 117 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -537,9 +537,14 @@ get_extended_header(fp, hdr, header_size, hcrc)
537537
unsigned int *hcrc;
538538
{
539539
char data[LZHEADER_STORAGE];
540-
int name_length;
540+
int name_length = 0;
541541
char dirname[FILENAME_LENGTH];
542542
int dir_length = 0;
543+
#ifdef UNICODE_FILENAME
544+
int name_u_length = 0;
545+
char dirname_u[FILENAME_LENGTH*2];
546+
int dir_u_length = 0;
547+
#endif
543548
int i;
544549
ssize_t whole_size = header_size;
545550
int ext_type;
@@ -634,6 +639,27 @@ get_extended_header(fp, hdr, header_size, hcrc)
634639
#endif
635640

636641
break;
642+
#ifdef UNICODE_FILENAME
643+
case 0x44:
644+
#if DUMP_HEADER
645+
if (verbose_listing && verbose > 1) printf(" < unicode filename >\n");
646+
#endif
647+
/* unicode filename */
648+
name_u_length =
649+
get_bytes(hdr->name_u, header_size-n, sizeof(hdr->name_u)-2);
650+
hdr->name_u[name_u_length] = 0;
651+
hdr->name_u[name_u_length+1] = 0;
652+
break;
653+
case 0x45:
654+
#if DUMP_HEADER
655+
if (verbose_listing && verbose > 1) printf(" < unicode directory >\n");
656+
#endif
657+
/* unicode directory */
658+
dir_u_length = get_bytes(dirname_u, header_size-n, sizeof(dirname_u)-2);
659+
dirname_u[dir_u_length] = 0;
660+
dirname_u[dir_u_length+1] = 0;
661+
break;
662+
#endif
637663
case 0x50:
638664
#if DUMP_HEADER
639665
if (verbose_listing && verbose > 1) printf(" < UNIX permission >\n");
@@ -720,6 +746,23 @@ get_extended_header(fp, hdr, header_size, hcrc)
720746
name_length += dir_length;
721747
}
722748

749+
#ifdef UNICODE_FILENAME
750+
/* concatenate unicode dirname and filename */
751+
if (dir_u_length) {
752+
if (name_u_length + dir_u_length >= sizeof(hdr->name_u)) {
753+
warning("the length of unicode pathname is too long.");
754+
name_u_length = sizeof(hdr->name_u) - dir_u_length - 2;
755+
hdr->name_u[name_u_length] = 0;
756+
hdr->name_u[name_u_length + 1] = 0;
757+
}
758+
memcpy(&dirname_u[dir_u_length], hdr->name_u, name_u_length);
759+
memcpy(hdr->name_u, dirname_u, name_u_length + dir_u_length);
760+
name_u_length += dir_u_length;
761+
hdr->name_u[name_u_length] = 0;
762+
hdr->name_u[name_u_length + 1] = 0;
763+
}
764+
#endif
765+
723766
return whole_size;
724767
}
725768

@@ -1214,10 +1257,14 @@ get_header(fp, hdr)
12141257
filename_case = optional_filename_case;
12151258

12161259
/* kanji code and delimiter conversion */
1217-
convert_filename(hdr->name, strlen(hdr->name), sizeof(hdr->name),
1218-
archive_kanji_code,
1219-
system_kanji_code,
1220-
archive_delim, system_delim, filename_case);
1260+
#ifdef UNICODE_FILENAME
1261+
if (convert_filename_from_unicode(hdr->name_u, hdr->name,
1262+
sizeof(hdr->name), system_kanji_code) == -1)
1263+
#endif
1264+
convert_filename(hdr->name, strlen(hdr->name), sizeof(hdr->name),
1265+
archive_kanji_code,
1266+
system_kanji_code,
1267+
archive_delim, system_delim, filename_case);
12211268

12221269
if ((hdr->unix_mode & UNIX_FILE_SYMLINK) == UNIX_FILE_SYMLINK) {
12231270
char *p;
@@ -1936,7 +1983,7 @@ ConvertUTF8ToEncoding(const char* inUTF8Buf,
19361983
#include <iconv.h>
19371984

19381985
static int
1939-
ConvertEncodingByIconv(const char *src, char *dst, int dstsize,
1986+
ConvertEncodingByIconv(const char *src, int srclen, char *dst, int dstsize,
19401987
const char *srcEnc, const char *dstEnc)
19411988
{
19421989
iconv_t ic;
@@ -1949,7 +1996,7 @@ ConvertEncodingByIconv(const char *src, char *dst, int dstsize,
19491996
dst_p = &szTmpBuf[0];
19501997
iLen = (size_t)sizeof(szTmpBuf)-1;
19511998
src_p = (char *)src;
1952-
sLen = (size_t)strlen(src);
1999+
sLen = (size_t)srclen;
19532000
memset(szTmpBuf, 0, sizeof(szTmpBuf));
19542001
memset(dst, 0, dstsize);
19552002

@@ -1973,6 +2020,67 @@ ConvertEncodingByIconv(const char *src, char *dst, int dstsize,
19732020
}
19742021
#endif /* defined(__APPLE__) */
19752022

2023+
#ifdef UNICODE_FILENAME
/*
 * Convert a UTF-16LE pathname (as stored by the unicode filename/directory
 * extended headers) into the multibyte encoding selected by to_code.
 *
 *   name_u   UTF-16LE path terminated by a 16-bit zero.  It is modified in
 *            place: every code unit whose two bytes both equal LHA_PATHSEP
 *            is rewritten to '/' (0x2F 0x00) before conversion.
 *   name     destination buffer; on success it holds the converted,
 *            NUL-terminated name.  It is left untouched on failure.
 *   size     size of name in bytes.
 *   to_code  CODE_SJIS, CODE_EUC, CODE_UTF8 or CODE_CAP.  CODE_CAP is
 *            produced by converting to SJIS and then calling sjis_to_cap().
 *
 * Returns 0 on success.  Returns -1 when iconv support is not compiled in,
 * to_code is not one of the encodings above, size is not positive, name_u
 * is empty or not terminated within FILENAME_LENGTH*2 bytes, or the iconv
 * conversion fails.  The caller uses -1 to fall back to the legacy name.
 */
int
convert_filename_from_unicode(char *name_u, char *name, int size, int to_code)
{
#if HAVE_ICONV
    char tmp[FILENAME_LENGTH];
    const char *toEnc;
    int want_cap = 0;
    /* LzHeader.name_u is declared as char[FILENAME_LENGTH*2]; never scan
       further than that even if the terminator is missing or misaligned
       (e.g. an odd-length payload). */
    const int limit = FILENAME_LENGTH * 2;
    int len;

    if (size <= 0)
        return -1;

    /* CAP has no iconv name: go through SJIS and post-process. */
    if (to_code == CODE_CAP) {
        want_cap = 1;
        to_code = CODE_SJIS;
    }

    switch (to_code) {
    case CODE_SJIS:
        toEnc = "SJIS";
        break;
    case CODE_EUC:
        toEnc = "EUC-JP";
        break;
    case CODE_UTF8:
        toEnc = "UTF-8";
        break;
    default:
        return -1;
    }

    /* Walk the string one UTF-16 code unit at a time, measuring its byte
       length and normalising the archive path separator to '/'. */
    for (len = 0; ; len += 2) {
        if (len + 1 >= limit)
            return -1;          /* no terminator inside the buffer */
        if (name_u[len] == 0x00 && name_u[len + 1] == 0x00)
            break;
        if ((unsigned char)name_u[len] == LHA_PATHSEP &&
            (unsigned char)name_u[len + 1] == LHA_PATHSEP) {
            name_u[len] = 0x2F;
            name_u[len + 1] = 0x00;
        }
    }

    if (len == 0)
        return -1;              /* no unicode name present */

    if (ConvertEncodingByIconv(name_u, len, tmp, sizeof(tmp),
                               "UTF-16LE", toEnc) == -1)
        return -1;

    /* strncpy() does not terminate on truncation; do it explicitly so that
       both the caller and sjis_to_cap() below always see a valid string. */
    strncpy(name, tmp, size);
    name[size - 1] = 0;

    if (want_cap) {
        sjis_to_cap(tmp, name, sizeof(tmp));
        strncpy(name, tmp, size);
        name[size - 1] = 0;
    }

    return 0;
#else
    return -1;
#endif
}
#endif
2083+
19762084
char *
19772085
sjis_to_utf8(char *dst, const char *src, size_t dstsize)
19782086
{
@@ -1983,7 +2091,7 @@ sjis_to_utf8(char *dst, const char *src, size_t dstsize)
19832091
kCFStringEncodingUseHFSPlusCanonical) == 0)
19842092
return dst;
19852093
#elif HAVE_ICONV
1986-
if (ConvertEncodingByIconv(src, dst, dstsize, "SJIS", "UTF-8") != -1)
2094+
if (ConvertEncodingByIconv(src, strlen(src), dst, dstsize, "SJIS", "UTF-8") != -1)
19872095
return dst;
19882096
#else
19892097
error("not support utf-8 conversion");
@@ -2007,7 +2115,7 @@ utf8_to_sjis(char *dst, const char *src, size_t dstsize)
20072115
kCFStringEncodingUseHFSPlusCanonical) == 0)
20082116
return dst;
20092117
#elif HAVE_ICONV
2010-
if (ConvertEncodingByIconv(src, dst, dstsize, "UTF-8", "SJIS") != -1)
2118+
if (ConvertEncodingByIconv(src, strlen(src), dst, dstsize, "UTF-8", "SJIS") != -1)
20112119
return dst;
20122120
#else
20132121
error("not support utf-8 conversion");

src/lha.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,9 @@ typedef struct LzHeader {
256256
unsigned char attribute;
257257
unsigned char header_level;
258258
char name[FILENAME_LENGTH];
259+
#ifdef UNICODE_FILENAME
260+
char name_u[FILENAME_LENGTH*2];
261+
#endif
259262
char realname[FILENAME_LENGTH];/* real name for symbolic link */
260263
unsigned int crc; /* file CRC */
261264
boolean has_crc; /* file CRC */

src/prototypes.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ boolean get_header(FILE *fp, LzHeader *hdr);
3333
int seek_lha_header(FILE *fp);
3434
void init_header(char *name, struct stat *v_stat, LzHeader *hdr);
3535
void write_header(FILE *fp, LzHeader *hdr);
36+
#ifdef UNICODE_FILENAME
37+
int convert_filename_from_unicode(char *name_u, char *name, int size, int to_code);
38+
#endif
3639
char *sjis_to_utf8(char *dst, const char *src, size_t dstsize);
3740
char *utf8_to_sjis(char *dst, const char *src, size_t dstsize);
3841
void euc2sjis(int *p1, int *p2);

0 commit comments

Comments
 (0)