11from contextlib import contextmanager
2- from ctypes import c_char_p , create_string_buffer
2+ from ctypes import create_string_buffer
33from enum import IntEnum
44import math
55
@@ -34,15 +34,19 @@ def format_time(seconds, nanos):
3434
3535class ArchiveEntry :
3636
37- __slots__ = ('_archive_p' , '_entry_p' )
37+ __slots__ = ('_archive_p' , '_entry_p' , 'header_codec' )
3838
def __init__(self, archive_p=None, header_codec='utf-8', **attributes):
    """Allocate memory for an `archive_entry` struct.

    Args:
        archive_p: stored unchanged on the instance as `_archive_p`;
            defaults to None.
        header_codec (str): name of the codec used to decode and encode
            file paths and other textual attributes.
        **attributes: when non-empty, passed to the `modify` method.
    """
    self._archive_p = archive_p
    self._entry_p = ffi.entry_new()
    # Store the codec before `modify` runs: `modify` assigns attributes
    # through the property setters, and those read `self.header_codec`.
    self.header_codec = header_codec
    if attributes:
        self.modify(**attributes)
4852
@@ -54,7 +58,7 @@ def __str__(self):
5458 """Returns the file's path"""
5559 return self .pathname
5660
57- def modify (self , ** attributes ):
61+ def modify (self , header_codec = None , ** attributes ):
5862 """Convenience method to modify the entry's attributes.
5963
6064 Args:
@@ -83,6 +87,8 @@ def modify(self, **attributes):
8387 rdevmajor (int): major part of the device number
8488 rdevminor (int): minor part of the device number
8589 """
90+ if header_codec :
91+ self .header_codec = header_codec
8692 for name , value in attributes .items ():
8793 setattr (self , name , value )
8894
@@ -112,23 +118,45 @@ def gid(self, gid):
112118
@property
def uname(self):
    """Return the user name recorded in the entry.

    The value from `ffi.entry_uname_w` is returned as-is when it is truthy.
    Otherwise the value from `ffi.entry_uname` is used; if that is `bytes`
    it is decoded with `self.header_codec`, and bytes that cannot be
    decoded are returned unchanged. The result is None when the fallback
    call returns None.
    """
    uname = ffi.entry_uname_w(self._entry_p)
    if not uname:
        uname = ffi.entry_uname(self._entry_p)
    # Only a byte-string result needs decoding; anything else passes through.
    if isinstance(uname, bytes):
        try:
            uname = uname.decode(self.header_codec)
        except UnicodeError:
            # Best effort: hand the raw bytes back instead of failing.
            pass
    return uname
116130
@uname.setter
def uname(self, value):
    """Set the user name recorded in the entry.

    Args:
        value (str | bytes): a `str` is encoded with `self.header_codec`;
            `bytes` are passed through unchanged.
    """
    import codecs  # local import keeps this method self-contained

    if not isinstance(value, bytes):
        value = value.encode(self.header_codec)
    # Normalise the codec name so aliases such as 'utf8' or 'UTF-8' are
    # recognised as UTF-8 too. An unknown or non-string codec name falls
    # back to the plain copy call.
    try:
        is_utf8 = codecs.lookup(self.header_codec).name == 'utf-8'
    except (LookupError, TypeError):
        is_utf8 = False
    if is_utf8:
        ffi.entry_update_uname_utf8(self._entry_p, value)
    else:
        # NOTE(review): presumably the copy variant stores the bytes without
        # assuming UTF-8 -- confirm against the libarchive documentation.
        ffi.entry_copy_uname(self._entry_p, value)
122139
@property
def gname(self):
    """Return the group name recorded in the entry.

    The value from `ffi.entry_gname_w` is returned as-is when it is truthy.
    Otherwise the value from `ffi.entry_gname` is used; if that is `bytes`
    it is decoded with `self.header_codec`, and bytes that cannot be
    decoded are returned unchanged. The result is None when the fallback
    call returns None.
    """
    gname = ffi.entry_gname_w(self._entry_p)
    if not gname:
        gname = ffi.entry_gname(self._entry_p)
    # Only a byte-string result needs decoding; anything else passes through.
    if isinstance(gname, bytes):
        try:
            gname = gname.decode(self.header_codec)
        except UnicodeError:
            # Best effort: hand the raw bytes back instead of failing.
            pass
    return gname
126151
@gname.setter
def gname(self, value):
    """Set the group name recorded in the entry.

    Args:
        value (str | bytes): a `str` is encoded with `self.header_codec`;
            `bytes` are passed through unchanged.
    """
    import codecs  # local import keeps this method self-contained

    if not isinstance(value, bytes):
        value = value.encode(self.header_codec)
    # Normalise the codec name so aliases such as 'utf8' or 'UTF-8' are
    # recognised as UTF-8 too. An unknown or non-string codec name falls
    # back to the plain copy call.
    try:
        is_utf8 = codecs.lookup(self.header_codec).name == 'utf-8'
    except (LookupError, TypeError):
        is_utf8 = False
    if is_utf8:
        ffi.entry_update_gname_utf8(self._entry_p, value)
    else:
        # NOTE(review): presumably the copy variant stores the bytes without
        # assuming UTF-8 -- confirm against the libarchive documentation.
        ffi.entry_copy_gname(self._entry_p, value)
132160
133161 def get_blocks (self , block_size = ffi .page_size ):
134162 """Read the file's content, keeping only one chunk in memory at a time.
@@ -294,28 +322,48 @@ def pathname(self):
294322 path = ffi .entry_pathname_w (self ._entry_p )
295323 if not path :
296324 path = ffi .entry_pathname (self ._entry_p )
297- try :
298- path = path .decode ()
299- except UnicodeError :
300- pass
325+ if path is not None :
326+ try :
327+ path = path .decode (self .header_codec )
328+ except UnicodeError :
329+ pass
301330 return path
302331
@pathname.setter
def pathname(self, value):
    """Set the entry's path.

    Args:
        value (str | bytes): a `str` is encoded with `self.header_codec`;
            `bytes` are passed through unchanged.
    """
    import codecs  # local import keeps this method self-contained

    if not isinstance(value, bytes):
        value = value.encode(self.header_codec)
    # Normalise the codec name so aliases such as 'utf8' or 'UTF-8' are
    # recognised as UTF-8 too. An unknown or non-string codec name falls
    # back to the plain copy call.
    try:
        is_utf8 = codecs.lookup(self.header_codec).name == 'utf-8'
    except (LookupError, TypeError):
        is_utf8 = False
    if is_utf8:
        ffi.entry_update_pathname_utf8(self._entry_p, value)
    else:
        # NOTE(review): presumably the copy variant stores the bytes without
        # assuming UTF-8 -- confirm against the libarchive documentation.
        ffi.entry_copy_pathname(self._entry_p, value)
308340
@property
def linkpath(self):
    """Return the link target recorded in the entry.

    When `self.issym` is true the symlink accessors are consulted,
    otherwise the hardlink accessors. In both cases the `_w` variant is
    tried first and the plain variant is the fallback when the first
    result is falsy. A `bytes` result is decoded with `self.header_codec`;
    bytes that cannot be decoded are returned unchanged.
    """
    if self.issym:
        path = (
            ffi.entry_symlink_w(self._entry_p) or
            ffi.entry_symlink(self._entry_p)
        )
    else:
        path = (
            ffi.entry_hardlink_w(self._entry_p) or
            ffi.entry_hardlink(self._entry_p)
        )
    if isinstance(path, bytes):
        try:
            path = path.decode(self.header_codec)
        except UnicodeError:
            # Best effort: hand the raw bytes back instead of failing.
            pass
    return path
315358
@linkpath.setter
def linkpath(self, value):
    """Set the link target recorded in the entry.

    Args:
        value (str | bytes): a `str` is encoded with `self.header_codec`;
            `bytes` are passed through unchanged.
    """
    import codecs  # local import keeps this method self-contained

    if not isinstance(value, bytes):
        value = value.encode(self.header_codec)
    # Normalise the codec name so aliases such as 'utf8' or 'UTF-8' are
    # recognised as UTF-8 too. An unknown or non-string codec name falls
    # back to the plain copy call.
    try:
        is_utf8 = codecs.lookup(self.header_codec).name == 'utf-8'
    except (LookupError, TypeError):
        is_utf8 = False
    if is_utf8:
        ffi.entry_update_link_utf8(self._entry_p, value)
    else:
        # NOTE(review): presumably the copy variant stores the bytes without
        # assuming UTF-8 -- confirm against the libarchive documentation.
        ffi.entry_copy_link(self._entry_p, value)
319367
320368 # aliases for compatibility with the standard `tarfile` module
321369 path = property (pathname .fget , pathname .fset , doc = "alias of pathname" )
0 commit comments