88from lxml import etree
99from copy import deepcopy
1010import base64
11+ from binascii import Error as BinasciiError
1112import unicodedata
1213import zlib
1314import re
@@ -191,7 +192,6 @@ class UnprotectedStream(Adapter):
191192 provided by get_cipher"""
192193
193194 protected_xpath = '//Value[@Protected=\' True\' ]'
194- unprotected_xpath = '//Value[@Protected=\' False\' ]'
195195
196196 def __init__ (self , protected_stream_key , subcon ):
197197 super (UnprotectedStream , self ).__init__ (subcon )
@@ -201,29 +201,33 @@ def _decode(self, tree, con, path):
201201 cipher = self .get_cipher (self .protected_stream_key (con ))
202202 for elem in tree .xpath (self .protected_xpath ):
203203 if elem .text is not None :
204- result = cipher .decrypt (base64 .b64decode (elem .text )).decode ('utf-8' )
205- # strip invalid XML characters - https://stackoverflow.com/questions/8733233
206- result = re .sub (
207- u'[^\u0020 -\uD7FF \u0009 \u000A \u000D \uE000 -\uFFFD \U00010000 -\U0010FFFF ]+' ,
208- '' ,
209- result
210- )
211- elem .text = result
212- elem .attrib ['Protected' ] = 'False'
204+ try :
205+ result = cipher .decrypt (base64 .b64decode (elem .text )).decode ('utf-8' )
206+ # strip invalid XML characters - https://stackoverflow.com/questions/8733233
207+ result = re .sub (
208+ u'[^\u0020 -\uD7FF \u0009 \u000A \u000D \uE000 -\uFFFD \U00010000 -\U0010FFFF ]+' ,
209+ '' ,
210+ result
211+ )
212+ elem .text = result
213+ except (UnicodeDecodeError , BinasciiError , ValueError ):
214+ # FIXME: this should be a warning eventually, need to fix all databases in tests/ first
215+ log .error (
216+ "Element at {} marked as protected, but could not unprotect" .format (tree .getpath (elem ))
217+ )
213218 return tree
214219
def _encode(self, tree, con, path):
    """Return a copy of ``tree`` with its protected values encrypted.

    The input tree is deep-copied first, so the caller's tree is never
    modified.  Every element selected by ``self.protected_xpath`` whose
    text is not ``None`` has that text UTF-8 encoded, encrypted with the
    cipher returned by ``self.get_cipher(self.protected_stream_key(con))``
    and stored back as base64 text.  The ``Protected`` attribute of the
    elements is left untouched.

    :param tree: element tree exposing an ``xpath`` method
    :param con: context object handed to ``self.protected_stream_key``
    :param path: unused here; kept for the adapter method signature
    :return: the encrypted copy of ``tree``
    """
    encrypted_tree = deepcopy(tree)
    # A single cipher instance is shared by all elements, which are
    # processed in the order the xpath query returns them.
    cipher = self.get_cipher(self.protected_stream_key(con))
    for elem in encrypted_tree.xpath(self.protected_xpath):
        if elem.text is None:
            continue
        ciphertext = cipher.encrypt(elem.text.encode('utf-8'))
        # b64encode returns bytes; decode so the element text is always
        # a text string rather than a bytes object.
        elem.text = base64.b64encode(ciphertext).decode('ascii')
    return encrypted_tree
227231
228232
229233class ARCFourVariantStream (UnprotectedStream ):
0 commit comments