@@ -210,25 +210,25 @@ async def test_400_detect_mime_types( provide_tempdir, provide_auxdata ):
210
210
provide_tempdir / "plain.txt" ,
211
211
provide_tempdir / "script.py" ,
212
212
provide_tempdir / "config.toml" ,
213
- provide_tempdir / "data.yaml" ,
213
+ provide_tempdir / "data.yaml" ,
214
214
provide_tempdir / "service.json" ,
215
215
provide_tempdir / "manifest.xml" ,
216
216
provide_tempdir / "rust_code.rs" ,
217
217
] )
218
218
219
219
assert len ( results ) == 7
220
220
mimetypes = { part .mimetype for part in results }
221
-
221
+
222
222
# Existing assertions
223
223
assert "text/plain" in mimetypes
224
224
assert any ( "python" in mt for mt in mimetypes )
225
-
225
+
226
226
# Pattern-based detection assertions for recognized MIME types
227
227
assert any (
228
228
mt .endswith ( '+json' ) or 'json' in mt for mt in mimetypes )
229
229
assert any (
230
230
mt .endswith ( '+xml' ) or 'xml' in mt for mt in mimetypes )
231
-
231
+
232
232
# TOML and YAML files should be accepted via charset fallback
233
233
# since Python's mimetypes doesn't recognize them
234
234
toml_results = [
@@ -237,7 +237,7 @@ async def test_400_detect_mime_types( provide_tempdir, provide_auxdata ):
237
237
p for p in results if p .location .endswith ( 'data.yaml' ) ]
238
238
assert len ( toml_results ) == 1
239
239
assert len ( yaml_results ) == 1
240
-
240
+
241
241
# Rust files should be accepted (regression test for original issue)
242
242
rust_results = [
243
243
p for p in results if p .location .endswith ( 'rust_code.rs' ) ]
@@ -249,57 +249,46 @@ async def test_400_detect_mime_types( provide_tempdir, provide_auxdata ):
249
249
async def test_410_application_x_security ( provide_tempdir , provide_auxdata ):
250
250
''' Security hardening properly rejects dangerous application/x- types. '''
251
251
acquirers = cache_import_module ( f"{ PACKAGE_NAME } .acquirers" )
252
-
253
- # Create test files with binary signatures that puremagic recognizes
254
252
binary_files = {
255
253
'test.exe' : b'MZ\x90 \x00 ' + b'\x00 ' * 100 , # PE header
256
254
'test.dmg' : b'koly' + b'\x00 ' * 100 , # DMG trailer signature
257
255
# Use an obviously binary file that won't be detected as having a charset
258
- 'test.bin' : bytes ( [ 0xFF , 0x00 ] * 52 ), # Alternating binary
256
+ 'test.bin' : bytes ( [ 0xFF , 0x00 ] * 52 ), # Alternating binary
259
257
}
260
-
261
- # Create safe scripting files
262
258
script_files = {
263
259
'script.rb' : 'puts "Hello, Ruby!"\n ' ,
264
- 'script.py' : 'print("Hello, Python!")\n ' ,
260
+ 'script.py' : 'print("Hello, Python!")\n ' ,
265
261
'script.pl' : 'print "Hello, Perl!\\ n";\n ' ,
266
262
'script.php' : '<?php echo "Hello, PHP!"; ?>\n ' ,
267
263
}
268
-
269
264
binary_paths = [ ]
270
265
script_paths = [ ]
271
-
272
266
try :
273
- # Create binary files
274
267
for filename , content in binary_files .items ( ):
275
268
path = provide_tempdir / filename
276
269
path .write_bytes ( content )
277
270
binary_paths .append ( path )
278
-
279
- # Create script files
280
271
for filename , content in script_files .items ( ):
281
272
path = provide_tempdir / filename
282
273
path .write_text ( content )
283
274
script_paths .append ( path )
284
-
285
- # Test binary files are rejected in non-strict mode
286
275
provide_auxdata .configuration [
287
276
'acquire-parts' ][ 'fail-on-invalid' ] = False
288
277
binary_results = await acquirers .acquire (
289
278
provide_auxdata , binary_paths )
290
279
assert len ( binary_results ) == 0 # All binary files rejected
291
-
292
- # Test script files are accepted
293
280
script_results = await acquirers .acquire (
294
281
provide_auxdata , script_paths )
295
282
assert len ( script_results ) == len ( script_files )
296
-
297
- # Verify MIME types for accepted scripts
298
283
script_mimetypes = { part .mimetype for part in script_results }
299
- assert 'application/x-ruby' in script_mimetypes
300
- # Note: .py files might be detected as text/x-python, not app/x-python
284
+ for mimetype in script_mimetypes :
285
+ assert \
286
+ ( mimetype .startswith ( 'text/' )
287
+ or mimetype .startswith ( 'application/x-' )
288
+ ), f"Unexpected MIME type for script: { mimetype } "
289
+ # At least one should contain 'python' (most reliable cross-platform)
301
290
assert any ( 'python' in mt for mt in script_mimetypes )
302
-
291
+
303
292
finally :
304
293
# Cleanup
305
294
for path in binary_paths + script_paths :
@@ -376,38 +365,38 @@ async def test_525_charset_fallback_validation(
376
365
):
377
366
''' Enhanced MIME type detection accepts valid structured text files. '''
378
367
acquirers = cache_import_module ( f"{ PACKAGE_NAME } .acquirers" )
379
-
368
+
380
369
# Test that files with unknown extensions but valid text content
381
370
# are properly handled
382
371
test_files = {
383
372
'code.unknown' : 'fn main() {\n println!("Hello!");\n }\n ' ,
384
- 'config.conf' : 'key=value\n section=main\n ' ,
373
+ 'config.conf' : 'key=value\n section=main\n ' ,
385
374
'data.dat' : '{"valid": "json", "content": true}\n ' ,
386
375
}
387
-
376
+
388
377
paths_to_cleanup = [ ]
389
-
378
+
390
379
try :
391
380
provide_auxdata .configuration [
392
381
'acquire-parts' ][ 'fail-on-invalid' ] = False
393
-
382
+
394
383
# Create files with unknown extensions
395
384
for filename , content in test_files .items ( ):
396
385
path = provide_tempdir / filename
397
386
path .write_text ( content )
398
387
paths_to_cleanup .append ( path )
399
-
388
+
400
389
results = await acquirers .acquire ( provide_auxdata , paths_to_cleanup )
401
-
390
+
402
391
# All text files with unknown extensions should be accepted
403
392
# via charset-based fallback (or immediate text/plain detection)
404
393
assert len ( results ) == 3
405
-
394
+
406
395
# Verify they all have valid charsets
407
396
for part in results :
408
397
assert part .charset is not None
409
398
assert part .charset in [ 'utf-8' , 'ascii' ]
410
-
399
+
411
400
# Test that truly empty files are handled appropriately
412
401
empty_path = provide_tempdir / 'empty.unknown'
413
402
empty_path .write_text ( '' )
@@ -416,7 +405,7 @@ async def test_525_charset_fallback_validation(
416
405
# Empty files get rejected
417
406
assert len ( empty_results ) == 0
418
407
paths_to_cleanup .append ( empty_path )
419
-
408
+
420
409
finally :
421
410
# Cleanup
422
411
for path in paths_to_cleanup :
0 commit comments