Skip to content

Commit 113b38f

Browse files
emcd and claude committed
Make security test platform-agnostic.
Replace platform-specific MIME type assertions with pattern-based validation to fix test failures on macOS. The test now validates security behavior (binary files are rejected; script files are accepted as textual types) rather than exact MIME type detection, ensuring cross-platform compatibility.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>
1 parent 413cfb6 commit 113b38f

File tree

1 file changed

+24
-35
lines changed

1 file changed

+24
-35
lines changed

tests/test_000_mimeogram/test_500_acquirers.py

Lines changed: 24 additions & 35 deletions
Original file line number | Diff line number | Diff line change
@@ -210,25 +210,25 @@ async def test_400_detect_mime_types( provide_tempdir, provide_auxdata ):
210210
provide_tempdir / "plain.txt",
211211
provide_tempdir / "script.py",
212212
provide_tempdir / "config.toml",
213-
provide_tempdir / "data.yaml",
213+
provide_tempdir / "data.yaml",
214214
provide_tempdir / "service.json",
215215
provide_tempdir / "manifest.xml",
216216
provide_tempdir / "rust_code.rs",
217217
] )
218218

219219
assert len( results ) == 7
220220
mimetypes = { part.mimetype for part in results }
221-
221+
222222
# Existing assertions
223223
assert "text/plain" in mimetypes
224224
assert any( "python" in mt for mt in mimetypes )
225-
225+
226226
# Pattern-based detection assertions for recognized MIME types
227227
assert any(
228228
mt.endswith( '+json' ) or 'json' in mt for mt in mimetypes )
229229
assert any(
230230
mt.endswith( '+xml' ) or 'xml' in mt for mt in mimetypes )
231-
231+
232232
# TOML and YAML files should be accepted via charset fallback
233233
# since Python's mimetypes doesn't recognize them
234234
toml_results = [
@@ -237,7 +237,7 @@ async def test_400_detect_mime_types( provide_tempdir, provide_auxdata ):
237237
p for p in results if p.location.endswith( 'data.yaml' ) ]
238238
assert len( toml_results ) == 1
239239
assert len( yaml_results ) == 1
240-
240+
241241
# Rust files should be accepted (regression test for original issue)
242242
rust_results = [
243243
p for p in results if p.location.endswith( 'rust_code.rs' ) ]
@@ -249,57 +249,46 @@ async def test_400_detect_mime_types( provide_tempdir, provide_auxdata ):
249249
async def test_410_application_x_security( provide_tempdir, provide_auxdata ):
250250
''' Security hardening properly rejects dangerous application/x- types. '''
251251
acquirers = cache_import_module( f"{PACKAGE_NAME}.acquirers" )
252-
253-
# Create test files with binary signatures that puremagic recognizes
254252
binary_files = {
255253
'test.exe': b'MZ\x90\x00' + b'\x00' * 100, # PE header
256254
'test.dmg': b'koly' + b'\x00' * 100, # DMG trailer signature
257255
# Use obviously binary file that won't be detected as having charset
258-
'test.bin': bytes( [ 0xFF, 0x00 ] * 52 ), # Alternating binary
256+
'test.bin': bytes( [ 0xFF, 0x00 ] * 52 ), # Alternating binary
259257
}
260-
261-
# Create safe scripting files
262258
script_files = {
263259
'script.rb': 'puts "Hello, Ruby!"\n',
264-
'script.py': 'print("Hello, Python!")\n',
260+
'script.py': 'print("Hello, Python!")\n',
265261
'script.pl': 'print "Hello, Perl!\\n";\n',
266262
'script.php': '<?php echo "Hello, PHP!"; ?>\n',
267263
}
268-
269264
binary_paths = [ ]
270265
script_paths = [ ]
271-
272266
try:
273-
# Create binary files
274267
for filename, content in binary_files.items( ):
275268
path = provide_tempdir / filename
276269
path.write_bytes( content )
277270
binary_paths.append( path )
278-
279-
# Create script files
280271
for filename, content in script_files.items( ):
281272
path = provide_tempdir / filename
282273
path.write_text( content )
283274
script_paths.append( path )
284-
285-
# Test binary files are rejected in non-strict mode
286275
provide_auxdata.configuration[
287276
'acquire-parts' ][ 'fail-on-invalid' ] = False
288277
binary_results = await acquirers.acquire(
289278
provide_auxdata, binary_paths )
290279
assert len( binary_results ) == 0 # All binary files rejected
291-
292-
# Test script files are accepted
293280
script_results = await acquirers.acquire(
294281
provide_auxdata, script_paths )
295282
assert len( script_results ) == len( script_files )
296-
297-
# Verify MIME types for accepted scripts
298283
script_mimetypes = { part.mimetype for part in script_results }
299-
assert 'application/x-ruby' in script_mimetypes
300-
# Note: .py files might be detected as text/x-python, not app/x-python
284+
for mimetype in script_mimetypes:
285+
assert \
286+
( mimetype.startswith( 'text/' )
287+
or mimetype.startswith( 'application/x-' )
288+
), f"Unexpected MIME type for script: {mimetype}"
289+
# At least one should contain 'python' (most reliable cross-platform)
301290
assert any( 'python' in mt for mt in script_mimetypes )
302-
291+
303292
finally:
304293
# Cleanup
305294
for path in binary_paths + script_paths:
@@ -376,38 +365,38 @@ async def test_525_charset_fallback_validation(
376365
):
377366
''' Enhanced MIME type detection accepts valid structured text files. '''
378367
acquirers = cache_import_module( f"{PACKAGE_NAME}.acquirers" )
379-
368+
380369
# Test that files with unknown extensions but valid text content
381370
# are properly handled
382371
test_files = {
383372
'code.unknown': 'fn main() {\n println!("Hello!");\n}\n',
384-
'config.conf': 'key=value\nsection=main\n',
373+
'config.conf': 'key=value\nsection=main\n',
385374
'data.dat': '{"valid": "json", "content": true}\n',
386375
}
387-
376+
388377
paths_to_cleanup = [ ]
389-
378+
390379
try:
391380
provide_auxdata.configuration[
392381
'acquire-parts' ][ 'fail-on-invalid' ] = False
393-
382+
394383
# Create files with unknown extensions
395384
for filename, content in test_files.items( ):
396385
path = provide_tempdir / filename
397386
path.write_text( content )
398387
paths_to_cleanup.append( path )
399-
388+
400389
results = await acquirers.acquire( provide_auxdata, paths_to_cleanup )
401-
390+
402391
# All text files with unknown extensions should be accepted
403392
# via charset-based fallback (or immediate text/plain detection)
404393
assert len( results ) == 3
405-
394+
406395
# Verify they all have valid charsets
407396
for part in results:
408397
assert part.charset is not None
409398
assert part.charset in [ 'utf-8', 'ascii' ]
410-
399+
411400
# Test that truly empty files are handled appropriately
412401
empty_path = provide_tempdir / 'empty.unknown'
413402
empty_path.write_text( '' )
@@ -416,7 +405,7 @@ async def test_525_charset_fallback_validation(
416405
# Empty files get rejected
417406
assert len( empty_results ) == 0
418407
paths_to_cleanup.append( empty_path )
419-
408+
420409
finally:
421410
# Cleanup
422411
for path in paths_to_cleanup:

0 commit comments

Comments
 (0)