
Commit 52f3ead

fix import error not being reported correctly in the worker (for process-isolate schedulers)
1 parent ca026ea commit 52f3ead


2 files changed: +137 −27 lines changed


pytest_parallel/process_worker.py

Lines changed: 50 additions & 6 deletions
@@ -19,6 +19,49 @@ def __init__(self, scheduler_ip_address, scheduler_port, session_folder, test_id
     def _file_path(self, when):
         return Path(f'.pytest_parallel/{self.session_folder}/_partial/{self.test_idx}_{when}')
 
+    @pytest.hookimpl(tryfirst=True)
+    def pytest_make_collect_report(self, collector):
+        comm = MPI.COMM_WORLD
+
+        # Here, we are just before the call to the `pytest_make_collect_report` function of pytest.
+        # The pytest `pytest_make_collect_report` function is the one that imports the test,
+        # so it is possible that it crashes (e.g. forced exit in a module imported by the test).
+        # Hence, before calling it, we want to register the fact that we at least reached this point.
+        if comm.rank == 0:
+            file_path = self._file_path('before_import')
+            with open(file_path, 'w', encoding='utf-8') as f:
+                f.write('before import')
+
+    @pytest.hookimpl(tryfirst=True)
+    def pytest_collectreport(self, report):
+        comm = MPI.COMM_WORLD
+
+        if comm.rank == 0:
+            collect_file = self._file_path('collect')
+
+            # For an unknown reason, `pytest_collectreport` is called several times.
+            # However, we only use one 'collect' file,
+            # so we need to create the file if it has not been created yet.
+            # If the file already exists, then we overwrite it if it previously passed and we now fail.
+            if not collect_file.exists():
+                do_report = True
+            else:
+                with open(collect_file, 'rb') as file:
+                    previous_report_info = file.read()
+                previous_report_info = pickle.loads(previous_report_info)
+                if previous_report_info['outcome'] == 'passed' and report.outcome == 'failed':
+                    do_report = True
+                else:
+                    do_report = False
+
+            if do_report:
+                report_info = {'outcome' : report.outcome,
+                               'longrepr': report.longrepr,
+                               'duration': 0., }
+                with open(collect_file, 'wb') as f:
+                    f.write(pickle.dumps(report_info))
+
+
     @pytest.hookimpl(tryfirst=True)
     def pytest_runtestloop(self, session) -> bool:
         comm = MPI.COMM_WORLD
@@ -27,12 +70,12 @@ def pytest_runtestloop(self, session) -> bool:
         test_comm_size = get_n_proc_for_test(item)
 
         item.sub_comm = comm
-        item.test_info = {'test_idx': self.test_idx, 'fatal_error': None}
+        item.test_info = {'test_idx': self.test_idx, 'fatal_error': None} # TODO 2025-07 not used, remove
 
 
-        # check there is no file from a previous run
+        # check there is no file from a previous run # TODO move this check up the pytest workflow, and complete it with other files
         if comm.rank == 0:
-            for when in ['fatal_error', 'setup', 'call', 'teardown']:
+            for when in ['pre_run_error', 'setup', 'call', 'teardown']:
                 path = self._file_path(when)
                 assert not path.exists(), f'INTERNAL FATAL ERROR in pytest_parallel: file "{path}" should not exist at this point'
 
@@ -42,7 +85,7 @@ def pytest_runtestloop(self, session) -> bool:
             error_info = f'FATAL ERROR in pytest_parallel with slurm scheduling: test `{item.nodeid}`' \
                          f' uses a `comm` of size {test_comm_size} but was launched with size {comm.size}.\n' \
                          f' This generally indicates that `srun` does not interoperate correctly with MPI.'
-            file_path = self._file_path('fatal_error')
+            file_path = self._file_path('pre_run_error')
            with open(file_path, 'w', encoding='utf-8') as f:
                f.write(error_info)
            return True
@@ -51,6 +94,7 @@ def pytest_runtestloop(self, session) -> bool:
         nextitem = None
         run_item_test(item, nextitem, session)
 
+        # TODO 2025-07 not used, remove
         if item.test_info['fatal_error'] is not None:
             assert 0, f'{item.test_info["fatal_error"]}'
 
@@ -61,12 +105,12 @@ def pytest_runtest_makereport(self, item):
         """
         We need to hook to pass the test sub-comm to `pytest_runtest_logreport`,
         and for that we add the sub-comm to the only argument of `pytest_runtest_logreport`, that is, `report`
-        We also need to pass `item.test_info` so that we can update it
+        We also need to pass `item.test_info` so that we can update it # TODO 2025-07 not used, remove
         """
         result = yield
         report = result.get_result()
         report.sub_comm = item.sub_comm
-        report.test_info = item.test_info
+        report.test_info = item.test_info # TODO 2025-07 not used, remove
 
     @pytest.hookimpl(tryfirst=True)
     def pytest_runtest_logreport(self, report):
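
Because `pytest_collectreport` can fire more than once for the same worker, the hook above only rewrites the 'collect' file when a previously recorded 'passed' outcome has to be downgraded to 'failed'. The snippet below is a minimal standalone sketch of that merge rule; the helper names `should_overwrite` and `record_collect_outcome` are illustrative only and are not part of pytest_parallel.

# Sketch of the "only overwrite passed -> failed" rule used for the 'collect' file.
# Helper names are illustrative, not pytest_parallel API.
import pickle
from pathlib import Path


def should_overwrite(collect_file: Path, new_outcome: str) -> bool:
    # Write the first report; afterwards, only replace a 'passed' report with a 'failed' one.
    if not collect_file.exists():
        return True
    previous = pickle.loads(collect_file.read_bytes())
    return previous['outcome'] == 'passed' and new_outcome == 'failed'


def record_collect_outcome(collect_file: Path, outcome: str, longrepr=None) -> None:
    if should_overwrite(collect_file, outcome):
        report_info = {'outcome': outcome, 'longrepr': longrepr, 'duration': 0.}
        collect_file.write_bytes(pickle.dumps(report_info))


if __name__ == '__main__':
    import tempfile
    with tempfile.TemporaryDirectory() as d:
        f = Path(d) / '0_collect'
        record_collect_outcome(f, 'passed')   # first call creates the file
        record_collect_outcome(f, 'failed')   # downgrades passed -> failed
        record_collect_outcome(f, 'passed')   # ignored: a failure is never overwritten
        print(pickle.loads(f.read_bytes())['outcome'])  # prints 'failed'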

pytest_parallel/send_report.py

Lines changed: 87 additions & 21 deletions
@@ -23,48 +23,114 @@
 def _file_path(when):
     return Path(f'.pytest_parallel/{args._session_folder}/_partial/{args._test_idx}_{when}')
 
-test_info = {'test_idx': args._test_idx, 'fatal_error': None} # TODO no fatal_error=None (absence means no error)
+def _longrepr_from_str(msg):
+    trace_back = ReprTraceback([ReprEntryNative(msg)], None, None)
+    collect_longrepr = []
+    collect_longrepr.append(
+        (trace_back, None, None)
+    )
+    return ExceptionChainRepr(collect_longrepr)
 
-# 'fatal_error' file
-file_path = _file_path('fatal_error')
-if file_path.exists():
-    with open(file_path, 'r', encoding='utf-8') as file:
-        fatal_error = file.read()
-    test_info['fatal_error'] = fatal_error
 
+def _fill_test_info_from_report(test_info, when):
+    assert when in ['setup', 'call', 'teardown']
 
-# 'setup/call/teardown' files
-already_failed = False
-for when in ('setup', 'call', 'teardown'):
     file_path = _file_path(when)
     if file_path.exists():
         try:
             with open(file_path, 'rb') as file:
                 report_info = file.read()
             report_info = pickle.loads(report_info)
             test_info[when] = report_info
+            failed = report_info['outcome'] == 'failed'
         except pickle.PickleError:
             test_info['fatal_error'] = f'FATAL ERROR in pytest_parallel : unable to decode {file_path}'
+            failed = True
     else: # Supposedly not found because the test crashed before writing the file
-        collect_longrepr = []
-        msg = 'Error: the test crashed. '
+        msg = f'Error: the test crashed during `{when}` phase. '
         red = 31
         bold = 1
         msg = f'\x1b[{red}m' + f'\x1b[{bold}m' + msg + '\x1b[0m'
         msg += f'Log file: {args._test_name}\n'
-        trace_back = ReprTraceback([ReprEntryNative(msg)], None, None)
-        collect_longrepr.append(
-            (trace_back, None, None)
-        )
-        longrepr = ExceptionChainRepr(collect_longrepr)
-
-        outcome = 'passed' if already_failed else 'failed' # No need to report the error twice
-        test_info[when] = {'outcome' : outcome,
+        longrepr = _longrepr_from_str(msg)
+
+        test_info[when] = {'outcome' : 'failed',
                            'longrepr': longrepr,
                            'duration': 0, } # unable to report accurately
 
-        already_failed = True
+        failed = True
+    return failed
+
+def _retrieve_test_info():
+    test_info = {'test_idx': args._test_idx, 'fatal_error': None} # TODO no fatal_error=None (absence means no error)
+    for when in ('setup', 'call', 'teardown'):
+        test_info[when] = {'outcome' : 'passed',
+                           'longrepr': _longrepr_from_str(''),
+                           'duration': 0, }
+
+    # During test execution, the following files are created in order:
+    #   1. before_import
+    #   2. collect
+    #   3. pre_run_error
+    #   4. setup
+    #   5. call
+    #   6. teardown
+    # If one of the files is missing, it means there was a crash (except for `pre_run_error`, where it is the other way around)
+
+    # 1. if `before_import` is not present, we crashed at the very beginning
+    if not _file_path('before_import').exists():
+        test_info['fatal_error'] = 'FATAL ERROR in pytest_parallel early processing\n'
+        test_info['fatal_error'] += f'Log file: {args._test_name}\n'
+        return test_info
+
+    # 2. handle collection
+    if not _file_path('collect').exists(): # if `collect` is not present, we crashed during the test collection
+        test_info['fatal_error'] = 'FATAL ERROR in pytest_parallel during test collection\n'
+        test_info['fatal_error'] += f'Log file: {args._test_name}\n'
+        return test_info
+    else: # else we report if the collection failed
+        with open(_file_path('collect'), 'rb') as file:
+            report_info = file.read()
+        report_info = pickle.loads(report_info)
+        if report_info['outcome'] == 'failed':
+            # Note:
+            #   We could send report_info['longrepr'] to master so that it reports it directly.
+            #   However, it would be confusing, because master also did the collection phase with no error
+            #   (if there were an error, the worker would not run in the first place).
+            #   To make it clear that the error appears on the worker only, better refer to the report of the worker.
+            msg = f'Error: the test crashed during `collect` phase. '
+            red = 31
+            bold = 1
+            msg = f'\x1b[{red}m' + f'\x1b[{bold}m' + msg + '\x1b[0m'
+            msg += f'Log file: {args._test_name}\n'
+            longrepr = _longrepr_from_str(msg)
+
+            # report as a setup failure (because indeed, the worker failed to set up the test by failing to collect it)
+            test_info['setup'] = {'outcome' : 'failed',
+                                  'longrepr': longrepr,
+                                  'duration': 0, } # unable to report accurately
+            return test_info
+
+    # 3. if `pre_run_error` is present, there was a fatal error in the pytest_parallel test handling
+    file_path = _file_path('pre_run_error')
+    if file_path.exists():
+        with open(file_path, 'r', encoding='utf-8') as file:
+            pre_run_error_msg = file.read()
+        test_info['fatal_error'] = pre_run_error_msg
+        return test_info
+
+    # 4.,5.,6.: 'setup/call/teardown' files
+    for when in ('setup', 'call', 'teardown'):
+        failed = _fill_test_info_from_report(test_info, when)
+        if failed:
+            return test_info
+
+    return test_info
+
+
+
 
+test_info = _retrieve_test_info()
 
 with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
     s.connect((args._scheduler_ip_address, args._scheduler_port))
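
The new `_retrieve_test_info` walks the sentinel files in the order they are created (`before_import`, `collect`, `pre_run_error`, then `setup`/`call`/`teardown`) and stops at the first anomaly. The standalone sketch below summarizes that decision order as a small function; `classify_run`, the `present` set and the `collect_failed`/`phase_failed` flags are illustrative only and are not part of send_report.py.

# Sketch of the sentinel-file decision order; `classify_run` is illustrative,
# not the actual send_report.py code.
def classify_run(present, collect_failed=False, phase_failed=None):
    # present: set of sentinel-file names found for one test
    if 'before_import' not in present:
        return 'fatal_error: crash before importing the test'
    if 'collect' not in present:
        return 'fatal_error: crash during test collection'
    if collect_failed:
        return 'setup failed: collection failed on the worker'
    if 'pre_run_error' in present:
        return 'fatal_error: pre-run error reported by pytest_parallel'
    for when in ('setup', 'call', 'teardown'):
        if when not in present:
            return f'{when} failed: the test crashed during `{when}`'
        if phase_failed == when:
            return f'{when} failed'
    return 'passed'


# A few example situations:
print(classify_run(set()))                                               # crash before import
print(classify_run({'before_import'}))                                   # crash during collection
print(classify_run({'before_import', 'collect'}, collect_failed=True))   # collection failed on the worker
print(classify_run({'before_import', 'collect', 'setup'}))               # crashed during `call`
print(classify_run({'before_import', 'collect', 'setup', 'call', 'teardown'}))  # passed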
