Commit 7c073be

Fix LPID - proper list of pushes, classify all tasks in single .update where possible.
1 parent: 43752bc
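The core of the change: instead of issuing one `.update()` per job, job ids are collected into lists and classified with a single queryset `.update()` per classification value. A minimal sketch of that batching idea using standard Django ORM calls (the id list is hypothetical; `Job` and the classification ids mirror the diff below):

    # before: one SQL UPDATE per job id (hypothetical ids)
    for job_id in [101, 102, 103]:
        Job.objects.filter(id=job_id, result="testfailed").update(failure_classification_id=8)

    # after: one SQL UPDATE covering all collected ids
    Job.objects.filter(id__in=[101, 102, 103], result="testfailed").update(
        failure_classification_id=8
    )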


treeherder/log_parser/intermittents.py

Lines changed: 55 additions & 34 deletions
@@ -1,6 +1,37 @@
 import datetime
 
-from treeherder.model.models import Group, GroupStatus, Job, Push
+from treeherder.model.models import Group, GroupStatus, Job, Push, TextLogError
+
+
+def classify(jobs_to_classify, jobs_to_unclassify):
+    # TODO: consider job.result=(busted, exception)
+    if jobs_to_classify:
+        target_jobs = Job.objects.filter(
+            id__in=jobs_to_classify, result="testfailed", failure_classification_id__in=[1, 6]
+        )
+        if target_jobs:
+            target_jobs.update(failure_classification_id=8)
+
+    if jobs_to_unclassify:
+        # TODO: query text_log_error for new_failure and use 6 if previously set
+        new_jobs = (
+            TextLogError.objects.filter(
+                job__id__in=jobs_to_unclassify, new_failure=True, job__failure_classification_id=8
+            )
+            .values("job__id")
+            .distinct()
+        )
+        jobs_to_six = [j["job__id"] for j in new_jobs]
+        jobs_to_one = list(set(jobs_to_unclassify) - set(jobs_to_six))
+
+        if jobs_to_six:
+            target_jobs = Job.objects.filter(id__in=jobs_to_six, result="testfailed")
+            if target_jobs:
+                target_jobs.update(failure_classification_id=6)
+        if jobs_to_one:
+            target_jobs = Job.objects.filter(id__in=jobs_to_one, result="testfailed")
+            if target_jobs:
+                target_jobs.update(failure_classification_id=1)
 
 
 def _check_and_mark_infra(current_job, job_ids, push_ids):
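The new `classify` helper performs the queued writes in bulk: ids in `jobs_to_classify` move from fc_id 1/6 to 8 (intermittent needs classification), while ids in `jobs_to_unclassify` drop back to 6 when a related `TextLogError` row has `new_failure=True`, otherwise to 1. A hedged usage sketch (the id lists are hypothetical; callers in this file pass lists built by the code further down):

    # mark two jobs as intermittent, restore one previously-parked job
    classify(jobs_to_classify=[501, 502], jobs_to_unclassify=[617])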
@@ -42,7 +73,7 @@ def _check_and_mark_infra(current_job, job_ids, push_ids):
 
     # ignore previous classified, we are looking for NEW extra jobs
     if len([ej for ej in extra_jobs if ej["failure_classification_id"] != 8]) == 0:
-        return
+        return [], []
 
     # ensure 50% 'success' rate
     # success here means the task ran and produced groups | is success
@@ -52,20 +83,24 @@ def _check_and_mark_infra(current_job, job_ids, push_ids):
         if job["id"] not in job_ids and job["result"] != "success":
             extra_failed.append(job)
 
+    jobs_to_classify = []
+    jobs_to_unclassify = []
+
     # look for failure rate > 50% and exit early
     if len(extra_failed) / len(extra_jobs) > 0.5:
         # as failure rate > 50%, if any jobs are fc_id=8 classify as fc_id=1
         for job in extra_failed:
             if job["failure_classification_id"] == 8:
-                Job.objects.filter(id=job["id"]).update(failure_classification_id=1)
-        return
+                jobs_to_unclassify.append(job["id"])
 
     # any extra_jobs will be failures without groups (infra/timeout/etc.)
     # theoretically there could be many jobs here
     # mark extra_jobs as `intermittent_needs_classification`
     for job in extra_failed:
         if job["failure_classification_id"] not in [4, 8]:
-            Job.objects.filter(id=job["id"]).update(failure_classification_id=8)
+            jobs_to_classify.append(job["id"])
+
+    return jobs_to_classify, jobs_to_unclassify
 
 
 def check_and_mark_intermittent(job_id):
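`_check_and_mark_infra` now only gathers ids and returns the two lists; the caller decides when to write them via `classify`. The unchanged early-exit check is a plain failure-rate ratio, for example (counts hypothetical):

    # 3 of 4 extra jobs failed
    failure_rate = 3 / 4  # 0.75
    failure_rate > 0.5    # True: queue any fc_id=8 jobs for unclassification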
@@ -91,7 +126,7 @@ def check_and_mark_intermittent(job_id):
     )
     counter = -1
     for id in idlist:
-        if id == current_job.push.id:
+        if id["id"] == current_job.push.id:
             counter = 0
             continue
         if counter < 0:
@@ -100,7 +135,7 @@ def check_and_mark_intermittent(job_id):
             break
         elif current_job.repository.id != 77 and counter >= 3:
             break
-        ids.append(id)
+        ids.append(id["id"])
         counter += 1
 
     all_groups = (
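The `id` → `id["id"]` fixes in the two hunks above are the "proper list of pushes" part of the commit message: `idlist` is presumably produced by a `.values(...)` queryset (built in the unchanged lines just above), so each element is a dict rather than a bare push id. A minimal sketch of that behaviour (the filter is illustrative only):

    idlist = Push.objects.filter(repository=current_job.repository).values("id")
    # iterating yields dicts such as {"id": 12345}, hence id["id"] in the comparisons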
@@ -135,7 +170,8 @@ def check_and_mark_intermittent(job_id):
     # If no groups, look for infra
     distinct_job_ids = list(set([f["job_logs__job__id"] for f in all_groups]))
     if len(distinct_job_ids) == 1:
-        return _check_and_mark_infra(current_job, distinct_job_ids, ids)
+        to_classify, to_unclassify = _check_and_mark_infra(current_job, distinct_job_ids, ids)
+        return classify(to_classify, to_unclassify)
 
     mappings = {}
     job_classifications = {}
@@ -151,6 +187,7 @@ def check_and_mark_intermittent(job_id):
             # we have a variant
            continue
 
+        # TODO: consider storing a list of job.id that are fc_id=8
         # store job:fc_id so we can reference what needs changed
         if item["job_logs__job__id"] not in job_classifications:
             job_classifications[item["job_logs__job__id"]] = item[
@@ -181,18 +218,14 @@ def check_and_mark_intermittent(job_id):
     current_changed_groups = {}
     for group in mappings.get(current_job.push.id, {}).get("groups", []):
         all_data = []
-        current_data = []
+        current_data = [
+            mappings[current_job.push.id]["groups"][group][j]
+            for j in mappings[current_job.push.id]["groups"][group]
+        ]
         for id in mappings.keys():
             all_data.extend(
                 [mappings[id]["groups"][group][j] for j in mappings[id]["groups"].get(group, {})]
             )
-            if id == current_job.push.id:
-                current_data.extend(
-                    [
-                        mappings[id]["groups"][group][j]
-                        for j in mappings[id]["groups"].get(group, {})
-                    ]
-                )
 
         # if new data changes results, update
         pass_rate = len([s for s in all_data if s == GroupStatus.OK]) / len(all_data)
@@ -203,9 +236,9 @@ def check_and_mark_intermittent(job_id):
             current_changed_groups[group] = True
 
     # all changed_groups need to be evaluated on previous 'failed' jobs to ensure all groups in that task are 'passing'
+    jobs_to_classify = []  # mark as fcid=8 (known intermittent)
+    jobs_to_unclassify = []  # previously parked as fcid=8, new failing data, now fcid=1
     for id in mappings.keys():
-        jobs_to_classify = []  # mark as fcid=8 (known intermittent)
-        jobs_to_unclassify = []  # previously parked as fcid=8, new failing data, now fcid=1
         for job in mappings[id]["jobs"]:
             all_green = True
             current_all_green = True
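Hoisting `jobs_to_classify` and `jobs_to_unclassify` out of the per-push loop is what lets the function classify everything in one place: ids now accumulate across every push in the window instead of being reset (and written) once per push. In outline (the collection step is paraphrased, not the actual loop body):

    jobs_to_classify = []
    jobs_to_unclassify = []
    for id in mappings.keys():
        # ... per-push analysis appends job ids to the two lists ...
        pass
    # single batched write at the end (see the final hunk below)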
@@ -229,19 +262,7 @@ def check_and_mark_intermittent(job_id):
                 elif job_classifications[job] == 8:
                     jobs_to_unclassify.append(job)
 
-        # TODO: consider job.result=(busted, exception)
-        for job in jobs_to_classify:
-            target_job = Job.objects.filter(
-                id=job, result="testfailed", failure_classification_id__in=[1, 6]
-            )
-            if target_job:
-                target_job.update(failure_classification_id=8)
-
-        for job in jobs_to_unclassify:
-            target_job = Job.objects.filter(
-                id=job, result="testfailed", failure_classification_id=8
-            )
-            if target_job:
-                target_job.update(failure_classification_id=1)
-
-    return _check_and_mark_infra(current_job, distinct_job_ids, ids)
+    to_classify, to_unclassify = _check_and_mark_infra(current_job, distinct_job_ids, ids)
+    jobs_to_classify.extend(to_classify)
+    jobs_to_unclassify.extend(to_unclassify)
+    return classify(jobs_to_classify, jobs_to_unclassify)
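End result: `check_and_mark_intermittent` gathers ids across the push window, folds in whatever `_check_and_mark_infra` found, and funnels every write through one `classify` call. A hedged invocation example (the job id is made up; in Treeherder this function is presumably triggered from the log-parsing pipeline once a job's group results are stored):

    from treeherder.log_parser.intermittents import check_and_mark_intermittent

    check_and_mark_intermittent(job_id=123456789)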
