|
6 | 6 | from core.utils.common import conditional_atomic, db_is_not_sqlite, load_func |
7 | 7 | from core.utils.db import fast_first |
8 | 8 | from django.conf import settings |
9 | | -from django.db.models import BooleanField, Case, Count, Exists, F, Max, OuterRef, Q, QuerySet, Value, When |
| 9 | +from django.db.models import Case, Count, Exists, F, Max, OuterRef, Q, QuerySet, When |
10 | 10 | from django.db.models.fields import DecimalField |
11 | 11 | from projects.functions.stream_history import add_stream_history |
12 | 12 | from projects.models import Project |
@@ -75,37 +75,32 @@ def _try_tasks_with_overlap(tasks: QuerySet[Task]) -> Tuple[Union[Task, None], Q |
75 | 75 | return None, tasks.filter(overlap=1) |
76 | 76 |
|
77 | 77 |
|
78 | | -def _try_breadth_first( |
79 | | - tasks: QuerySet[Task], user: User, project: Project, attempt_gt_first: bool = False |
80 | | -) -> Union[Task, None]: |
| 78 | +def _try_breadth_first(tasks: QuerySet[Task], user: User, project: Project) -> Union[Task, None]: |
81 | 79 | """Return a task from those with the most annotations made by other users, so tasks already in progress are finished first; return None when no candidate has such annotations""" |
82 | 80 |

83 | | - # Exclude ground truth annotations from the count when not in onboarding window |
84 | | - # to prevent GT tasks from being prioritized via breadth-first logic |
85 | | - annotation_filter = ~Q(annotations__completed_by=user) |
86 | | - if not attempt_gt_first: |
87 | | - annotation_filter &= ~Q(annotations__ground_truth=True) |
| 81 | + if project.annotator_evaluation_enabled: |
| 82 | + # When annotator evaluation is enabled, ground truth (GT) tasks accumulate overlap regardless of the maximum annotations setting. |
| 83 | + # If they stayed among the candidates, the breadth-first logic would eventually front-load them. |
| 84 | + # So we exclude them from the candidates here. |
| 85 | + # Onboarding tasks are served by _try_ground_truth. |
| 86 | + # When breadth first finds no in-progress tasks, the next step in the pipeline serves the remaining GT tasks. |
| 87 | + tasks = _annotate_has_ground_truths(tasks) |
| 88 | + tasks = tasks.filter(has_ground_truths=False) |
88 | 89 |

89 | | - tasks = tasks.annotate(annotations_count=Count('annotations', filter=annotation_filter)) |
| 90 | + tasks = tasks.annotate(annotations_count=Count('annotations', filter=~Q(annotations__completed_by=user))) |
90 | 91 | max_annotations_count = tasks.aggregate(Max('annotations_count'))['annotations_count__max'] |
91 | | - if max_annotations_count == 0: |
92 | | - # there is no any labeled tasks found |
93 | | - return |
94 | | - |
95 | | - # find any task with maximal amount of created annotations |
96 | | - not_solved_tasks_labeling_started = tasks.annotate( |
97 | | - reach_max_annotations_count=Case( |
98 | | - When(annotations_count=max_annotations_count, then=Value(True)), |
99 | | - default=Value(False), |
100 | | - output_field=BooleanField(), |
101 | | - ) |
102 | | - ) |
103 | | - not_solved_tasks_labeling_with_max_annotations = not_solved_tasks_labeling_started.filter( |
104 | | - reach_max_annotations_count=True |
105 | | - ) |
106 | | - if not_solved_tasks_labeling_with_max_annotations.exists(): |
107 | | - # try to complete tasks that are already in progress |
108 | | - return _get_random_unlocked(not_solved_tasks_labeling_with_max_annotations, user) |
| 92 | + |
| 93 | + if max_annotations_count == 0 or max_annotations_count is None: |
| 94 | + # The maximum is None when the queryset is empty and 0 when no task has annotations by other users; either way, let the next step in the pipeline handle it |
| 95 | + return None |
| 96 | + |
| 97 | + # Keep only the tasks whose annotation count equals the maximum |
| 98 | + candidates = tasks.filter(annotations_count=max_annotations_count) |
| 99 | + if candidates.exists(): |
| 100 | + # Pick one candidate via _get_random_unlocked (presumably a random task not locked by another user; confirm against the helper) |
| 101 | + result = _get_random_unlocked(candidates, user) |
| 102 | + return result |
| 103 | + return None |
109 | 104 |
|
110 | 105 |
|
111 | 106 | def _try_uncertainty_sampling( |
@@ -289,7 +284,7 @@ def get_next_task_without_dm_queue( |
289 | 284 | if not next_task and project.maximum_annotations > 1: |
290 | 285 | # if there are already labeled tasks, but task.overlap still < project.maximum_annotations, randomly sampling from them |
291 | 286 | logger.debug(f'User={user} tries depth first from prepared tasks') |
292 | | - next_task = _try_breadth_first(not_solved_tasks, user, project, attempt_gt_first) |
| 287 | + next_task = _try_breadth_first(not_solved_tasks, user, project) |
293 | 288 | if next_task: |
294 | 289 | queue_info += (' & ' if queue_info else '') + 'Breadth first queue' |
295 | 290 |
|
|
0 commit comments