Skip to content

University distributor

UniversityDistributor

Source code in june/distributors/university_distributor.py
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
class UniversityDistributor:
    """ """
    def __init__(self, universities: List[University]):
        """
        For each university it searches in the nearby areas for students living
        in student households. Once it has enough to fill the university, it stops
        searching and fills the university.

        Parameters
        ----------
        universities
            a list of universities to fill
        max_number_of_areas
            maximum number of neighbour areas to look for students
        """
        self.universities = universities
        self.min_student_age = 18
        self.max_student_age = 24

        # Optimization caches
        self._area_distance_cache: Dict[int, Dict[float, np.ndarray]] = {}
        self._global_student_pools: Dict[str, Set[int]] = {}
        self._students_collected = False

    def _collect_all_eligible_students_globally(self, areas: Areas) -> Dict[str, Set[int]]:
        """Collect the ids of every eligible student in ``areas`` in a single pass.

        A person is eligible when their age lies in
        ``[self.min_student_age, self.max_student_age]`` and their
        ``primary_activity`` is ``None``. Ids are grouped into four pools by
        where the person lives: ``"student_dorm"``, ``"young_adult"``,
        ``"flexible"`` (household types) and ``"other"`` (any other household
        type). In "other" households, people aged 23 or 24 are kept with
        probability 0.3 only.

        The result is cached on the instance: once a pass has completed, later
        calls return the cached pools and ignore ``areas``.

        Args:
            areas (Areas): iterable of areas; each must expose ``households``
                and may expose ``student_dorms`` (missing or ``None`` is skipped).

        Returns:
            Dict[str, Set[int]]: pool name -> set of person ids.

        """
        if self._students_collected:
            return self._global_student_pools

        # Imported once per call rather than once per person inside the loop.
        import random

        logger.info("Pre-collecting all eligible students globally...")

        student_pools = {
            "student_dorm": set(),
            "young_adult": set(),
            "flexible": set(),
            "other": set(),
        }
        # Counts every insertion attempt, so it can exceed the pool sizes if the
        # same id is encountered more than once.
        total_eligible = 0

        def is_eligible(person) -> bool:
            return (
                self.min_student_age <= person.age <= self.max_student_age
                and person.primary_activity is None
            )

        for area in areas:
            # Search student dorms
            dorms = getattr(area, "student_dorms", None)
            if dorms is not None:
                for student_dorm in dorms:
                    for person in student_dorm.residents:
                        if is_eligible(person):
                            student_pools["student_dorm"].add(person.id)
                            total_eligible += 1

            # Search households
            for household in area.households:
                if household.type in ("young_adult", "flexible"):
                    pool_name = household.type
                else:
                    pool_name = "other"

                for person in household.residents:
                    if not is_eligible(person):
                        continue
                    # Ages 23-24 outside student-type households: only a 30%
                    # chance of being treated as a student. Ages below 23 are
                    # always kept.
                    if (
                        pool_name == "other"
                        and person.age in (23, 24)
                        and not random.random() < 0.3
                    ):
                        continue
                    student_pools[pool_name].add(person.id)
                    total_eligible += 1

        self._global_student_pools = student_pools
        self._students_collected = True

        logger.info(f"Collected {total_eligible} eligible students globally")
        for pool_type, students in student_pools.items():
            logger.info(f"  {pool_type}: {len(students)} students")

        return student_pools

    def _get_areas_within_distance_cached(self, areas: Areas, university: University, distance: float) -> np.ndarray:
        """Return the areas close to ``university``, memoised per university and threshold.

        On the first request for a university, its nearest areas (at most 1000)
        and their distances are fetched via ``areas.get_closest_areas`` and
        stored under ``id(university)``. Each distinct ``distance`` is then
        filtered once with ``_filter_students_by_distance_numba`` and the
        resulting selection is stored for reuse.

        Args:
            areas (Areas): provides ``__len__`` and ``get_closest_areas``.
            university (University): its ``coordinates`` are the query point.
            distance (float): threshold handed to the filter helper.

        Returns:
            np.ndarray: the cached areas selected for this threshold.

        """
        uni_id = id(university)  # object identity is the cache key

        entry = self._area_distance_cache.get(uni_id)
        if entry is None:
            # Nothing cached for this university yet: run the neighbour query once.
            logger.info(f"Computing distance cache for university {getattr(university, 'ukprn', uni_id)}")
            neighbour_limit = min(len(areas), 1000)
            nearest_areas, nearest_distances = areas.get_closest_areas(
                coordinates=university.coordinates,
                k=neighbour_limit,
                return_distance=True,
            )
            entry = {
                'areas': np.array(nearest_areas),
                'distances': np.array(nearest_distances),
                'distance_caches': {}
            }
            self._area_distance_cache[uni_id] = entry

        per_threshold = entry['distance_caches']
        if distance not in per_threshold:
            # First time this threshold is requested for the university.
            selected = _filter_students_by_distance_numba(entry['distances'], distance)
            per_threshold[distance] = entry['areas'][selected]

        return per_threshold[distance]

    def _assign_students_optimized(self, global_student_pools: Dict[str, Set[int]], people: Population):
        """Hand out pooled students to universities, one student per university per round.

        Every pool is shuffled, then the pools are consumed in the order
        student_dorm, young_adult, flexible, other. Within a pool the
        universities are visited in turn; each one that is not full receives
        the next id that has not been assigned yet. A pool is finished when its
        ids run out or when a complete round makes no assignment.

        Args:
            global_student_pools (Dict[str, Set[int]]): pool name -> person ids.
            people (Population): resolves an id to a person via ``get_from_id``.

        """
        logger.info("Starting optimized student assignment...")

        # One shuffled list of ids per pool.
        student_lists = {}
        for pool_type, pool_ids in global_student_pools.items():
            student_list = list(pool_ids)
            shuffle(student_list)
            student_lists[pool_type] = student_list
            logger.info(f"Available {pool_type} students: {len(student_list)}")

        # Ids already placed, shared across pools so nobody is enrolled twice.
        already_placed = set()
        total_assigned = 0

        for pool_type in ("student_dorm", "young_adult", "flexible", "other"):
            queue = student_lists.get(pool_type)
            if queue is None:
                continue

            queue_length = len(queue)
            cursor = 0

            logger.info(f"Assigning {pool_type} students...")

            while cursor < queue_length:
                progressed = False

                for university in self.universities:
                    if university.is_full:
                        continue

                    # Advance the cursor to the next id that is still free.
                    while cursor < queue_length:
                        student_id = queue[cursor]
                        cursor += 1
                        if student_id in already_placed:
                            continue

                        already_placed.add(student_id)
                        student = people.get_from_id(student_id)
                        university.add(student, subgroup="student")

                        # Year comes from the age lookup, or is drawn at random
                        # when the age has no entry.
                        if student.age in age_to_years:
                            year = age_to_years[student.age]
                        else:
                            year = randint(0, university.n_years - 1)
                        university.add_to_registered_members(student_id, subgroup_type=year)

                        total_assigned += 1
                        progressed = True
                        break

                    if cursor >= queue_length:
                        break

                # A full round without any assignment: every university is full.
                if not progressed:
                    break

        logger.info(f"Assigned {total_assigned} students to universities")

        # Closing statistics
        filled_unis = len([uni for uni in self.universities if uni.n_students > 0])
        full_unis = len([uni for uni in self.universities if uni.is_full])
        logger.info(f"Universities with students: {filled_unis}/{len(self.universities)}")
        logger.info(f"Universities at capacity: {full_unis}/{len(self.universities)}")

    def find_students_in_areas(
        self, students_dict: dict, areas: Areas, university: University
    ):
        """

        Args:
            students_dict (dict): 
            areas (Areas): 
            university (University): 

        """
        found_students = 0
        total_people = 0
        age_eligible = 0
        already_has_activity = 0
        household_types = {"young_adult": 0, "flexible": 0, "other": 0}
        total_households = 0
        total_student_dorms = 0
        student_dorm_people = 0

        for i, area in enumerate(areas):
            # Search student dorms in this area
            if hasattr(area, 'student_dorms') and area.student_dorms is not None:
                for student_dorm in area.student_dorms:
                    total_student_dorms += 1
                    for person in student_dorm.residents:
                        student_dorm_people += 1
                        total_people += 1
                        if self.min_student_age <= person.age <= self.max_student_age:
                            age_eligible += 1
                            if person.primary_activity is None:
                                students_dict[university.ukprn]["student_dorm"].append(
                                    person.id
                                )
                                found_students += 1
                            else:
                                already_has_activity += 1


            # Search households
            total_households += len(area.households)

            for household in area.households:
                household_types[household.type if household.type in household_types else "other"] += 1
                if household.type == "young_adult":
                    for student in household.residents:
                        total_people += 1
                        if self.min_student_age <= student.age <= self.max_student_age:
                            age_eligible += 1
                            if student.primary_activity is None:
                                students_dict[university.ukprn]["young_adult"].append(
                                    student.id
                                )
                                found_students += 1
                            else:
                                already_has_activity += 1
                elif household.type == "flexible":
                    for person in household.residents:
                        total_people += 1
                        if self.min_student_age <= person.age <= self.max_student_age:
                            age_eligible += 1
                            if person.primary_activity is None:
                                students_dict[university.ukprn]["flexible"].append(
                                    person.id
                                )
                                found_students += 1
                            else:
                                already_has_activity += 1
                else:
                    for person in household.residents:
                        total_people += 1
                        if self.min_student_age <= person.age <= self.max_student_age:
                            age_eligible += 1
                            if person.primary_activity is None:
                                # Reduced probability for ages 23-24 in non-student households
                                if person.age in [23, 24]:
                                    # Only 30% chance they are actually students
                                    import random
                                    if random.random() < 0.3:
                                        students_dict[university.ukprn]["other"].append(
                                            person.id
                                        )
                                        found_students += 1
                                else:
                                    # Normal probability for younger ages (19-22)
                                    students_dict[university.ukprn]["other"].append(
                                        person.id
                                    )
                                    found_students += 1
                            else:
                                already_has_activity += 1

    def distribute_students_to_universities(self, areas: Areas, people: Population):
        """Assign eligible students to the universities and print an allocation summary.

        Collects all eligible students in ``areas`` with
        ``_collect_all_eligible_students_globally``, hands them out with
        ``_assign_students_optimized``, logs the total, and prints to stdout a
        table for the first (up to) 10 universities followed by overall
        statistics.

        Args:
            areas (Areas): areas whose student dorms and households are scanned.
            people (Population): used to resolve collected ids to person objects.

        """
        logger.info("Distributing students to universities...")

        # Pre-collect all eligible students globally, then assign them.
        global_student_pools = self._collect_all_eligible_students_globally(areas)
        self._assign_students_optimized(global_student_pools, people)

        logger.info("Completed university distribution")

        # Calculate the total number of students
        total_students = sum(university.n_students for university in self.universities)
        logger.info(f"Total number of students distributed across all universities: {total_students}")

        # Show a sample of universities with their allocation results
        print("\n===== University Student Allocation Summary =====")
        print(f"Total universities: {len(self.universities)}")
        print(f"Total students allocated: {total_students}")

        # Sample up to 10 universities for detailed view
        sample_universities = []
        for university in self.universities[:10]:
            if university.n_students_max > 0:
                occupancy = f"{(university.n_students / university.n_students_max * 100):.1f}%"
            else:
                # Avoid dividing by a zero capacity.
                occupancy = "0.0%"
            sample_universities.append({
                "| University ID": university.ukprn,
                "| Students Allocated": university.n_students,
                "| Max Capacity": university.n_students_max,
                "| Occupancy %": occupancy,
                "| Status": "Full" if university.n_students >= university.n_students_max else "Available"
            })

        df_sample = pd.DataFrame(sample_universities)
        print(df_sample.to_string(index=False))

        # Show overall statistics
        filled_universities = sum(1 for uni in self.universities if uni.n_students > 0)
        full_universities = sum(1 for uni in self.universities if uni.n_students >= uni.n_students_max)
        total_capacity = sum(uni.n_students_max for uni in self.universities)

        if total_capacity > 0:
            overall_occupancy = f"{(total_students / total_capacity * 100):.1f}%"
        else:
            overall_occupancy = "0.0%"

        print("\n===== Overall Statistics =====")
        print(f"Universities with students: {filled_universities}/{len(self.universities)}")
        print(f"Universities at capacity: {full_universities}/{len(self.universities)}")
        print(f"Total capacity: {total_capacity}")
        # The label is printed in both cases; only the value falls back to "0.0%".
        print(f"Overall occupancy: {overall_occupancy}")

    def _build_student_dict(self, areas, distance):
        """

        Args:
            areas: 
            distance: 

        """
        students_dict = defaultdict(lambda: defaultdict(list))
        # get students in areas
        for university in self.universities:
            close_areas, distances = areas.get_closest_areas(
                coordinates=university.coordinates,
                k=min(len(areas), 1000),
                return_distance=True,
            )
            close_areas = np.array(close_areas)[distances < distance]
            self.find_students_in_areas(
                students_dict=students_dict, areas=close_areas, university=university
            )
        return students_dict

    def _assign_students_to_unis(self, students_dict, people):
        """Enrol per-university candidates, one student per university per round.

        Pools are processed in the order young_adult, flexible, other,
        student_dorm. In each round every university first has its candidate
        list for the pool pruned of ids that were already enrolled anywhere,
        then, if it still has candidates and is not full, enrols one of them.
        Rounds repeat for a pool until a round enrols nobody.

        Args:
            students_dict: nested mapping ``ukprn -> pool -> list of ids``;
                its lists are replaced and consumed in place.
            people: resolves an id to a person via ``get_from_id``.

        """
        # Ids enrolled so far, shared by all universities and pools.
        enrolled_ids = set()

        for key in ("young_adult", "flexible", "other", "student_dorm"):
            progressed = True
            while progressed:
                progressed = False
                for university in self.universities:
                    per_university = students_dict[university.ukprn]

                    # Drop candidates that another university has taken already
                    # and store the pruned list back.
                    remaining = [
                        candidate_id
                        for candidate_id in per_university[key]
                        if candidate_id not in enrolled_ids
                    ]
                    per_university[key] = remaining

                    if not remaining or university.is_full:
                        continue

                    student_id = remaining.pop()
                    enrolled_ids.add(student_id)

                    student = people.get_from_id(student_id)
                    university.add(student, subgroup="student")

                    # Year comes from the age lookup, or is drawn at random
                    # when the age has no entry.
                    if student.age in age_to_years:
                        year = age_to_years[student.age]
                    else:
                        year = randint(0, university.n_years - 1)
                    university.add_to_registered_members(student_id, subgroup_type=year)
                    progressed = True

__init__(universities)

For each university it searches in the nearby areas for students living in student households. Once it has enough to fill the university, it stops searching and fills the university.

Parameters

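- `universities`: a list of universities to fill
- `max_number_of_areas`: maximum number of neighbour areas to look for students (documented here, but not accepted by the `__init__(universities)` signature shown above)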

Source code in june/distributors/university_distributor.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
def __init__(self, universities: List[University]):
    """
    Set up a distributor that enrols eligible people in the given universities.

    The constructor only stores configuration and creates empty caches; no
    students are searched for or assigned until
    ``distribute_students_to_universities`` is called.

    Parameters
    ----------
    universities
        a list of universities to fill
    """
    self.universities = universities
    # Inclusive age range for a person to be considered a potential student.
    self.min_student_age = 18
    self.max_student_age = 24

    # Optimization caches
    # id(university) -> {'areas': ndarray, 'distances': ndarray,
    #                    'distance_caches': {distance threshold: ndarray}}
    self._area_distance_cache: Dict[int, dict] = {}
    # pool name -> ids of the eligible people found by the global collection pass
    self._global_student_pools: Dict[str, Set[int]] = {}
    # Set to True once the global collection pass has filled the pools above.
    self._students_collected = False

distribute_students_to_universities(areas, people)

Optimized university distribution using pre-collected students and cached distances.

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| areas | Areas | | required |
| people | Population | | required |
Source code in june/distributors/university_distributor.py
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
def distribute_students_to_universities(self, areas: Areas, people: Population):
    """Assign eligible students to the universities and print an allocation summary.

    Collects all eligible students in ``areas`` with
    ``_collect_all_eligible_students_globally``, hands them out with
    ``_assign_students_optimized``, logs the total, and prints to stdout a
    table for the first (up to) 10 universities followed by overall
    statistics.

    Args:
        areas (Areas): areas whose student dorms and households are scanned.
        people (Population): used to resolve collected ids to person objects.

    """
    logger.info("Distributing students to universities...")

    # Pre-collect all eligible students globally, then assign them.
    global_student_pools = self._collect_all_eligible_students_globally(areas)
    self._assign_students_optimized(global_student_pools, people)

    logger.info("Completed university distribution")

    # Calculate the total number of students
    total_students = sum(university.n_students for university in self.universities)
    logger.info(f"Total number of students distributed across all universities: {total_students}")

    # Show a sample of universities with their allocation results
    print("\n===== University Student Allocation Summary =====")
    print(f"Total universities: {len(self.universities)}")
    print(f"Total students allocated: {total_students}")

    # Sample up to 10 universities for detailed view
    sample_universities = []
    for university in self.universities[:10]:
        if university.n_students_max > 0:
            occupancy = f"{(university.n_students / university.n_students_max * 100):.1f}%"
        else:
            # Avoid dividing by a zero capacity.
            occupancy = "0.0%"
        sample_universities.append({
            "| University ID": university.ukprn,
            "| Students Allocated": university.n_students,
            "| Max Capacity": university.n_students_max,
            "| Occupancy %": occupancy,
            "| Status": "Full" if university.n_students >= university.n_students_max else "Available"
        })

    df_sample = pd.DataFrame(sample_universities)
    print(df_sample.to_string(index=False))

    # Show overall statistics
    filled_universities = sum(1 for uni in self.universities if uni.n_students > 0)
    full_universities = sum(1 for uni in self.universities if uni.n_students >= uni.n_students_max)
    total_capacity = sum(uni.n_students_max for uni in self.universities)

    if total_capacity > 0:
        overall_occupancy = f"{(total_students / total_capacity * 100):.1f}%"
    else:
        overall_occupancy = "0.0%"

    print("\n===== Overall Statistics =====")
    print(f"Universities with students: {filled_universities}/{len(self.universities)}")
    print(f"Universities at capacity: {full_universities}/{len(self.universities)}")
    print(f"Total capacity: {total_capacity}")
    # The label is printed in both cases; only the value falls back to "0.0%".
    print(f"Overall occupancy: {overall_occupancy}")

find_students_in_areas(students_dict, areas, university)

Parameters:

| Name | Type | Description | Default |
|------|------|-------------|---------|
| students_dict | dict | | required |
| areas | Areas | | required |
| university | University | | required |
Source code in june/distributors/university_distributor.py
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
def find_students_in_areas(
    self, students_dict: dict, areas: Areas, university: University
):
    """Record the eligible students living in ``areas`` as candidates for ``university``.

    A person is eligible when their age lies in
    ``[self.min_student_age, self.max_student_age]`` and their
    ``primary_activity`` is ``None``. Each eligible id is appended to
    ``students_dict[university.ukprn][pool]``, where ``pool`` is
    ``"student_dorm"`` for dorm residents and otherwise the household type
    (``"young_adult"``, ``"flexible"``) or ``"other"`` for any other type.
    In "other" households, people aged 23 or 24 are kept with
    probability 0.3 only.

    Args:
        students_dict (dict): nested mapping ``ukprn -> pool -> list of ids``,
            mutated in place; the nested entries must already support
            ``append`` (e.g. a ``defaultdict`` of ``defaultdict(list)``).
        areas (Areas): iterable of areas; each must expose ``households``
            and may expose ``student_dorms`` (missing or ``None`` is skipped).
        university (University): only its ``ukprn`` is read.

    """
    # Imported once per call rather than once per person inside the loop.
    import random

    def is_eligible(person) -> bool:
        return (
            self.min_student_age <= person.age <= self.max_student_age
            and person.primary_activity is None
        )

    for area in areas:
        # Search student dorms in this area
        dorms = getattr(area, "student_dorms", None)
        if dorms is not None:
            for student_dorm in dorms:
                for person in student_dorm.residents:
                    if is_eligible(person):
                        students_dict[university.ukprn]["student_dorm"].append(
                            person.id
                        )

        # Search households
        for household in area.households:
            if household.type in ("young_adult", "flexible"):
                pool_name = household.type
            else:
                pool_name = "other"

            for person in household.residents:
                if not is_eligible(person):
                    continue
                # Ages 23-24 outside student-type households: only a 30%
                # chance of being treated as a student. Ages below 23 are
                # always kept.
                if (
                    pool_name == "other"
                    and person.age in (23, 24)
                    and not random.random() < 0.3
                ):
                    continue
                students_dict[university.ukprn][pool_name].append(person.id)