28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
class UniversityDistributor:
    """Assign eligible young adults to universities as students.

    A person is a candidate when their age lies in
    ``[min_student_age, max_student_age]`` and their ``primary_activity`` is
    ``None``. Candidates are grouped into pools according to where they live
    ("student_dorm", "young_adult", "flexible", "other"), and the pools are
    then drained into the universities until the candidates run out or every
    university reports ``is_full``.
    """

    # Household types that get a dedicated pool; any other type goes to "other".
    _DEDICATED_HOUSEHOLD_POOLS = ("young_adult", "flexible")
    # In "other" households these ages are only sometimes treated as students.
    _LATE_STUDENT_AGES = (23, 24)
    _LATE_STUDENT_PROBABILITY = 0.3

    def __init__(self, universities: List[University]):
        """Store the universities to fill and initialise the lookup caches.

        Args:
            universities (List[University]): the universities to fill.
        """
        self.universities = universities
        self.min_student_age = 18
        self.max_student_age = 24
        # Optimization caches.
        # Per university (keyed by ``id(university)``): a dict with the keys
        # 'areas', 'distances' and 'distance_caches'.
        self._area_distance_cache: Dict[int, dict] = {}
        self._global_student_pools: Dict[str, Set[int]] = {}
        self._students_collected = False

    def _is_eligible_student(self, person) -> bool:
        """Return True if ``person`` is of student age and has no primary activity."""
        return (
            self.min_student_age <= person.age <= self.max_student_age
            and person.primary_activity is None
        )

    def _keep_other_household_candidate(self, person) -> bool:
        """Decide whether an eligible person from an "other" household is kept.

        Ages 23-24 are kept with 30% probability (one ``random.random()`` draw
        per such person); every younger eligible age is always kept.
        """
        if person.age in self._LATE_STUDENT_AGES:
            # Function-scope import: the module-level imports are not visible here.
            import random

            return random.random() < self._LATE_STUDENT_PROBABILITY
        return True

    def _iter_eligible_students(self, areas):
        """Yield ``(pool_name, person_id)`` for every eligible person in ``areas``.

        For each area the student dorms (if the area has any) are scanned
        first, then the households, so results come out in a stable order.
        """
        for area in areas:
            # Some areas have no ``student_dorms`` attribute, or have it set to None.
            student_dorms = getattr(area, "student_dorms", None)
            if student_dorms is not None:
                for student_dorm in student_dorms:
                    for person in student_dorm.residents:
                        if self._is_eligible_student(person):
                            yield "student_dorm", person.id
            for household in area.households:
                if household.type in self._DEDICATED_HOUSEHOLD_POOLS:
                    pool_name = household.type
                else:
                    pool_name = "other"
                for person in household.residents:
                    if not self._is_eligible_student(person):
                        continue
                    if pool_name == "other" and not self._keep_other_household_candidate(person):
                        continue
                    yield pool_name, person.id

    def _register_student(self, university, student_id, people):
        """Add the person with ``student_id`` to ``university`` and register their year.

        The year comes from ``age_to_years`` when the student's age is listed
        there; otherwise it is drawn with ``randint(0, university.n_years - 1)``.
        """
        student = people.get_from_id(student_id)
        university.add(student, subgroup="student")
        if student.age in age_to_years:
            year = age_to_years[student.age]
        else:
            year = randint(0, university.n_years - 1)
        university.add_to_registered_members(student_id, subgroup_type=year)

    def _collect_all_eligible_students_globally(self, areas: Areas) -> Dict[str, Set[int]]:
        """Collect every eligible student once, grouped by pool.

        The result is cached on the instance: later calls return the cached
        pools and ignore ``areas``.

        Args:
            areas (Areas): the areas whose dorms and households are scanned.

        Returns:
            Dict[str, Set[int]]: person ids per pool ("student_dorm",
            "young_adult", "flexible", "other").
        """
        if self._students_collected:
            return self._global_student_pools
        logger.info("Pre-collecting all eligible students globally...")
        student_pools: Dict[str, Set[int]] = {
            "student_dorm": set(),
            "young_adult": set(),
            "flexible": set(),
            "other": set(),
        }
        # Counts every hit, so an id found twice is counted twice here even
        # though the set stores it once.
        total_eligible = 0
        for pool_type, person_id in self._iter_eligible_students(areas):
            student_pools[pool_type].add(person_id)
            total_eligible += 1
        self._global_student_pools = student_pools
        self._students_collected = True
        logger.info(f"Collected {total_eligible} eligible students globally")
        for pool_type, students in student_pools.items():
            logger.info(f"  {pool_type}: {len(students)} students")
        return student_pools

    def _get_areas_within_distance_cached(self, areas: Areas, university: University, distance: float) -> np.ndarray:
        """Return the areas within ``distance`` of ``university``, using cached lookups.

        The nearest-area query runs once per university (at most the 1000
        closest areas); each distance threshold is then filtered once and
        remembered.

        Args:
            areas (Areas): provides ``get_closest_areas``.
            university (University): its ``coordinates`` are the query point.
            distance (float): threshold passed to the filtering helper.

        Returns:
            np.ndarray: the subset of the cached areas selected for ``distance``.
        """
        # The object id is the cache key, so entries are only valid while the
        # same university objects stay alive.
        uni_id = id(university)
        if uni_id not in self._area_distance_cache:
            logger.info(f"Computing distance cache for university {getattr(university, 'ukprn', uni_id)}")
            close_areas, distances = areas.get_closest_areas(
                coordinates=university.coordinates,
                k=min(len(areas), 1000),
                return_distance=True,
            )
            self._area_distance_cache[uni_id] = {
                'areas': np.array(close_areas),
                'distances': np.array(distances),
                'distance_caches': {},
            }
        cache_data = self._area_distance_cache[uni_id]
        if distance not in cache_data['distance_caches']:
            # NOTE(review): _filter_students_by_distance_numba is defined elsewhere;
            # presumably it returns the indices (or a mask) of distances within
            # the threshold - confirm against its definition.
            valid_indices = _filter_students_by_distance_numba(cache_data['distances'], distance)
            cache_data['distance_caches'][distance] = cache_data['areas'][valid_indices]
        return cache_data['distance_caches'][distance]

    def _assign_students_optimized(self, global_student_pools: Dict[str, Set[int]], people: Population):
        """Assign pooled students to universities round-robin.

        Pools are processed in the order student_dorm, young_adult, flexible,
        other. Within a pool the (shuffled) candidates are handed out one per
        non-full university per round, until the pool is exhausted or no
        university accepted a student during a round.

        Args:
            global_student_pools (Dict[str, Set[int]]): person ids per pool.
            people (Population): used to resolve ids via ``get_from_id``.
        """
        logger.info("Starting optimized student assignment...")
        # Shuffle each pool so the assignment order does not follow set order.
        student_lists = {}
        for pool_type, student_set in global_student_pools.items():
            student_list = list(student_set)
            shuffle(student_list)
            student_lists[pool_type] = student_list
            logger.info(f"Available {pool_type} students: {len(student_list)}")
        # Ids already placed; guards against an id present in several pools.
        assigned_students = set()
        priority_order = ["student_dorm", "young_adult", "flexible", "other"]
        total_assigned = 0
        for pool_type in priority_order:
            if pool_type not in student_lists:
                continue
            students = student_lists[pool_type]
            student_idx = 0
            logger.info(f"Assigning {pool_type} students...")
            while student_idx < len(students):
                made_assignment = False
                for university in self.universities:
                    if university.is_full:
                        continue
                    # Advance to the next student that has not been placed yet.
                    while student_idx < len(students):
                        student_id = students[student_idx]
                        student_idx += 1
                        if student_id in assigned_students:
                            continue
                        assigned_students.add(student_id)
                        self._register_student(university, student_id, people)
                        total_assigned += 1
                        made_assignment = True
                        break
                    if student_idx >= len(students):
                        break
                # A full round without any assignment means every university is full.
                if not made_assignment:
                    break
        logger.info(f"Assigned {total_assigned} students to universities")
        filled_unis = sum(1 for uni in self.universities if uni.n_students > 0)
        full_unis = sum(1 for uni in self.universities if uni.is_full)
        logger.info(f"Universities with students: {filled_unis}/{len(self.universities)}")
        logger.info(f"Universities at capacity: {full_unis}/{len(self.universities)}")

    def find_students_in_areas(
        self, students_dict: dict, areas: Areas, university: University
    ):
        """Append the eligible students found in ``areas`` to ``students_dict``.

        Ids are appended to ``students_dict[university.ukprn][pool]`` where
        ``pool`` is "student_dorm", "young_adult", "flexible" or "other".
        Nothing is returned; ``students_dict`` is modified in place.

        Args:
            students_dict (dict): nested mapping ukprn -> pool -> list of ids;
                the entries must exist or be created on access (e.g. a
                nested ``defaultdict``).
            areas (Areas): iterable of areas to scan.
            university (University): its ``ukprn`` selects the target entry.
        """
        for pool_name, person_id in self._iter_eligible_students(areas):
            students_dict[university.ukprn][pool_name].append(person_id)

    def distribute_students_to_universities(self, areas: Areas, people: Population):
        """Fill the universities with students and print an allocation summary.

        Args:
            areas (Areas): areas scanned for eligible students.
            people (Population): used to resolve person ids to people.
        """
        logger.info("Distributing students to universities...")
        global_student_pools = self._collect_all_eligible_students_globally(areas)
        self._assign_students_optimized(global_student_pools, people)
        logger.info("Completed university distribution")

        total_students = sum(university.n_students for university in self.universities)
        logger.info(f"Total number of students distributed across all universities: {total_students}")

        print("\n===== University Student Allocation Summary =====")
        print(f"Total universities: {len(self.universities)}")
        print(f"Total students allocated: {total_students}")
        # Detailed view of at most the first 10 universities.
        sample_universities = []
        for university in self.universities[:10]:
            sample_universities.append({
                "| University ID": university.ukprn,
                "| Students Allocated": university.n_students,
                "| Max Capacity": university.n_students_max,
                "| Occupancy %": f"{(university.n_students / university.n_students_max * 100):.1f}%" if university.n_students_max > 0 else "0.0%",
                "| Status": "Full" if university.n_students >= university.n_students_max else "Available",
            })
        df_sample = pd.DataFrame(sample_universities)
        print(df_sample.to_string(index=False))

        filled_universities = sum(1 for uni in self.universities if uni.n_students > 0)
        full_universities = sum(1 for uni in self.universities if uni.n_students >= uni.n_students_max)
        total_capacity = sum(uni.n_students_max for uni in self.universities)
        # Guard the division so the labelled line is printed even with zero capacity.
        overall_occupancy = total_students / total_capacity * 100 if total_capacity > 0 else 0.0
        print("\n===== Overall Statistics =====")
        print(f"Universities with students: {filled_universities}/{len(self.universities)}")
        print(f"Universities at capacity: {full_universities}/{len(self.universities)}")
        print(f"Total capacity: {total_capacity}")
        print(f"Overall occupancy: {overall_occupancy:.1f}%")

    def _build_student_dict(self, areas, distance):
        """Build the per-university candidate lists from nearby areas.

        For every university, the closest areas (at most 1000) whose distance
        is strictly below ``distance`` are scanned for eligible students.

        Args:
            areas: provides ``get_closest_areas`` and ``len``.
            distance: exclusive upper bound on the area distance.

        Returns:
            Nested ``defaultdict``: ukprn -> pool name -> list of person ids.
        """
        students_dict = defaultdict(lambda: defaultdict(list))
        for university in self.universities:
            close_areas, distances = areas.get_closest_areas(
                coordinates=university.coordinates,
                k=min(len(areas), 1000),
                return_distance=True,
            )
            # asarray makes the comparison element-wise even for a plain list.
            within_range = np.asarray(distances) < distance
            close_areas = np.array(close_areas)[within_range]
            self.find_students_in_areas(
                students_dict=students_dict, areas=close_areas, university=university
            )
        return students_dict

    def _assign_students_to_unis(self, students_dict, people):
        """Assign students from per-university candidate lists, one per round.

        Pools are processed in the order young_adult, flexible, other,
        student_dorm. In each round every university that is not full takes
        the last still-unassigned candidate of its own list; a pool is done
        once a full round assigns nobody. The lists in ``students_dict`` are
        rewritten in place without already-assigned ids.

        Args:
            students_dict: mapping ukprn -> pool name -> list of person ids.
            people: used to resolve ids via ``get_from_id``.
        """
        # Ids already placed at any university.
        assigned_students = set()
        for key in ["young_adult", "flexible", "other", "student_dorm"]:
            keep_key = True
            while keep_key:
                keep_key = False
                for university in self.universities:
                    # Drop candidates that another university has taken meanwhile.
                    student_candidates = [
                        student_id for student_id in students_dict[university.ukprn][key]
                        if student_id not in assigned_students
                    ]
                    students_dict[university.ukprn][key] = student_candidates
                    if student_candidates and not university.is_full:
                        student_id = student_candidates.pop()
                        assigned_students.add(student_id)
                        self._register_student(university, student_id, people)
                        keep_key = True
|