Skip to content

School distributor

SchoolDistributor

Distributes students in an area to different schools

Source code in june/distributors/school_distributor.py
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
class SchoolDistributor:
    """Distributes students in an area to different schools"""

    def __init__(
        self,
        schools: Schools,
        education_sector_label="P",
        neighbour_schools: int = 35,
        age_range: Tuple[int, int] = (0, 19),
        mandatory_age_range: Tuple[int, int] = (5, 18),
        teacher_student_ratio_primary=21,
        teacher_student_ratio_secondary=16,
        teacher_min_age=21,
        max_classroom_size=40,
    ):
        """
        Store the distribution settings and create the (initially empty) caches.

        Parameters
        ----------
        schools:
            instance of Schools, with information on all schools in world.
        education_sector_label:
            value compared with ``person.sector`` when looking for teachers.
        neighbour_schools:
            number of closest schools considered per area and age group.
        age_range:
            outer age bounds for school attendance, stored as
            ``school_age_range``.
        mandatory_age_range:
            inclusive age bounds within which school attendance is mandatory,
            stored as ``mandatory_school_age_range``.
        teacher_student_ratio_primary:
            mean of the Poisson draw used as pupils-per-teacher in primary
            schools.
        teacher_student_ratio_secondary:
            mean of the Poisson draw used as pupils-per-teacher in secondary
            schools.
        teacher_min_age:
            workers must be strictly older than this to be picked as teachers.
        max_classroom_size:
            value handed to ``school.limit_classroom_sizes``.
        """
        # World data
        self.schools = schools

        # Pupil-related settings
        self.school_age_range = age_range
        self.mandatory_school_age_range = mandatory_age_range
        self.neighbour_schools = neighbour_schools
        self.max_classroom_size = max_classroom_size

        # Teacher-related settings
        self.education_sector_label = education_sector_label
        self.teacher_min_age = teacher_min_age
        self.teacher_student_ratio_primary = teacher_student_ratio_primary
        self.teacher_student_ratio_secondary = teacher_student_ratio_secondary

        # Lookup tables, filled lazily while distributing:
        #   area id -> {age group -> closest schools}
        self._area_school_cache = {}
        #   tuple of school ids -> gender-based selection probabilities
        self._gender_weights_cache = {}
        #   school id -> {'current_pupils', 'max_pupils', 'is_full'}
        self._school_capacity_tracker = {}

    @classmethod
    def from_file(
        cls,
        schools: "Schools",
        config_filename: str = default_config_filename,
        # mandatory_age_range: Tuple[int, int] = (5, 18),#part of config ?
    ) -> "SchoolDistributor":
        """Initialise SchoolDistributor from path to its config file

        The config must provide the keys ``neighbour_schools``, ``age_range``,
        ``mandatory_age_range``, ``teacher_min_age`` and ``max_classroom_size``.
        The teacher/student ratios are not read from the config and keep the
        constructor defaults.

        Args:
            schools ("Schools"): instance of Schools, with information on all schools in world.
            config_filename (str, optional): path to the YAML config file. (Default value = default_config_filename)

        Returns:
            SchoolDistributor instance: distributor configured from the file.

        Raises:
            KeyError: if one of the required config keys is missing.

        """
        with open(config_filename) as f:
            # NOTE(review): FullLoader can build arbitrary Python objects from
            # tagged YAML; prefer yaml.safe_load if this file may be untrusted.
            config = yaml.load(f, Loader=yaml.FullLoader)

        # Pass everything by keyword: positionally, ``teacher_min_age`` and
        # ``max_classroom_size`` would land in the two teacher/student ratio
        # parameters that sit before them in ``__init__``.
        return cls(
            schools,
            education_sector_label=cls.find_jobs(config),
            neighbour_schools=config["neighbour_schools"],
            age_range=config["age_range"],
            mandatory_age_range=config["mandatory_age_range"],
            teacher_min_age=config["teacher_min_age"],
            max_classroom_size=config["max_classroom_size"],
        )

    @classmethod
    def from_geography(
        cls, geography: Geography, config_filename: str = default_config_filename
    ):
        """Build a distributor for the schools attached to ``geography``.

        Args:
            geography (Geography): object exposing the world's schools as ``geography.schools``.
            config_filename (str, optional): path to the config file, forwarded to ``from_file``. (Default value = default_config_filename)

        Returns:
            whatever ``from_file`` returns for ``geography.schools``.

        """
        world_schools = geography.schools
        return cls.from_file(world_schools, config_filename)

    @staticmethod
    def find_jobs(config: dict):
        """Collect the ``sector_id`` of every entry nested two levels deep in ``config``.

        Only top-level values that are dictionaries are inspected; each of
        their values must be subscriptable with ``"sector_id"``.

        Args:
            config (dict): parsed configuration.

        Returns:
            list: the collected sector ids, in iteration order.

        """
        nested_sections = (
            section for section in config.values() if isinstance(section, dict)
        )
        return [
            entry["sector_id"]
            for section in nested_sections
            for entry in section.values()
        ]

    def _precompute_area_school_assignments(self, areas: List[Area]):
        """Fill ``self._area_school_cache`` with the closest schools of every area.

        For each area and each age group known to ``self.schools.school_trees``
        the ``self.neighbour_schools`` closest schools are queried once and
        stored, so later per-person lookups do not need to query again.

        Args:
            areas (List[Area]): areas to cache; each needs ``id`` and ``coordinates``.

        """
        logger.info("Pre-computing school assignments for all areas...")

        world_schools = self.schools
        for position, area in enumerate(areas):
            if position % 1000 == 0:
                logger.info(f"Pre-computed assignments for {position} of {len(areas)} areas so far.")

            schools_by_agegroup = {}
            for agegroup in world_schools.school_trees:
                nearest_local = world_schools.get_closest_schools(
                    agegroup, area.coordinates, self.neighbour_schools
                )
                # The query returns indices local to the age group; translate
                # them to positions in the global list of schools.
                schools_by_agegroup[agegroup] = [
                    world_schools.members[
                        world_schools.school_agegroup_to_global_indices[agegroup][local]
                    ]
                    for local in nearest_local
                ]

            self._area_school_cache[area.id] = schools_by_agegroup

        logger.info("Completed pre-computing school assignments")

    def _initialize_school_capacity_tracking(self):
        """Rebuild the per-school capacity table from the schools' current state.

        Each entry maps a school id to its current pupil count, its maximum
        pupil count and whether the former has reached the latter.
        """
        capacity_by_school = {}
        for school in self.schools.members:
            enrolled = school.n_pupils
            limit = school.n_pupils_max
            capacity_by_school[school.id] = {
                'current_pupils': enrolled,
                'max_pupils': limit,
                'is_full': enrolled >= limit,
            }
        self._school_capacity_tracker = capacity_by_school

    def _update_school_capacity(self, school):
        """Record one more pupil for ``school`` in the capacity table.

        Args:
            school: school that just received a pupil; must already have an
                entry in ``self._school_capacity_tracker``.

        """
        entry = self._school_capacity_tracker[school.id]
        enrolled = entry['current_pupils'] + 1
        entry['current_pupils'] = enrolled
        entry['is_full'] = enrolled >= entry['max_pupils']

    def _get_available_schools_fast(self, schools_list, age_group_full):
        """Return the schools of ``schools_list`` that can still take pupils.

        Args:
            schools_list: candidate schools, each tracked in
                ``self._school_capacity_tracker``.
            age_group_full: when truthy, no filtering happens and
                ``schools_list`` itself is returned.

        Returns:
            list: the selected schools, in the order given by the filter helper.

        """
        if age_group_full:
            # Age group already flagged as full: every candidate stays eligible.
            return schools_list

        tracker = self._school_capacity_tracker
        full_flags = np.array(
            [tracker[candidate.id]['is_full'] for candidate in schools_list],
            dtype=bool,
        )

        # presumably yields the positions whose flag is False — confirm against
        # the definition of _filter_available_schools_numba
        open_positions = _filter_available_schools_numba(full_flags)

        return [schools_list[position] for position in open_positions]

    def distribute_kids_to_school(self, areas: List[Area]):
        """Assign the kids living in ``areas`` to schools and print a sample summary.

        For every area the mandatory-age kids are placed first, then the
        non-mandatory ones, both choosing among the closest schools cached for
        that area and age group. Afterwards a table describing up to 10 randomly
        sampled schools is printed to stdout.

        Args:
            areas (List[Area]): areas whose residents should be distributed.

        """
        logger.info("Distributing kids to schools")

        # Pre-compute closest schools for every area that is not cached yet.
        # Checking per area (instead of "is the cache empty?") keeps a later
        # call with other areas from failing with a KeyError in the loop below.
        uncached_areas = [
            area for area in areas if area.id not in self._area_school_cache
        ]
        if uncached_areas:
            self._precompute_area_school_assignments(uncached_areas)

        if not self._school_capacity_tracker:
            self._initialize_school_capacity_tracking()

        for i, area in enumerate(areas):
            if i % 4000 == 0:
                logger.info(f"Distributed kids in {i} of {len(areas)} areas so far.")

            closest_schools_by_age = self._area_school_cache[area.id]
            # Per-area flags: set once all candidate schools of an age group
            # turned out to be full while placing mandatory kids.
            is_school_full = {agegroup: False for agegroup in self.schools.school_trees}

            self.distribute_mandatory_kids_to_school(
                area, is_school_full, closest_schools_by_age
            )
            self.distribute_non_mandatory_kids_to_school(
                area, is_school_full, closest_schools_by_age
            )
        logger.info("Kids distributed to schools")

        # Visualise the final distribution with a sample of schools and member ids
        all_schools = self.schools.members
        sample_data = []
        for school in random.sample(all_schools, min(10, len(all_schools))):
            registered = school.registered_members_ids
            total_registered = sum(len(members) for members in registered.values())

            # Up to 2 ids per subgroup, capped at 5 ids per school
            sampled_ids = [
                f"sg{subgroup}:{member_id}"
                for subgroup, members in registered.items()
                if members
                for member_id in members[:2]
            ][:5]

            sample_data.append({
                "| School ID": school.id,
                "| Area": school.area.name if school.area else "Unknown Area",
                "| Total Students": len(school.students),
                "| Total Registered Members": total_registered,
                "| Subgroups": list(registered.keys()),
                "| Sample Registered Member IDs": sampled_ids
            })

        df_schools = pd.DataFrame(sample_data)
        print("\n===== Sample of Registered Members in Schools =====")
        print(df_schools)

    def _get_gender_weights_cache_key(self, available_schools):
        """Build the cache key identifying a group of schools.

        The key is the ascending tuple of the schools' ids, so it does not
        depend on the order in which the schools are given.

        Args:
            available_schools: iterable of schools exposing ``id``.

        Returns:
            tuple: sorted school ids.

        """
        school_ids = [school.id for school in available_schools]
        school_ids.sort()
        return tuple(school_ids)

    def _precompute_gender_weights(self, available_schools):
        """Return (and cache) the gender-based selection probabilities for a group of schools.

        The cache entry is keyed by ``_get_gender_weights_cache_key``. The
        probability arrays in an entry are ordered like the ``available_schools``
        list of the call that created it; that list is stored under
        ``'schools'``.

        Args:
            available_schools: schools exposing ``boy_ratio`` and ``girl_ratio``.

        Returns:
            dict: with keys ``'male_probs'``, ``'female_probs'`` and ``'schools'``.

        """
        key = self._get_gender_weights_cache_key(available_schools)
        cached = self._gender_weights_cache.get(key)
        if cached is not None:
            return cached

        # Gather both ratios as float arrays, the input format of the helper
        boys = np.array(
            [school.boy_ratio for school in available_schools], dtype=np.float64
        )
        girls = np.array(
            [school.girl_ratio for school in available_schools], dtype=np.float64
        )

        # Third argument selects the gender: False for male, True for female
        entry = {
            'male_probs': _compute_gender_weights_numba(boys, girls, False),
            'female_probs': _compute_gender_weights_numba(boys, girls, True),
            'schools': available_schools
        }
        self._gender_weights_cache[key] = entry
        return entry

    def _select_school_with_gender_preference(self, person, available_schools):
        """Pick one school for ``person`` with gender-dependent probabilities.

        Uses the cached weights from ``_precompute_gender_weights``.

        Args:
            person: person to place; ``person.sex == 'f'`` selects the female
                probabilities, anything else the male ones.
            available_schools: candidate schools.

        Returns:
            The chosen school, or ``None`` when ``available_schools`` is empty.

        """
        if not available_schools:
            return None

        if len(available_schools) == 1:
            return available_schools[0]

        weights_data = self._precompute_gender_weights(available_schools)

        if person.sex == 'f':
            probabilities = weights_data['female_probs']
        else:
            probabilities = weights_data['male_probs']

        # The cache key ignores the order of the schools, so a cache hit may
        # carry probabilities computed for the same schools in another order.
        # Draw from the list stored with the probabilities to keep both aligned.
        schools = weights_data['schools']

        # Draw an index rather than an element so numpy never has to convert
        # the school objects into an array.
        chosen = np.random.choice(len(schools), p=probabilities)
        return schools[chosen]

    def distribute_mandatory_kids_to_school(
        self, area: Area, is_school_full: dict, closest_schools_by_age: dict
    ):
        """Place the mandatory-age kids of ``area`` in one of their closest schools.

        A school is drawn with gender-weighted probabilities among those of the
        ``self.neighbour_schools`` closest schools that still have vacancies.
        If none has vacancies, the age group is flagged as full for this area
        and the draw is made among all of them (making the chosen school larger
        than it should be). Kids whose age group has no candidate school at all
        are left unassigned.

        Besides ages within ``self.mandatory_school_age_range`` (inclusive),
        19-year-olds living in a boarding school are considered. 18-year-olds
        are skipped when they live in a student dorm or in a household of type
        ``'young_adult'``/``'flexible'``; in other households they are skipped
        with probability 0.34.

        Args:
            area (Area): area whose ``people`` are processed.
            is_school_full (dict): age -> flag, updated in place.
            closest_schools_by_age (dict): age -> closest schools of this area.

        """
        first_age, last_age = self.mandatory_school_age_range
        for person in area.people:
            age = person.age
            if not (
                first_age <= age <= last_age
                or (age == 19 and person.residence.spec == 'boarding_school')
            ):
                continue

            if age == 18:
                residence = person.residence
                # Dorm residents are treated as university students
                if residence.spec == 'student_dorm':
                    continue
                if residence.spec == 'household':
                    if residence.group.type in ('young_adult', 'flexible'):
                        continue
                    # 66% chance they go to school, 34% chance they go to uni
                    if np.random.random() < 0.34:
                        continue

            if age not in is_school_full:
                continue

            candidates = closest_schools_by_age[age][:self.neighbour_schools]
            school = None
            if not is_school_full[age]:
                non_full_schools = self._get_available_schools_fast(candidates, False)
                if non_full_schools:
                    school = self._select_school_with_gender_preference(
                        person, non_full_schools
                    )
                else:
                    # Remember it so the next kids of this age skip the filtering
                    is_school_full[age] = True
            if school is None:
                # Every candidate is full: overfill one of them
                school = self._select_school_with_gender_preference(person, candidates)
            if school is None:
                # No candidate school exists for this age group near this area
                continue

            # remove from working population
            if person.work_super_area is not None:
                person.work_super_area.remove_worker(person)
            school.add(person)
            self._update_school_capacity(school)
            # Subgroup 0 is used for people older than the school's age range,
            # otherwise the subgroup follows the year within the school.
            subgroup = 0 if age > school.age_max else age - school.age_min + 1
            school.add_to_registered_members(person.id, subgroup_type=subgroup)

    def distribute_non_mandatory_kids_to_school(
        self, area: Area, is_school_full: dict, closest_schools_by_age: dict
    ):
        """Place kids for whom school is optional, if a nearby school has room.

        Applies to ages strictly between the lower bounds of
        ``self.school_age_range`` and ``self.mandatory_school_age_range``, or
        strictly between their upper bounds. A school among the
        ``self.neighbour_schools`` closest ones is suitable when it is below
        its pupil limit and the kid's year group holds fewer pupils than an
        even share of that limit. One suitable school is drawn with
        gender-weighted probabilities; without any suitable school the kid is
        not sent to school.

        Args:
            area (Area): area whose ``people`` are processed.
            is_school_full (dict): age -> flag; ages flagged full are skipped.
            closest_schools_by_age (dict): age -> closest schools of this area.

        """
        for person in area.people:
            age = person.age
            below_mandatory = (
                self.school_age_range[0] < age < self.mandatory_school_age_range[0]
            )
            above_mandatory = (
                self.mandatory_school_age_range[1] < age < self.school_age_range[1]
            )
            if not (below_mandatory or above_mandatory):
                continue

            # Skip 18-year-olds living in student dorms - they should be university students
            residence_spec = getattr(getattr(person, 'residence', None), 'spec', None)
            if age == 18 and residence_spec == 'student_dorm':
                continue

            if age not in is_school_full or is_school_full[age]:
                continue

            suitable_schools = []
            for candidate_school in closest_schools_by_age[age][:self.neighbour_schools]:
                yearindex = age - candidate_school.age_min + 1
                if yearindex >= len(candidate_school.subgroups):
                    continue
                if candidate_school.n_pupils >= candidate_school.n_pupils_max:
                    continue
                # A school covering a single age has age_max == age_min; use 1
                # there instead of dividing by zero.
                n_years = (candidate_school.age_max - candidate_school.age_min) or 1
                n_pupils_age = len(candidate_school.subgroups[yearindex].people)
                if n_pupils_age < candidate_school.n_pupils_max / n_years:
                    suitable_schools.append(candidate_school)

            if not suitable_schools:
                continue

            school = self._select_school_with_gender_preference(person, suitable_schools)
            # remove from working population
            if person.work_super_area is not None:
                person.work_super_area.remove_worker(person)
            school.add(person)
            self._update_school_capacity(school)
            subgroup = 0 if age > school.age_max else age - school.age_min + 1
            school.add_to_registered_members(person.id, subgroup_type=subgroup)

    def distribute_teachers_to_schools_in_super_areas(
        self, super_areas: List[SuperArea]
    ):
        """Distribute teachers in every super area, then print a classroom sample.

        After all super areas are processed, the size of every non-empty
        classroom (subgroups from index 1 on) is collected and a random sample
        of up to 10 rows is printed to stdout.

        Args:
            super_areas (List[SuperArea]): super areas to process; iterated twice.

        """
        for super_area in super_areas:
            self.distribute_teachers_to_school(super_area)

        schools_in_order = (
            school
            for super_area in super_areas
            for area in super_area.areas
            for school in area.schools
        )
        rows = []
        for school in schools_in_order:
            school_id = getattr(school, 'id', "Unknown")
            for classroom_id, classroom in enumerate(school.subgroups[1:], start=1):
                if classroom.people:
                    rows.append({
                        "| School ID": school_id,
                        "| Classroom ID": classroom_id,
                        "| Total Students in Classroom": len(classroom.people)
                    })

        # Show only a sample of 10 rows
        df_classrooms = pd.DataFrame(rows)
        sample_size = min(10, len(df_classrooms))
        print("\n===== Teacher Assignment to Schools =====")
        print(df_classrooms.sample(n=sample_size))

    def distribute_teachers_to_school(self, super_area: SuperArea):
        """Assign the teachers working in ``super_area`` to its schools.

        Strategy:

        1. Schools with pupils are split into primary and secondary. A school
           is secondary when ``age_min > 11`` and primary when ``age_max <= 11``;
           a school spanning both is primary only if more than half of the ages
           it covers are 11 or below.
        2. Each school gets ``n_teachers_max`` from its pupil count divided by
           a Poisson draw around the configured pupils-per-teacher ratio.
        3. Workers of the education sector that are strictly older than
           ``self.teacher_min_age`` and have no primary activity are split by
           ``sub_sector`` into primary, secondary and other teachers.
        4. Primary teachers fill primary schools and secondary teachers fill
           secondary schools, one teacher per school and pass. The teachers left
           over first staff schools that still have no teacher, then fill any
           school below its ``n_teachers_max``.

        Args:
            super_area (SuperArea): super area providing ``areas`` and ``workers``.

        """
        # separate schools in primary and secondary based on age ranges served
        primary_schools = []
        secondary_schools = []
        for area in super_area.areas:
            for school in area.schools:
                if school.n_pupils == 0:
                    continue
                if school.age_min > 11:
                    secondary_schools.append(school)
                elif school.age_max <= 11:
                    primary_schools.append(school)
                else:
                    # All-through school: assign by majority age coverage
                    primary_ages = 11 - school.age_min + 1
                    total_ages = school.age_max - school.age_min + 1
                    if primary_ages / total_ages > 0.5:
                        primary_schools.append(school)
                    else:
                        secondary_schools.append(school)

        # assign teacher to student ratios in schools
        for schools, mean_ratio in (
            (primary_schools, self.teacher_student_ratio_primary),
            (secondary_schools, self.teacher_student_ratio_secondary),
        ):
            for school in schools:
                # A Poisson draw can be 0; clamp it to avoid dividing by zero
                pupils_per_teacher = max(1, np.random.poisson(mean_ratio))
                school.n_teachers_max = int(
                    np.round(school.n_pupils / pupils_per_teacher)
                )

        np.random.shuffle(primary_schools)
        np.random.shuffle(secondary_schools)

        # The label is a single sector id by default, but ``find_jobs`` returns
        # a list of ids; accept both.
        label = self.education_sector_label
        label_is_collection = isinstance(label, (list, tuple, set, frozenset))
        all_teachers = [
            person
            for person in super_area.workers
            if (person.sector in label if label_is_collection else person.sector == label)
            and person.age > self.teacher_min_age
            and person.primary_activity is None
        ]
        primary_teachers = []
        secondary_teachers = []
        extra_teachers = []
        for teacher in all_teachers:
            if teacher.sub_sector == "teacher_primary":
                primary_teachers.append(teacher)
            elif teacher.sub_sector == "teacher_secondary":
                secondary_teachers.append(teacher)
            else:
                extra_teachers.append(teacher)
        np.random.shuffle(primary_teachers)
        np.random.shuffle(secondary_teachers)
        np.random.shuffle(extra_teachers)

        self._fill_schools_with_teachers(primary_teachers, primary_schools)
        self._fill_schools_with_teachers(secondary_teachers, secondary_schools)

        remaining_teachers = primary_teachers + secondary_teachers + extra_teachers
        all_schools = primary_schools + secondary_schools

        # Make sure every school with pupils has at least one teacher
        for school in all_schools:
            if not remaining_teachers:
                break
            if school.n_pupils > 0 and school.n_teachers == 0:
                self._add_teacher_to_school(school, remaining_teachers.pop())

        self._fill_schools_with_teachers(remaining_teachers, all_schools)

    @staticmethod
    def _add_teacher_to_school(school, teacher):
        """Add ``teacher`` to ``school``, register them in subgroup 0 and flag them as key worker."""
        school.add(teacher)
        school.add_to_registered_members(teacher.id, subgroup_type=0)  # Teachers are in subgroup 0
        teacher.lockdown_status = "key_worker"

    def _fill_schools_with_teachers(self, teachers, schools):
        """Hand out ``teachers`` (consumed from the end) to ``schools`` below their ``n_teachers_max``.

        Schools receive one teacher per pass; passes repeat until the teachers
        run out or a full pass places nobody. Every popped teacher is placed,
        including the last one of the list.
        """
        while teachers:
            placed_any = False
            for school in schools:
                if not teachers:
                    break
                if school.n_pupils == 0:
                    continue
                if school.n_teachers < school.n_teachers_max:
                    self._add_teacher_to_school(school, teachers.pop())
                    placed_any = True
            if not placed_any:
                break

    def limit_classroom_sizes(self):
        """Cap classroom sizes in every school and print a classroom overview.

        Each school is asked to limit its classrooms to
        ``self.max_classroom_size``. Afterwards one row per classroom
        (subgroups from index 1 on; index 0 holds the teachers) is printed to
        stdout, with the classroom size and up to 5 randomly sampled member ids.

        """
        size_cap = self.max_classroom_size
        for school in self.schools:
            school.limit_classroom_sizes(size_cap)

        # Collect classroom data from all schools
        rows = []
        for school in self.schools:
            classrooms = school.subgroups[1:]
            for number, classroom in enumerate(classrooms, start=1):
                member_ids = [member.id for member in classroom.people]
                shown_ids = random.sample(member_ids, min(5, len(member_ids)))
                rows.append({
                    "| School ID": school.id,
                    "| Classroom": f"{number}",
                    "| Total Students": len(classroom.people),
                    "| Sample of IDs of Persons (Students)": ", ".join(
                        str(member_id) for member_id in shown_ids
                    )
                })

        df_classrooms = pd.DataFrame(rows)

        print("\n===== Classroom Distribution =====")
        print(df_classrooms)

__init__(schools, education_sector_label='P', neighbour_schools=35, age_range=(0, 19), mandatory_age_range=(5, 18), teacher_student_ratio_primary=21, teacher_student_ratio_secondary=16, teacher_min_age=21, max_classroom_size=40)

Get closest schools to this output area, per age group (different schools admit pupils with different age ranges)

Parameters

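schools: instance of Schools, with information on all schools in the world. area: instance of Area (listed in the docstring, but not a parameter of the signature above). config: config dictionary (likewise not a parameter of the signature above).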

Source code in june/distributors/school_distributor.py
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
def __init__(
    self,
    schools: Schools,
    education_sector_label="P",
    neighbour_schools: int = 35,
    age_range: Tuple[int, int] = (0, 19),
    mandatory_age_range: Tuple[int, int] = (5, 18),
    teacher_student_ratio_primary=21,
    teacher_student_ratio_secondary=16,
    teacher_min_age=21,
    max_classroom_size=40,
):
    """
    Store the settings used to distribute pupils and teachers to schools
    and create the (initially empty) helper caches.

    Parameters
    ----------
    schools:
        instance of Schools, with information on all schools in world.
    education_sector_label:
        sector label that identifies workers of the education sector.
    neighbour_schools:
        number of closest schools considered as candidates for a pupil.
    age_range:
        (min, max) ages of people that may attend school.
    mandatory_age_range:
        (min, max) ages for which school attendance is mandatory.
    teacher_student_ratio_primary:
        students-per-teacher ratio used for primary schools.
    teacher_student_ratio_secondary:
        students-per-teacher ratio used for secondary schools.
    teacher_min_age:
        age threshold for workers to be considered as teachers.
    max_classroom_size:
        maximum number of students per classroom subgroup.
    """
    # World data and the sector label used to recognise teachers
    self.schools = schools
    self.education_sector_label = education_sector_label

    # Pupil distribution settings
    self.neighbour_schools = neighbour_schools
    self.school_age_range = age_range
    self.mandatory_school_age_range = mandatory_age_range
    self.max_classroom_size = max_classroom_size

    # Teacher distribution settings
    self.teacher_student_ratio_primary = teacher_student_ratio_primary
    self.teacher_student_ratio_secondary = teacher_student_ratio_secondary
    self.teacher_min_age = teacher_min_age

    # Helper caches, filled lazily by the distribution methods:
    # school assignments by area, gender weights by school group,
    # and school capacity tracking
    self._area_school_cache = {}
    self._gender_weights_cache = {}
    self._school_capacity_tracker = {}

distribute_kids_to_school(areas)

Function to distribute kids to schools according to distance

Parameters:

Name Type Description Default
areas List[Area]
required
Source code in june/distributors/school_distributor.py
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
def distribute_kids_to_school(self, areas: List[Area]):
    """Assign the kids of every area to schools, then print a sample report.

    For each area the pre-computed closest schools are looked up by
    ``area.id`` and the mandatory and non-mandatory distribution steps are
    run, sharing one "is full" flag per age group. Afterwards up to 10
    randomly sampled schools are summarised in a DataFrame and printed.

    Args:
        areas (List[Area]): areas whose residents are distributed.

    """
    logger.info("Distributing kids to schools")

    # Build the helper caches only if they have not been filled yet
    if not self._area_school_cache:
        self._precompute_area_school_assignments(areas)
    if not self._school_capacity_tracker:
        self._initialize_school_capacity_tracking()

    for index, area in enumerate(areas):
        if index % 4000 == 0:
            logger.info(f"Distributed kids in {index} of {len(areas)} areas so far.")

        schools_by_age = self._area_school_cache[area.id]
        # Fresh "all candidate schools are full" flags for every area
        full_flags = dict.fromkeys(self.schools.school_trees, False)

        self.distribute_mandatory_kids_to_school(area, full_flags, schools_by_age)
        self.distribute_non_mandatory_kids_to_school(area, full_flags, schools_by_age)
    logger.info("Kids distributed to schools")

    def summarise(school):
        """Build one report row for ``school``."""
        registered = school.registered_members_ids
        total_registered = sum(len(members) for members in registered.values())
        all_subgroups = list(registered.keys())
        # Up to 2 IDs per non-empty subgroup, capped at 5 overall
        sampled_ids = [
            f"sg{subgroup}:{member_id}"
            for subgroup, members in registered.items()
            if members
            for member_id in members[:2]
        ][:5]
        return {
            "| School ID": school.id,
            "| Area": school.area.name if school.area else "Unknown Area",
            "| Total Students": len(school.students),
            "| Total Registered Members": total_registered,
            "| Subgroups": all_subgroups,
            "| Sample Registered Member IDs": sampled_ids,
        }

    # Visualise the final distribution for a random sample of schools
    all_schools = self.schools.members
    chosen_schools = random.sample(all_schools, min(10, len(all_schools)))
    df_schools = pd.DataFrame([summarise(school) for school in chosen_schools])
    print("\n===== Sample of Registered Members in Schools =====")
    print(df_schools)

distribute_mandatory_kids_to_school(area, is_school_full, closest_schools_by_age)

Send kids to the nearest school among the self.neighbour_schools, that has vacancies. If none of them has vacancies, pick one of them at random using gender-weighted selection (making it larger than it should be)

Parameters:

Name Type Description Default
area Area
required
is_school_full dict
required
closest_schools_by_age dict
required
Source code in june/distributors/school_distributor.py
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
def distribute_mandatory_kids_to_school(
    self, area: Area, is_school_full: dict, closest_schools_by_age: dict
):
    """Send kids of mandatory school age to one of the nearest schools.

    Candidates are the first ``self.neighbour_schools`` schools listed for
    the kid's age. A school is picked with gender-weighted selection among
    the candidates that still have vacancies. If none has vacancies, the
    age group is flagged as full in ``is_school_full`` and the school is
    picked among all candidates (making it larger than it should be).

    19-year-olds living in a boarding school are treated like mandatory
    pupils. 18-year-olds are skipped when they live in a student dorm or
    in a 'young_adult'/'flexible' household; those in other households
    are skipped with probability 0.34 (assumed to attend university).

    Args:
        area (Area): area whose ``people`` are distributed.
        is_school_full (dict): per-age flag, updated in place, telling
            whether all candidate schools for that age are full. Ages
            missing from this dict are not distributed.
        closest_schools_by_age (dict): per-age list of candidate schools,
            nearest first.

    """
    min_age = self.mandatory_school_age_range[0]
    max_age = self.mandatory_school_age_range[1]

    def residence_spec(person):
        # A missing/None residence is treated as "no spec" instead of
        # raising, matching the guarded check in the non-mandatory step.
        return getattr(getattr(person, "residence", None), "spec", None)

    for person in area.people:
        age = person.age
        if not (min_age <= age <= max_age):
            # Outside the mandatory range only 19-year-old boarders qualify
            if not (age == 19 and residence_spec(person) == "boarding_school"):
                continue

        if age == 18:
            spec = residence_spec(person)
            # 18-year-olds in student dorms are university students
            if spec == "student_dorm":
                continue
            if spec == "household":
                if person.residence.group.type in ("young_adult", "flexible"):
                    continue
                # 66% chance they go to school, 34% chance they go to uni
                if np.random.random() < 0.34:
                    continue

        if age not in is_school_full:
            continue

        nearby_schools = closest_schools_by_age[age][: self.neighbour_schools]
        candidates = nearby_schools
        if not is_school_full[age]:
            open_schools = self._get_available_schools_fast(
                nearby_schools, is_school_full[age]
            )
            if open_schools:
                candidates = open_schools
            else:
                # Nothing left with vacancies: remember it and overfill
                is_school_full[age] = True
        school = self._select_school_with_gender_preference(person, candidates)

        # remove from working population
        if person.work_super_area is not None:
            person.work_super_area.remove_worker(person)
        school.add(person)
        self._update_school_capacity(school)
        # Kids older than the school's age_max are registered in subgroup 0;
        # otherwise the subgroup is the year index (1 = age_min)
        subgroup = 0 if age > school.age_max else age - school.age_min + 1
        school.add_to_registered_members(person.id, subgroup_type=subgroup)

distribute_non_mandatory_kids_to_school(area, is_school_full, closest_schools_by_age)

For kids whose age is within the school age range but outside the mandatory range, send them to a school that has vacancies among the self.neighbour_schools closest schools. If none of them has vacancies, do not send them to school.

Parameters:

Name Type Description Default
area Area
required
is_school_full dict
required
closest_schools_by_age dict
required
Source code in june/distributors/school_distributor.py
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
def distribute_non_mandatory_kids_to_school(
    self, area: Area, is_school_full: dict, closest_schools_by_age: dict
):
    """Send kids of non-mandatory school age to a nearby school with vacancies.

    Applies to ages strictly between ``self.school_age_range[0]`` and the
    mandatory minimum, or strictly between the mandatory maximum and
    ``self.school_age_range[1]``. Among the ``self.neighbour_schools``
    closest schools, those with room both overall and in the kid's year
    group are kept, and one is chosen with gender-weighted selection.
    If none of them has vacancies the kid is not sent to school.

    Args:
        area (Area): area whose ``people`` are distributed.
        is_school_full (dict): per-age flag; ages that are missing or
            flagged as full are skipped.
        closest_schools_by_age (dict): per-age list of candidate schools,
            nearest first.

    """
    for person in area.people:
        age = person.age
        below_mandatory = (
            self.school_age_range[0] < age < self.mandatory_school_age_range[0]
        )
        above_mandatory = (
            self.mandatory_school_age_range[1] < age < self.school_age_range[1]
        )
        if not (below_mandatory or above_mandatory):
            continue

        # Skip 18-year-olds living in student dorms - they should be
        # university students, not school students
        if age == 18:
            residence = getattr(person, "residence", None)
            if getattr(residence, "spec", None) == "student_dorm":
                continue

        if age not in is_school_full or is_school_full[age]:
            continue

        # Find all suitable schools with capacity for this age group
        suitable_schools = []
        for candidate in closest_schools_by_age[age][: self.neighbour_schools]:
            year_index = age - candidate.age_min + 1
            if year_index >= len(candidate.subgroups):
                continue
            if candidate.n_pupils >= candidate.n_pupils_max:
                continue
            # Per-year cap is n_pupils_max spread over the age span; a
            # single-year school (span 0) would otherwise divide by zero
            age_span = candidate.age_max - candidate.age_min
            if age_span == 0:
                age_span = 1
            n_pupils_age = len(candidate.subgroups[year_index].people)
            if n_pupils_age < candidate.n_pupils_max / age_span:
                suitable_schools.append(candidate)

        if not suitable_schools:
            continue

        # Use gender-weighted selection among suitable schools
        school = self._select_school_with_gender_preference(person, suitable_schools)
        if person.work_super_area is not None:
            person.work_super_area.remove_worker(person)
        school.add(person)
        self._update_school_capacity(school)
        # Kids older than the school's age_max are registered in subgroup 0;
        # otherwise the subgroup is the year index (1 = age_min)
        subgroup = 0 if age > school.age_max else age - school.age_min + 1
        school.add_to_registered_members(person.id, subgroup_type=subgroup)

distribute_teachers_to_school(super_area)

Assigns teachers to the schools of a super area. The strategy is the following: we loop over the schools to divide them into two subgroups, primary schools and secondary schools. If a school serves both age ranges, we assign it to the group that covers the majority of its age range. Then we loop over the workers in the super area to find the teachers, which we also divide into two subgroups analogously to the schools. We assign the teachers to the schools following a fixed student-to-teacher ratio. We put a lower age limit on teachers at the age of 21.

Parameters:

Name Type Description Default
super_area SuperArea
required
Source code in june/distributors/school_distributor.py
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
def distribute_teachers_to_school(self, super_area: SuperArea):
    """Assign the teachers working in a super area to its schools.

    The strategy is the following:

    1. Schools with pupils are split into primary (all ages <= 11) and
       secondary (all ages > 11). A school serving both ranges goes to the
       group that covers the majority of its age range.
    2. Each school gets ``n_teachers_max`` = pupils divided by a Poisson
       draw around the configured student-to-teacher ratio.
    3. Workers of the education sector that are older than
       ``self.teacher_min_age`` and have no primary activity are split by
       ``sub_sector`` into primary, secondary and extra teachers.
    4. Primary/secondary teachers fill their own school type round-robin;
       leftovers first cover schools that have no teacher at all and then
       fill any school that is still below ``n_teachers_max``.

    Every assigned teacher is registered in subgroup 0 and marked as
    ``"key_worker"``.

    Args:
        super_area (SuperArea): super area providing ``areas`` (with their
            ``schools``) and ``workers``.

    """

    def assign(teacher, school):
        """Register ``teacher`` in ``school`` (teachers are in subgroup 0)."""
        school.add(teacher)
        school.add_to_registered_members(teacher.id, subgroup_type=0)
        teacher.lockdown_status = "key_worker"

    def fill(schools, teachers):
        """Hand out ``teachers`` round-robin until schools or teachers run out."""
        while teachers:
            all_filled = True
            for school in schools:
                if school.n_pupils == 0:
                    continue
                if school.n_teachers < school.n_teachers_max:
                    all_filled = False
                    # Assign before checking for exhaustion so the last
                    # teacher of the list is not silently dropped
                    assign(teachers.pop(), school)
                    if not teachers:
                        break
            if all_filled:
                break

    def max_teachers(school, ratio):
        """Number of teacher slots for ``school`` given a mean pupils/teacher ratio."""
        # A Poisson draw can be 0; clamp to 1 to avoid dividing by zero
        pupils_per_teacher = max(1, np.random.poisson(ratio))
        return int(np.round(school.n_pupils / pupils_per_teacher))

    # separate schools in primary and secondary based on age ranges served
    primary_schools = []
    secondary_schools = []
    for area in super_area.areas:
        for school in area.schools:
            if school.n_pupils == 0:
                continue
            if school.age_min > 11:
                secondary_schools.append(school)
            elif school.age_max <= 11:
                primary_schools.append(school)
            else:
                # All-through school: classify by majority age coverage
                primary_ages = 11 - school.age_min + 1
                total_ages = school.age_max - school.age_min + 1
                if primary_ages / total_ages > 0.5:
                    primary_schools.append(school)
                else:
                    secondary_schools.append(school)

    # assign teacher to student ratios in schools
    for school in primary_schools:
        school.n_teachers_max = max_teachers(
            school, self.teacher_student_ratio_primary
        )
    for school in secondary_schools:
        school.n_teachers_max = max_teachers(
            school, self.teacher_student_ratio_secondary
        )

    np.random.shuffle(primary_schools)
    np.random.shuffle(secondary_schools)

    # The label may be a single string (constructor default) or a list of
    # sector ids (as returned by find_jobs); accept both
    sector_labels = self.education_sector_label
    if isinstance(sector_labels, str):
        sector_labels = (sector_labels,)
    all_teachers = [
        person
        for person in super_area.workers
        if person.sector in sector_labels
        and person.age > self.teacher_min_age
        and person.primary_activity is None
    ]

    primary_teachers = []
    secondary_teachers = []
    extra_teachers = []
    for teacher in all_teachers:
        if teacher.sub_sector == "teacher_primary":
            primary_teachers.append(teacher)
        elif teacher.sub_sector == "teacher_secondary":
            secondary_teachers.append(teacher)
        else:
            extra_teachers.append(teacher)
    np.random.shuffle(primary_teachers)
    np.random.shuffle(secondary_teachers)
    np.random.shuffle(extra_teachers)

    fill(primary_schools, primary_teachers)
    fill(secondary_schools, secondary_teachers)

    # Leftover teachers first cover schools that still have no teacher
    remaining_teachers = primary_teachers + secondary_teachers + extra_teachers
    all_schools = primary_schools + secondary_schools
    empty_schools = [
        school
        for school in all_schools
        if school.n_pupils > 0 and school.n_teachers == 0
    ]
    for school in empty_schools:
        if not remaining_teachers:
            break
        assign(remaining_teachers.pop(), school)

    # ... and then top up any school below its teacher quota
    fill(all_schools, remaining_teachers)

distribute_teachers_to_schools_in_super_areas(super_areas)

Parameters:

Name Type Description Default
super_areas List[SuperArea]
required
Source code in june/distributors/school_distributor.py
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
def distribute_teachers_to_schools_in_super_areas(
    self, super_areas: List[SuperArea]
):
    """Distribute teachers in every super area and print a classroom sample.

    Calls ``distribute_teachers_to_school`` once per super area, then
    collects the size of every non-empty classroom (subgroups from index 1
    on) and prints a random sample of at most 10 rows.

    Args:
        super_areas (List[SuperArea]): super areas to process.

    """
    for super_area in super_areas:
        self.distribute_teachers_to_school(super_area)

    def non_empty_classrooms():
        """Yield one report row per classroom that has people in it."""
        for super_area in super_areas:
            for area in super_area.areas:
                for school in area.schools:
                    # Index 0 is skipped; classrooms are numbered from 1
                    numbered = enumerate(school.subgroups[1:], start=1)
                    for classroom_id, classroom in numbered:
                        if classroom.people:
                            yield {
                                "| School ID": getattr(school, "id", "Unknown"),
                                "| Classroom ID": classroom_id,
                                "| Total Students in Classroom": len(classroom.people),
                            }

    # Convert data to a DataFrame for easy viewing and show only a sample of 10
    df_classrooms = pd.DataFrame(list(non_empty_classrooms()))
    sample_size = min(10, len(df_classrooms))
    print("\n===== Teacher Assignment to Schools =====")
    print(df_classrooms.sample(n=sample_size))

find_jobs(config) staticmethod

Parameters:

Name Type Description Default
config dict
required
Source code in june/distributors/school_distributor.py
149
150
151
152
153
154
155
156
157
158
159
160
161
162
@staticmethod
def find_jobs(config: dict):
    """Collect the ``sector_id`` of every entry nested in dict-valued config keys.

    Top-level values that are not dicts are ignored. For each dict-valued
    top-level entry, the ``"sector_id"`` of each of its values is collected,
    in iteration order.

    Args:
        config (dict): configuration dictionary.

    Returns:
        list: the collected sector ids.

    """
    return [
        job["sector_id"]
        for section in config.values()
        if isinstance(section, dict)
        for job in section.values()
    ]

from_file(schools, config_filename=default_config_filename) classmethod

Initialise SchoolDistributor from path to its config file

Parameters:

Name Type Description Default
schools Schools

instance of Schools, with information on all schools in world.

required
config_filename str

(Default value = default_config_filename)

default_config_filename
# Tuple[int, int]

(Default value = (5, 18)

required

Returns:

Type Description
SchoolDistributor

SchoolDistributor instance:

Source code in june/distributors/school_distributor.py
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
@classmethod
def from_file(
    cls,
    schools: "Schools",
    config_filename: str = default_config_filename,
) -> "SchoolDistributor":
    """Initialise SchoolDistributor from path to its config file

    The config must provide ``neighbour_schools``, ``age_range``,
    ``mandatory_age_range``, ``teacher_min_age`` and ``max_classroom_size``.
    The education sector labels are extracted with ``find_jobs``. The
    teacher-to-student ratios are not read from the config and keep the
    constructor defaults.

    Args:
        schools ("Schools"): instance of Schools, with information on all schools in world.
        config_filename (str, optional): (Default value = default_config_filename)

    Returns:
        SchoolDistributor instance:

    Raises:
        KeyError: if one of the required config keys is missing.

    """
    # NOTE(review): FullLoader is kept from the original; it is only
    # appropriate for trusted config files.
    with open(config_filename) as f:
        config = yaml.load(f, Loader=yaml.FullLoader)

    # Keyword arguments are required here: passed positionally,
    # teacher_min_age and max_classroom_size would land in the
    # teacher_student_ratio_* parameters of __init__.
    return cls(
        schools,
        education_sector_label=cls.find_jobs(config),
        neighbour_schools=config["neighbour_schools"],
        age_range=config["age_range"],
        mandatory_age_range=config["mandatory_age_range"],
        teacher_min_age=config["teacher_min_age"],
        max_classroom_size=config["max_classroom_size"],
    )

from_geography(geography, config_filename=default_config_filename) classmethod

Parameters:

Name Type Description Default
geography Geography
required
config_filename str

(Default value = default_config_filename)

default_config_filename
Source code in june/distributors/school_distributor.py
136
137
138
139
140
141
142
143
144
145
146
147
@classmethod
def from_geography(
    cls, geography: Geography, config_filename: str = default_config_filename
):
    """Build a distributor for the schools attached to ``geography``.

    Delegates to ``from_file`` using ``geography.schools``.

    Args:
        geography (Geography): geography providing the ``schools`` attribute.
        config_filename (str, optional): (Default value = default_config_filename)

    Returns:
        whatever ``from_file`` returns for these schools and config file.

    """
    schools = geography.schools
    return cls.from_file(schools, config_filename)

limit_classroom_sizes()

Limit subgroup sizes that represent class rooms to a maximum number of students. If maximum number is exceeded create new subgroups to distribute students homogeneously

Source code in june/distributors/school_distributor.py
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
def limit_classroom_sizes(
    self,
):
    """Cap classroom subgroups at ``self.max_classroom_size`` and print a report.

    Each school is asked to limit its classroom sizes (creating new
    subgroups when the maximum is exceeded is delegated to the school).
    Afterwards every classroom (subgroups from index 1 on) is listed with
    its size and a random sample of up to 5 student IDs, and the resulting
    DataFrame is printed.

    """
    for school in self.schools:
        school.limit_classroom_sizes(self.max_classroom_size)

    def classroom_rows():
        """Yield one report row per classroom of every school."""
        for school in self.schools:
            # Skip index 0 for teachers; classrooms are numbered from 1
            for number, classroom in enumerate(school.subgroups[1:], start=1):
                pupil_ids = [pupil.id for pupil in classroom.people]
                # Sample up to 5 student IDs for visualization
                shown_ids = random.sample(pupil_ids, min(5, len(pupil_ids)))
                yield {
                    "| School ID": school.id,
                    "| Classroom": f"{number}",
                    "| Total Students": len(classroom.people),
                    "| Sample of IDs of Persons (Students)": ", ".join(
                        str(pupil_id) for pupil_id in shown_ids
                    ),
                }

    # Convert the data to a DataFrame for better visualization
    df_classrooms = pd.DataFrame(list(classroom_rows()))

    print("\n===== Classroom Distribution =====")
    print(df_classrooms)