School distributor

`SchoolDistributor`

Distributes students in an area to different schools

Source code in june/distributors/school_distributor.py

class SchoolDistributor:
    """Distributes students in an area to different schools"""

    def __init__(
        self,
        schools: Schools,
        education_sector_label="P",
        neighbour_schools: int = 35,
        age_range: Tuple[int, int] = (0, 19),
        mandatory_age_range: Tuple[int, int] = (5, 18),
        teacher_student_ratio_primary=21,
        teacher_student_ratio_secondary=16,
        teacher_min_age=21,
        max_classroom_size=40,
    ):
        """
        Get closest schools to this output area, per age group
        (different schools admit pupils with different age ranges)

        Parameters
        ----------
        schools:
            instance of Schools, with information on all schools in world.
        area:
            instance of Area.
        config:
            config dictionary.
        """
        self.schools = schools
        self.neighbour_schools = neighbour_schools
        self.school_age_range = age_range
        self.mandatory_school_age_range = mandatory_age_range
        self.education_sector_label = education_sector_label
        self.teacher_min_age = teacher_min_age
        self.teacher_student_ratio_primary = teacher_student_ratio_primary
        self.teacher_student_ratio_secondary = teacher_student_ratio_secondary
        self.max_classroom_size = max_classroom_size

        # Cache for pre-computed school assignments by area and age
        self._area_school_cache = {}

        # Cache for pre-computed gender weights by school group
        self._gender_weights_cache = {}

        # Optimized school capacity tracking
        self._school_capacity_tracker = {}

    @classmethod
    def from_file(
        cls,
        schools: "Schools",
        config_filename: str = default_config_filename,
    ) -> "SchoolDistributor":
        """Initialise SchoolDistributor from path to its config file

        The config values are passed to the constructor by keyword. Passing
        them positionally would put ``teacher_min_age`` and
        ``max_classroom_size`` into the two teacher/student ratio parameters,
        which precede them in the constructor signature.

        Args:
            schools ("Schools"): instance of Schools, with information on all schools in world.
            config_filename (str, optional): path to the YAML config file. (Default value = default_config_filename)

        Returns:
            SchoolDistributor instance: distributor configured from the file. The
            teacher/student ratios are not read from the file and keep their
            constructor defaults.

        Raises:
            KeyError: if one of the required config keys is missing.

        """
        with open(config_filename) as f:
            # NOTE(review): FullLoader can still build some Python objects;
            # prefer yaml.safe_load if this file may come from an untrusted source.
            config = yaml.load(f, Loader=yaml.FullLoader)
        education_sector_label = cls.find_jobs(config)
        return cls(
            schools,
            education_sector_label=education_sector_label,
            neighbour_schools=config["neighbour_schools"],
            age_range=config["age_range"],
            mandatory_age_range=config["mandatory_age_range"],
            teacher_min_age=config["teacher_min_age"],
            max_classroom_size=config["max_classroom_size"],
        )

    @classmethod
    def from_geography(
        cls, geography: Geography, config_filename: str = default_config_filename
    ):
        """Build a distributor for the schools attached to a geography.

        Args:
            geography (Geography): object whose ``schools`` attribute is handed to ``from_file``.
            config_filename (str, optional): path to the config file. (Default value = default_config_filename)

        Returns:
            Whatever ``cls.from_file`` returns for those schools and that config file.

        """
        geography_schools = geography.schools
        return cls.from_file(geography_schools, config_filename)

    @staticmethod
    def find_jobs(config: dict):
        """

        Args:
            config (dict): 

        """
        education_sector_label = []
        for value1 in config.values():
            if isinstance(value1, dict):
                for value2 in value1.values():
                    education_sector_label.append(value2["sector_id"])
        return education_sector_label

    def _precompute_area_school_assignments(self, areas: List[Area]):
        """Fill ``self._area_school_cache`` with the closest schools of every area.

        For each area and each age group in ``self.schools.school_trees`` the
        closest-school query is run once and its per-age-group indices are
        translated into school objects, so later lookups are plain dict reads.

        Args:
            areas (List[Area]): areas to cache; each needs ``id`` and ``coordinates``.

        """
        logger.info("Pre-computing school assignments for all areas...")

        for i, area in enumerate(areas):
            # Progress message every 1000 areas.
            if i % 1000 == 0:
                logger.info(f"Pre-computed assignments for {i} of {len(areas)} areas so far.")

            schools_per_agegroup = {}
            for agegroup in self.schools.school_trees:
                local_indices = self.schools.get_closest_schools(
                    agegroup, area.coordinates, self.neighbour_schools
                )
                # Map age-group-local indices to positions in schools.members.
                schools_per_agegroup[agegroup] = [
                    self.schools.members[
                        self.schools.school_agegroup_to_global_indices[agegroup][local_idx]
                    ]
                    for local_idx in local_indices
                ]

            self._area_school_cache[area.id] = schools_per_agegroup

        logger.info("Completed pre-computing school assignments")

    def _initialize_school_capacity_tracking(self):
        """Initialize optimized school capacity tracking data structures"""
        self._school_capacity_tracker = {}

        for school in self.schools.members:
            self._school_capacity_tracker[school.id] = {
                'current_pupils': school.n_pupils,
                'max_pupils': school.n_pupils_max,
                'is_full': school.n_pupils >= school.n_pupils_max
            }

    def _update_school_capacity(self, school):
        """Efficiently update school capacity tracking when a student is added

        Args:
            school: 

        """
        tracker = self._school_capacity_tracker[school.id]
        tracker['current_pupils'] += 1
        tracker['is_full'] = tracker['current_pupils'] >= tracker['max_pupils']

    def _get_available_schools_fast(self, schools_list, age_group_full):
        """Fast filtering of available schools using capacity tracker and numba

        Args:
            schools_list: 
            age_group_full: 

        """
        if age_group_full:
            return schools_list  # All schools available when age group is marked full

        # Extract fullness status as numpy array for numba processing
        is_full_array = np.array([self._school_capacity_tracker[school.id]['is_full'] 
                                 for school in schools_list], dtype=bool)

        # Use numba-accelerated filtering
        available_indices = _filter_available_schools_numba(is_full_array)

        # Return corresponding schools
        return [schools_list[i] for i in available_indices]

    def distribute_kids_to_school(self, areas: List[Area]):
        """Function to distribute kids to schools according to distance

        For every area the cached closest schools are looked up, then the
        mandatory-age and the non-mandatory-age residents are assigned. A
        sample of up to 10 schools is printed afterwards.

        The closest-school cache is topped up for areas that are not cached
        yet, so calling this again with additional areas does not fail with a
        ``KeyError``.

        Args:
            areas (List[Area]): areas whose residents are sent to school.

        """
        logger.info("Distributing kids to schools")

        # Initialize optimizations
        if not self._area_school_cache:
            self._precompute_area_school_assignments(areas)
        else:
            # Cache exists from an earlier call: only compute what is missing.
            uncached_areas = [
                area for area in areas if area.id not in self._area_school_cache
            ]
            if uncached_areas:
                self._precompute_area_school_assignments(uncached_areas)

        if not self._school_capacity_tracker:
            self._initialize_school_capacity_tracking()

        for i, area in enumerate(areas):
            if i % 4000 == 0:
                logger.info(f"Distributed kids in {i} of {len(areas)} areas so far.")

            # Use pre-computed closest schools from cache
            closest_schools_by_age = self._area_school_cache[area.id]
            # Fullness flags are per area: they start fresh for every area.
            is_school_full = {agegroup: False for agegroup in self.schools.school_trees}

            self.distribute_mandatory_kids_to_school(
                area, is_school_full, closest_schools_by_age
            )
            self.distribute_non_mandatory_kids_to_school(
                area, is_school_full, closest_schools_by_age
            )
        logger.info("Kids distributed to schools")

        # Visualise the final distribution of kids to schools with a sample of Student IDs
        sample_data = []
        for school in random.sample(self.schools.members, min(10, len(self.schools.members))):
            # Get information about registered members
            total_registered = sum(len(members) for members in school.registered_members_ids.values())
            all_subgroups = list(school.registered_members_ids.keys())

            # Take up to 2 ids from each subgroup, then keep at most 5 overall
            sampled_ids = [
                f"sg{subgroup}:{member_id}"
                for subgroup, members in school.registered_members_ids.items()
                if members
                for member_id in members[:2]
            ][:5]

            sample_data.append({
                "| School ID": school.id,
                "| Area": school.area.name if school.area else "Unknown Area",
                "| Total Students": len(school.students),
                "| Total Registered Members": total_registered,
                "| Subgroups": all_subgroups,
                "| Sample Registered Member IDs": sampled_ids
            })

        df_schools = pd.DataFrame(sample_data)
        print("\n===== Sample of Registered Members in Schools =====")
        print(df_schools)

    def _get_gender_weights_cache_key(self, available_schools):
        """Generate cache key for a set of available schools

        Args:
            available_schools: 

        """
        return tuple(sorted(school.id for school in available_schools))

    def _precompute_gender_weights(self, available_schools):
        """Pre-compute gender-based weights for a set of schools using numba acceleration

        Args:
            available_schools: 

        """
        cache_key = self._get_gender_weights_cache_key(available_schools)

        if cache_key in self._gender_weights_cache:
            return self._gender_weights_cache[cache_key]

        # Extract gender ratios as numpy arrays for numba processing
        boy_ratios = np.array([school.boy_ratio for school in available_schools], dtype=np.float64)
        girl_ratios = np.array([school.girl_ratio for school in available_schools], dtype=np.float64)

        # Use numba-accelerated computation
        male_probs = _compute_gender_weights_numba(boy_ratios, girl_ratios, False)
        female_probs = _compute_gender_weights_numba(boy_ratios, girl_ratios, True)

        result = {
            'male_probs': male_probs,
            'female_probs': female_probs,
            'schools': available_schools
        }

        self._gender_weights_cache[cache_key] = result
        return result

    def _select_school_with_gender_preference(self, person, available_schools):
        """Select school from available schools based on person's gender and school gender ratios.
        Uses pre-computed weights for better performance.

        Args:
            person: 
            available_schools: 

        """
        if not available_schools:
            return None

        if len(available_schools) == 1:
            return available_schools[0]

        # Get or compute cached weights
        weights_data = self._precompute_gender_weights(available_schools)

        # Select appropriate probabilities based on gender
        if person.sex == 'f':
            probabilities = weights_data['female_probs']
        else:
            probabilities = weights_data['male_probs']

        # Select school using weighted random choice
        return np.random.choice(available_schools, p=probabilities)

    def distribute_mandatory_kids_to_school(
        self, area: Area, is_school_full: dict, closest_schools_by_age: dict
    ):
        """Send kids to the nearest school among the self.neighbour_schools,
        that has vacancies. If none of them has vacancies, pick one of them
        at random using gender-weighted selection (making it larger than it should be)

        Args:
            area (Area): 
            is_school_full (dict): 
            closest_schools_by_age (dict): 

        """
        students_assigned = 0
        school_age_kids = 0
        for person in area.people:
            if (
                (person.age <= self.mandatory_school_age_range[1]
                and person.age >= self.mandatory_school_age_range[0]) 
                or (person.age == 19 and person.residence.spec == 'boarding_school')
            ):
                # Skip 18-year-olds living in student dorms - they should be university students, not school students
                if person.age == 18 and person.residence.spec == 'student_dorm':
                    continue

                if person.age == 18 and person.residence.spec == 'household': 
                    if person.residence.group.type == 'young_adult' or person.residence.group.type == 'flexible':
                        continue
                    else:
                        # 66% chance they go to school, 34% chance they go to uni
                        if np.random.random() < 0.34:
                            # 34% chance they go to university - skip them
                            continue


                school_age_kids += 1
                if person.age not in is_school_full:
                    continue
                if is_school_full[person.age]:
                    # All schools are full, use gender-weighted selection from available schools
                    available_schools = closest_schools_by_age[person.age][:self.neighbour_schools]
                    school = self._select_school_with_gender_preference(person, available_schools)
                else:
                    # Use optimized capacity tracking to find non-full schools
                    candidate_schools = closest_schools_by_age[person.age][:self.neighbour_schools]
                    non_full_schools = self._get_available_schools_fast(candidate_schools, is_school_full[person.age])

                    if non_full_schools:
                        # Use gender-weighted selection among non-full schools
                        school = self._select_school_with_gender_preference(person, non_full_schools)
                    else:
                        # All schools are full, mark as such and use gender-weighted selection from all
                        is_school_full[person.age] = True
                        available_schools = closest_schools_by_age[person.age][:self.neighbour_schools]
                        school = self._select_school_with_gender_preference(person, available_schools)
                # remove from working population
                if person.work_super_area is not None:
                    person.work_super_area.remove_worker(person)
                school.add(person)
                # Update capacity tracker
                self._update_school_capacity(school)
                # Add to registered members
                subgroup = 0 if person.age > school.age_max else person.age - school.age_min + 1
                school.add_to_registered_members(person.id, subgroup_type=subgroup)
                students_assigned += 1

    def distribute_non_mandatory_kids_to_school(
        self, area: Area, is_school_full: dict, closest_schools_by_age: dict
    ):
        """For kids in age ranges that might go to school, but it is not mandatory
        send them to the closest school that has vacancies among the self.max_schools closests.
        If none of them has vacancies do not send them to school

        Args:
            area (Area): 
            is_school_full (dict): 
            closest_schools_by_age (dict): 

        """
        for person in area.people:
            if (
                self.school_age_range[0]
                < person.age
                < self.mandatory_school_age_range[0]
                or self.mandatory_school_age_range[1]
                < person.age
                < self.school_age_range[1]
            ):
                # Skip 18-year-olds living in student dorms - they should be university students, not school students
                if person.age == 18 and hasattr(person, 'residence') and hasattr(person.residence, 'spec') and person.residence.spec == 'student_dorm':
                    continue

                if person.age not in is_school_full or is_school_full[person.age]:
                    continue
                else:
                    # Find all suitable schools with capacity for this age group
                    suitable_schools = []
                    for i in range(min(self.neighbour_schools, len(closest_schools_by_age[person.age]))):
                        candidate_school = closest_schools_by_age[person.age][i]
                        # check number of students in that age group
                        yearindex = person.age - candidate_school.age_min + 1
                        if yearindex < len(candidate_school.subgroups):
                            n_pupils_age = len(candidate_school.subgroups[yearindex].people)
                            if (candidate_school.n_pupils < candidate_school.n_pupils_max) and (
                                n_pupils_age < (candidate_school.n_pupils_max / (candidate_school.age_max - candidate_school.age_min))
                            ):
                                suitable_schools.append(candidate_school)

                    if suitable_schools:
                        # Use gender-weighted selection among suitable schools
                        school = self._select_school_with_gender_preference(person, suitable_schools)
                        if person.work_super_area is not None:
                            person.work_super_area.remove_worker(person)
                        school.add(person)
                        # Update capacity tracker
                        self._update_school_capacity(school)
                        # Add to registered members
                        subgroup = 0 if person.age > school.age_max else person.age - school.age_min + 1
                        school.add_to_registered_members(person.id, subgroup_type=subgroup)

    def distribute_teachers_to_schools_in_super_areas(
        self, super_areas: List[SuperArea]
    ):
        """

        Args:
            super_areas (List[SuperArea]): 

        """
        for super_area in super_areas:
            self.distribute_teachers_to_school(super_area)

        classroom_distribution_data = []
        for super_area in super_areas:
            for area in super_area.areas:
                for school in area.schools:
                    # Gather data for each classroom (starting from index 1)
                    for classroom_id, classroom in enumerate(school.subgroups[1:], start=1):
                        if not classroom.people:
                            continue  # Skip empty classrooms

                        classroom_distribution_data.append({
                            "| School ID": school.id if hasattr(school, 'id') else "Unknown",
                            "| Classroom ID": classroom_id,
                            "| Total Students in Classroom": len(classroom.people)
                        })

        # Convert data to a DataFrame for easy viewing and show only a sample of 10
        df_classrooms = pd.DataFrame(classroom_distribution_data)
        print("\n===== Teacher Assignment to Schools =====")
        print(df_classrooms.sample(n=min(10, len(df_classrooms))))

    def distribute_teachers_to_school(self, super_area: SuperArea):
        """Assigns teachers to super area. The strategy is the following:
        we loop over the schools to divide them into two subgroups,
        primary schools and secondary schools. If a school is both, then
        we assign it randomly to one of the two.
        Then we loop over the workers in the super area to find the teachers,
        which we also divide into two subgroups analogously to the schools.
        We assign the teachers to the schools following a fix student to teacher ratio.
        We put a lower age limit to teachers at the age of 21.

        Args:
            super_area (SuperArea): 

        """
        # separate schools in primary and secondary based on age ranges served
        primary_schools = []
        secondary_schools = []
        for area in super_area.areas:
            for school in area.schools:
                if school.n_pupils == 0:
                    continue

                # Classify schools based on age ranges they serve
                # Primary: serves ages <= 11, Secondary: serves ages > 11
                serves_primary = school.age_min <= 11 and school.age_max <= 11
                serves_secondary = school.age_min > 11 or school.age_max > 11
                serves_both = school.age_min <= 11 and school.age_max > 11

                if serves_both:
                    # All-through schools: assign based on which age range they serve more
                    primary_ages = min(11, school.age_max) - school.age_min + 1
                    total_ages = school.age_max - school.age_min + 1
                    primary_weight = primary_ages / total_ages

                    # Assign to category based on majority age coverage
                    if primary_weight > 0.5:
                        primary_schools.append(school)
                    else:
                        secondary_schools.append(school)
                elif serves_primary:
                    primary_schools.append(school)
                elif serves_secondary:
                    secondary_schools.append(school)
                else:
                    # Fallback for edge cases - assign randomly
                    idx = np.random.randint(0, 2)
                    if idx == 0:
                        primary_schools.append(school)
                    else:
                        secondary_schools.append(school)
        # assign teacher to student ratios in schools
        for school in primary_schools:
            school.n_teachers_max = int(
                np.round(
                    school.n_pupils
                    / np.random.poisson(self.teacher_student_ratio_primary)
                )
            )
        for school in secondary_schools:
            school.n_teachers_max = int(
                np.round(
                    school.n_pupils
                    / np.random.poisson(self.teacher_student_ratio_secondary)
                )
            )

        np.random.shuffle(primary_schools)
        np.random.shuffle(secondary_schools)
        all_teachers = [
            person
            for person in super_area.workers
            if person.sector == self.education_sector_label
            and person.age > self.teacher_min_age
            and person.primary_activity is None
        ]
        primary_teachers = []
        secondary_teachers = []
        extra_teachers = []
        for teacher in all_teachers:
            if teacher.sub_sector == "teacher_primary":
                primary_teachers.append(teacher)
            elif teacher.sub_sector == "teacher_secondary":
                secondary_teachers.append(teacher)
            else:
                extra_teachers.append(teacher)
        np.random.shuffle(primary_teachers)
        np.random.shuffle(secondary_teachers)
        np.random.shuffle(extra_teachers)
        while primary_teachers:
            all_filled = True
            for primary_school in primary_schools:
                if primary_school.n_pupils == 0:
                    continue
                if primary_school.n_teachers < primary_school.n_teachers_max:
                    all_filled = False
                    teacher = primary_teachers.pop()
                    if not primary_teachers:
                        all_filled = True
                        break
                    primary_school.add(teacher)
                    primary_school.add_to_registered_members(teacher.id, subgroup_type=0)  # Teachers are in subgroup 0
                    teacher.lockdown_status = "key_worker"
            if all_filled:
                break

        while secondary_teachers:
            all_filled = True
            for secondary_school in secondary_schools:
                if secondary_school.n_pupils == 0:
                    continue
                if secondary_school.n_teachers < secondary_school.n_teachers_max:
                    all_filled = False
                    teacher = secondary_teachers.pop()
                    if not secondary_teachers:
                        all_filled = True
                        break
                    secondary_school.add(teacher)
                    secondary_school.add_to_registered_members(teacher.id, subgroup_type=0)  # Teachers are in subgroup 0
                    teacher.lockdown_status = "key_worker"
            if all_filled:
                break

        remaining_teachers = primary_teachers + secondary_teachers + extra_teachers
        empty_schools = [
            school
            for school in primary_schools + secondary_schools
            if school.n_pupils > 0 and school.n_teachers == 0
        ]
        for school in empty_schools:
            if not remaining_teachers:
                break
            teacher = remaining_teachers.pop()
            school.add(teacher)
            school.add_to_registered_members(teacher.id, subgroup_type=0)  # Teachers are in subgroup 0
            teacher.lockdown_status = "key_worker"

        while remaining_teachers:
            all_filled = True
            for school in primary_schools + secondary_schools:
                if school.n_pupils == 0:
                    continue
                if school.n_teachers < school.n_teachers_max:
                    all_filled = False
                    teacher = remaining_teachers.pop()
                    if not remaining_teachers:
                        all_filled = True
                        break
                    school.add(teacher)
                    school.add_to_registered_members(teacher.id, subgroup_type=0)  # Teachers are in subgroup 0
                    teacher.lockdown_status = "key_worker"
            if all_filled:
                break

    def limit_classroom_sizes(
        self,
    ):
        """Limit subgroup sizes that represent class rooms to a maximum number of students.
        If maximum number is exceeded create new subgroups to distribute students homogeneously

        """
        for school in self.schools:
            school.limit_classroom_sizes(self.max_classroom_size)
        # Collect classroom data from all schools
        all_classroom_data = []
        for school in self.schools:
            for i, classroom in enumerate(school.subgroups[1:], start=1):  # Skip index 0 for teachers
                student_ids = [student.id for student in classroom.people]

                # Sample up to 5 student IDs for visualization
                sampled_student_ids = random.sample(student_ids, min(5, len(student_ids)))

                all_classroom_data.append({
                    "| School ID": school.id,
                    "| Classroom": f"{i}",
                    "| Total Students": len(classroom.people),
                    "| Sample of IDs of Persons (Students)": ", ".join(map(str, sampled_student_ids))
                })

        # Convert the data to a DataFrame for better visualization
        df_classrooms = pd.DataFrame(all_classroom_data)

        print("\n===== Classroom Distribution =====")
        print(df_classrooms)

`__init__(schools, education_sector_label='P', neighbour_schools=35, age_range=(0, 19), mandatory_age_range=(5, 18), teacher_student_ratio_primary=21, teacher_student_ratio_secondary=16, teacher_min_age=21, max_classroom_size=40)`

Get closest schools to this output area, per age group (different schools admit pupils with different age ranges)

Parameters

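- `schools`: instance of `Schools`, with information on all schools in the world.
- `area`: instance of `Area`.
- `config`: config dictionary.

Note: `area` and `config` are listed in the docstring but are not parameters of the signature shown above.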

Source code in june/distributors/school_distributor.py

def __init__(
    self,
    schools: Schools,
    education_sector_label="P",
    neighbour_schools: int = 35,
    age_range: Tuple[int, int] = (0, 19),
    mandatory_age_range: Tuple[int, int] = (5, 18),
    teacher_student_ratio_primary=21,
    teacher_student_ratio_secondary=16,
    teacher_min_age=21,
    max_classroom_size=40,
):
    """
    Store the settings used to send pupils and teachers to schools.

    Parameters
    ----------
    schools:
        instance of Schools, with information on all schools in world.
    education_sector_label:
        label stored for identifying education workers.
    neighbour_schools:
        number of neighbouring schools to consider.
    age_range:
        outer (min, max) school age bounds.
    mandatory_age_range:
        (min, max) ages for which school is mandatory.
    teacher_student_ratio_primary:
        pupils-per-teacher setting for primary schools.
    teacher_student_ratio_secondary:
        pupils-per-teacher setting for secondary schools.
    teacher_min_age:
        age threshold stored for teachers.
    max_classroom_size:
        classroom size limit.
    """
    # Lazily filled lookup tables; all start empty.
    self._school_capacity_tracker = {}  # optimized school capacity tracking
    self._gender_weights_cache = {}  # gender weights by school group
    self._area_school_cache = {}  # school assignments by area and age

    # School catalogue and search radius (in number of schools).
    self.schools = schools
    self.neighbour_schools = neighbour_schools

    # Age bounds for pupils.
    self.mandatory_school_age_range = mandatory_age_range
    self.school_age_range = age_range

    # Teacher selection and staffing parameters.
    self.education_sector_label = education_sector_label
    self.teacher_min_age = teacher_min_age
    self.teacher_student_ratio_secondary = teacher_student_ratio_secondary
    self.teacher_student_ratio_primary = teacher_student_ratio_primary

    # Classroom splitting threshold.
    self.max_classroom_size = max_classroom_size

`distribute_kids_to_school(areas)`

Function to distribute kids to schools according to distance

Parameters:

Name	Type	Description	Default
`areas`	`List[Area]`		required

Source code in june/distributors/school_distributor.py

def distribute_kids_to_school(self, areas: List[Area]):
    """Function to distribute kids to schools according to distance

    For every area the cached closest schools are looked up, then the
    mandatory-age and the non-mandatory-age residents are assigned. A
    sample of up to 10 schools is printed afterwards.

    The closest-school cache is topped up for areas that are not cached
    yet, so calling this again with additional areas does not fail with a
    ``KeyError``.

    Args:
        areas (List[Area]): areas whose residents are sent to school.

    """
    logger.info("Distributing kids to schools")

    # Initialize optimizations
    if not self._area_school_cache:
        self._precompute_area_school_assignments(areas)
    else:
        # Cache exists from an earlier call: only compute what is missing.
        uncached_areas = [
            area for area in areas if area.id not in self._area_school_cache
        ]
        if uncached_areas:
            self._precompute_area_school_assignments(uncached_areas)

    if not self._school_capacity_tracker:
        self._initialize_school_capacity_tracking()

    for i, area in enumerate(areas):
        if i % 4000 == 0:
            logger.info(f"Distributed kids in {i} of {len(areas)} areas so far.")

        # Use pre-computed closest schools from cache
        closest_schools_by_age = self._area_school_cache[area.id]
        # Fullness flags are per area: they start fresh for every area.
        is_school_full = {agegroup: False for agegroup in self.schools.school_trees}

        self.distribute_mandatory_kids_to_school(
            area, is_school_full, closest_schools_by_age
        )
        self.distribute_non_mandatory_kids_to_school(
            area, is_school_full, closest_schools_by_age
        )
    logger.info("Kids distributed to schools")

    # Visualise the final distribution of kids to schools with a sample of Student IDs
    sample_data = []
    for school in random.sample(self.schools.members, min(10, len(self.schools.members))):
        # Get information about registered members
        total_registered = sum(len(members) for members in school.registered_members_ids.values())
        all_subgroups = list(school.registered_members_ids.keys())

        # Take up to 2 ids from each subgroup, then keep at most 5 overall
        sampled_ids = [
            f"sg{subgroup}:{member_id}"
            for subgroup, members in school.registered_members_ids.items()
            if members
            for member_id in members[:2]
        ][:5]

        sample_data.append({
            "| School ID": school.id,
            "| Area": school.area.name if school.area else "Unknown Area",
            "| Total Students": len(school.students),
            "| Total Registered Members": total_registered,
            "| Subgroups": all_subgroups,
            "| Sample Registered Member IDs": sampled_ids
        })

    df_schools = pd.DataFrame(sample_data)
    print("\n===== Sample of Registered Members in Schools =====")
    print(df_schools)

`distribute_mandatory_kids_to_school(area, is_school_full, closest_schools_by_age)`

Send kids to the nearest school among the self.neighbour_schools, that has vacancies. If none of them has vacancies, pick one of them at random using gender-weighted selection (making it larger than it should be)

Parameters:

Name	Type	Default
`area`	`Area`	required
`is_school_full`	`dict`	required
`closest_schools_by_age`	`dict`	required

Source code in june/distributors/school_distributor.py

def distribute_mandatory_kids_to_school(
    self, area: Area, is_school_full: dict, closest_schools_by_age: dict
):
    """Enrol the people of mandatory school age living in ``area``.

    A person is considered when their age lies inside
    ``self.mandatory_school_age_range`` (inclusive), or when they are 19 and
    live in a residence whose ``spec`` is ``'boarding_school'``.
    18-year-olds are filtered further:

    * living in a ``'student_dorm'``: skipped;
    * living in a ``'household'`` whose group type is ``'young_adult'`` or
      ``'flexible'``: skipped;
    * living in any other household: skipped with probability 0.34.

    Each remaining person is sent to one of the first
    ``self.neighbour_schools`` schools listed for their age, chosen with
    ``self._select_school_with_gender_preference`` among the schools that
    still have vacancies. If none has vacancies the age is flagged as full
    in ``is_school_full`` and the choice is made among all candidates
    (making that school larger than it should be).

    Args:
        area (Area): area whose ``people`` are scanned.
        is_school_full (dict): maps age -> bool, True once every candidate
            school for that age is full. Updated in place. Ages missing
            from the dict are not enrolled.
        closest_schools_by_age (dict): maps age -> schools to consider;
            only the first ``self.neighbour_schools`` entries are used.

    """
    min_age = self.mandatory_school_age_range[0]
    max_age = self.mandatory_school_age_range[1]
    for person in area.people:
        age = person.age
        # Read the residence defensively: a person without a residence must
        # not crash the distribution (the non-mandatory distribution guards
        # this access in the same way).
        residence = getattr(person, "residence", None)
        residence_spec = getattr(residence, "spec", None)

        is_boarder_at_19 = age == 19 and residence_spec == "boarding_school"
        if not (min_age <= age <= max_age or is_boarder_at_19):
            continue

        if age == 18:
            # 18-year-olds in student dorms are university students.
            if residence_spec == "student_dorm":
                continue
            if residence_spec == "household":
                household = getattr(residence, "group", None)
                household_type = getattr(household, "type", None)
                if household_type in ("young_adult", "flexible"):
                    continue
                # 66% chance they go to school, 34% chance they go to uni.
                if np.random.random() < 0.34:
                    continue

        if age not in is_school_full:
            continue

        candidate_schools = closest_schools_by_age[age][: self.neighbour_schools]
        if is_school_full[age]:
            # Every candidate is already known to be full: over-fill one.
            school = self._select_school_with_gender_preference(
                person, candidate_schools
            )
        else:
            non_full_schools = self._get_available_schools_fast(
                candidate_schools, is_school_full[age]
            )
            if non_full_schools:
                school = self._select_school_with_gender_preference(
                    person, non_full_schools
                )
            else:
                # Remember that this age is full so later pupils skip the
                # vacancy search, then over-fill one of the candidates.
                is_school_full[age] = True
                school = self._select_school_with_gender_preference(
                    person, candidate_schools
                )

        # remove from working population
        if person.work_super_area is not None:
            person.work_super_area.remove_worker(person)
        school.add(person)
        self._update_school_capacity(school)
        # Pupils older than the school's age_max are registered in subgroup 0;
        # everyone else goes to the subgroup of their year (offset by one).
        subgroup = 0 if age > school.age_max else age - school.age_min + 1
        school.add_to_registered_members(person.id, subgroup_type=subgroup)

`distribute_non_mandatory_kids_to_school(area, is_school_full, closest_schools_by_age)`

For kids in age ranges that might go to school, but for whom it is not mandatory, send them to a school that has vacancies among the `self.neighbour_schools` closest ones (chosen with gender-weighted selection). If none of them has vacancies, do not send them to school.

Parameters:

Name	Type	Default
`area`	`Area`	required
`is_school_full`	`dict`	required
`closest_schools_by_age`	`dict`	required

Source code in june/distributors/school_distributor.py

def distribute_non_mandatory_kids_to_school(
    self, area: Area, is_school_full: dict, closest_schools_by_age: dict
):
    """Enrol kids whose age allows school attendance without requiring it.

    A person is considered when their age lies strictly between the lower
    bounds of ``self.school_age_range`` and
    ``self.mandatory_school_age_range``, or strictly between their upper
    bounds. 18-year-olds living in a ``'student_dorm'`` are skipped.

    Only the first ``self.neighbour_schools`` schools listed for the
    person's age are examined. A school is suitable when it is below its
    overall capacity and the person's year group is below the per-year
    share of that capacity. One suitable school is picked with
    ``self._select_school_with_gender_preference``; if there is none, the
    person is not sent to school.

    Args:
        area (Area): area whose ``people`` are scanned.
        is_school_full (dict): maps age -> bool; ages that are missing or
            flagged True are not enrolled.
        closest_schools_by_age (dict): maps age -> schools to consider.

    """
    for person in area.people:
        age = person.age
        below_mandatory = (
            self.school_age_range[0] < age < self.mandatory_school_age_range[0]
        )
        above_mandatory = (
            self.mandatory_school_age_range[1] < age < self.school_age_range[1]
        )
        if not (below_mandatory or above_mandatory):
            continue

        # 18-year-olds in student dorms are university students.
        residence_spec = getattr(getattr(person, "residence", None), "spec", None)
        if age == 18 and residence_spec == "student_dorm":
            continue

        if age not in is_school_full or is_school_full[age]:
            continue

        suitable_schools = []
        for candidate in closest_schools_by_age[age][: self.neighbour_schools]:
            # Subgroup 0 is not a year group, hence the +1 offset.
            year_index = age - candidate.age_min + 1
            if year_index >= len(candidate.subgroups):
                continue
            if candidate.n_pupils >= candidate.n_pupils_max:
                continue
            pupils_this_year = len(candidate.subgroups[year_index].people)
            # A single-year school (age_min == age_max) would divide by
            # zero; treat it as one year group holding the whole capacity.
            year_span = (candidate.age_max - candidate.age_min) or 1
            if pupils_this_year < candidate.n_pupils_max / year_span:
                suitable_schools.append(candidate)

        if not suitable_schools:
            continue

        school = self._select_school_with_gender_preference(
            person, suitable_schools
        )
        if person.work_super_area is not None:
            person.work_super_area.remove_worker(person)
        school.add(person)
        self._update_school_capacity(school)
        # Pupils older than the school's age_max are registered in subgroup 0.
        subgroup = 0 if age > school.age_max else age - school.age_min + 1
        school.add_to_registered_members(person.id, subgroup_type=subgroup)

`distribute_teachers_to_school(super_area)`

Assigns teachers to the schools of a super area. The strategy is the following: we loop over the schools to divide them into two subgroups, primary schools and secondary schools. If a school serves both age ranges, it is assigned to the group that covers the majority of its year groups. Then we loop over the workers in the super area to find the teachers, which we also divide into two subgroups analogously to the schools. We assign the teachers to the schools following a fixed student-to-teacher ratio. Teachers must be older than the minimum teacher age (21 by default).

Parameters:

Name	Type	Description	Default
`super_area`	`SuperArea`		required

Source code in june/distributors/school_distributor.py

def distribute_teachers_to_school(self, super_area: SuperArea):
    """Assign the education workers of a super area to its schools.

    Schools with at least one pupil are split into primary and secondary.
    A school whose age range straddles 11 goes to the group that covers
    more than half of its year groups (ties go to secondary). Each school
    then gets a teacher quota ``n_teachers_max`` equal to ``n_pupils``
    divided by a Poisson draw around the configured pupils-per-teacher
    ratio.

    Candidate teachers are the super area's workers whose sector matches
    ``self.education_sector_label`` (a single label, or a list/tuple of
    labels), who are strictly older than ``self.teacher_min_age`` and who
    have no primary activity yet. Assignment happens in four passes:

    1. primary teachers fill the quotas of primary schools;
    2. secondary teachers fill the quotas of secondary schools;
    3. left-over teachers (including other sub-sectors) give one teacher
       to every school that has pupils but no teacher;
    4. the remaining left-overs fill any quota still open.

    Every assigned teacher is registered in subgroup 0 of the school and
    gets ``lockdown_status = "key_worker"``.

    Args:
        super_area (SuperArea): super area whose ``areas[*].schools`` and
            ``workers`` are matched.

    """

    def assign(school, teacher):
        """Place ``teacher`` in ``school`` and flag them as a key worker."""
        school.add(teacher)
        # Teachers are in subgroup 0
        school.add_to_registered_members(teacher.id, subgroup_type=0)
        teacher.lockdown_status = "key_worker"

    def fill_vacancies(schools, teachers):
        """Hand out ``teachers`` round-robin until quotas or teachers run out."""
        while teachers:
            assigned_any = False
            for school in schools:
                if not teachers:
                    break
                if school.n_pupils == 0:
                    continue
                if school.n_teachers < school.n_teachers_max:
                    # Assign straight after popping: checking for an empty
                    # list first would silently discard the last teacher.
                    assign(school, teachers.pop())
                    assigned_any = True
            if not assigned_any:
                break

    # separate schools in primary and secondary based on age ranges served
    primary_schools = []
    secondary_schools = []
    for area in super_area.areas:
        for school in area.schools:
            if school.n_pupils == 0:
                continue
            age_min, age_max = school.age_min, school.age_max
            if age_min <= 11 and age_max > 11:
                # All-through school: majority of year groups decides.
                primary_years = 11 - age_min + 1
                total_years = age_max - age_min + 1
                is_primary = primary_years / total_years > 0.5
            elif age_min <= 11 and age_max <= 11:
                is_primary = True
            elif age_min > 11 or age_max > 11:
                is_primary = False
            else:
                # Only reachable when the ages do not compare (e.g. NaN).
                is_primary = np.random.randint(0, 2) == 0
            if is_primary:
                primary_schools.append(school)
            else:
                secondary_schools.append(school)

    # assign teacher quotas from the pupils-per-teacher ratios
    for schools, ratio in (
        (primary_schools, self.teacher_student_ratio_primary),
        (secondary_schools, self.teacher_student_ratio_secondary),
    ):
        for school in schools:
            # A Poisson draw can be 0; clamp it to avoid dividing by zero.
            pupils_per_teacher = max(1, np.random.poisson(ratio))
            school.n_teachers_max = int(
                np.round(school.n_pupils / pupils_per_teacher)
            )

    np.random.shuffle(primary_schools)
    np.random.shuffle(secondary_schools)

    # The label is a plain string by default, but a list of sector ids when
    # the distributor is built through from_file; accept both forms.
    label = self.education_sector_label
    label_is_collection = isinstance(label, (list, tuple))

    def in_education(person):
        """Tell whether ``person.sector`` matches the education label(s)."""
        if person.sector == label:
            return True
        return label_is_collection and person.sector in label

    all_teachers = [
        person
        for person in super_area.workers
        if in_education(person)
        and person.age > self.teacher_min_age
        and person.primary_activity is None
    ]
    primary_teachers = []
    secondary_teachers = []
    extra_teachers = []
    for teacher in all_teachers:
        if teacher.sub_sector == "teacher_primary":
            primary_teachers.append(teacher)
        elif teacher.sub_sector == "teacher_secondary":
            secondary_teachers.append(teacher)
        else:
            extra_teachers.append(teacher)
    np.random.shuffle(primary_teachers)
    np.random.shuffle(secondary_teachers)
    np.random.shuffle(extra_teachers)

    fill_vacancies(primary_schools, primary_teachers)
    fill_vacancies(secondary_schools, secondary_teachers)

    remaining_teachers = primary_teachers + secondary_teachers + extra_teachers
    all_schools = primary_schools + secondary_schools

    # First make sure no school with pupils is left without any teacher.
    empty_schools = [
        school
        for school in all_schools
        if school.n_pupils > 0 and school.n_teachers == 0
    ]
    for school in empty_schools:
        if not remaining_teachers:
            break
        assign(school, remaining_teachers.pop())

    fill_vacancies(all_schools, remaining_teachers)

`distribute_teachers_to_schools_in_super_areas(super_areas)`

Parameters:

Name	Type	Description	Default
`super_areas`	`List[SuperArea]`		required

Source code in june/distributors/school_distributor.py

def distribute_teachers_to_schools_in_super_areas(
    self, super_areas: List[SuperArea]
):
    """Distribute teachers in every super area, then print a classroom sample.

    ``self.distribute_teachers_to_school`` is called once per super area.
    Afterwards one row is collected for each non-empty subgroup with index
    1 or higher of each school (school id, subgroup index, head count) and
    a random sample of at most 10 rows is printed.

    Args:
        super_areas (List[SuperArea]): super areas to process; iterated twice.

    """
    for super_area in super_areas:
        self.distribute_teachers_to_school(super_area)

    # Subgroup 0 is skipped; empty classrooms are left out of the table.
    rows = [
        {
            "| School ID": getattr(school, "id", "Unknown"),
            "| Classroom ID": classroom_id,
            "| Total Students in Classroom": len(classroom.people),
        }
        for super_area in super_areas
        for area in super_area.areas
        for school in area.schools
        for classroom_id, classroom in enumerate(school.subgroups[1:], start=1)
        if classroom.people
    ]

    # Show only a sample of at most 10 classrooms.
    summary = pd.DataFrame(rows)
    sample_size = min(10, len(summary))
    print("\n===== Teacher Assignment to Schools =====")
    print(summary.sample(n=sample_size))

`find_jobs(config)` `staticmethod`

Parameters:

Name	Type	Description	Default
`config`	`dict`		required

Source code in june/distributors/school_distributor.py

@staticmethod
def find_jobs(config: dict):
    """Collect the ``sector_id`` of every entry nested two levels deep.

    Top-level values of ``config`` that are not dicts are ignored. For
    each top-level dict, every one of its values is indexed with
    ``"sector_id"``.

    Args:
        config (dict): configuration mapping.

    Returns:
        list: the collected ``sector_id`` values, in iteration order.

    """
    return [
        entry["sector_id"]
        for section in config.values()
        if isinstance(section, dict)
        for entry in section.values()
    ]

`from_file(schools, config_filename=default_config_filename)` `classmethod`

Initialise SchoolDistributor from path to its config file

Parameters:

Name	Type	Description	Default
`schools`	`Schools`	instance of Schools, with information on all schools in world.	required
`config_filename`	`str`	(Default value = default_config_filename)	`default_config_filename`
`#`	`Tuple[int, int]`	(Default value = (5, 18)	required

Returns:

Type	Description
`SchoolDistributor`	SchoolDistributor instance:

Source code in june/distributors/school_distributor.py

@classmethod
def from_file(
    cls,
    schools: "Schools",
    config_filename: str = default_config_filename,
) -> "SchoolDistributor":
    """Initialise SchoolDistributor from path to its config file

    The config must provide ``neighbour_schools``, ``age_range``,
    ``mandatory_age_range``, ``teacher_min_age`` and ``max_classroom_size``.
    ``teacher_student_ratio_primary`` and ``teacher_student_ratio_secondary``
    are forwarded when present; otherwise the constructor defaults apply.
    The education sector labels are extracted with ``find_jobs``.

    Args:
        schools ("Schools"): instance of Schools, with information on all schools in world.
        config_filename (str, optional): (Default value = default_config_filename)

    Returns:
        SchoolDistributor instance:

    """
    with open(config_filename) as f:
        # NOTE(review): FullLoader accepts more than plain data types;
        # prefer yaml.safe_load if this file can come from an untrusted source.
        config = yaml.load(f, Loader=yaml.FullLoader)
    education_sector_label = SchoolDistributor.find_jobs(config)

    # Optional settings: only forwarded when the config defines them.
    optional_settings = {
        key: config[key]
        for key in (
            "teacher_student_ratio_primary",
            "teacher_student_ratio_secondary",
        )
        if key in config
    }
    # Keyword arguments are required here: passed positionally,
    # teacher_min_age and max_classroom_size would land in the two
    # teacher/student ratio parameters of the constructor.
    return SchoolDistributor(
        schools,
        education_sector_label=education_sector_label,
        neighbour_schools=config["neighbour_schools"],
        age_range=config["age_range"],
        mandatory_age_range=config["mandatory_age_range"],
        teacher_min_age=config["teacher_min_age"],
        max_classroom_size=config["max_classroom_size"],
        **optional_settings,
    )

`from_geography(geography, config_filename=default_config_filename)` `classmethod`

Parameters:

Name	Type	Description	Default
`geography`	`Geography`		required
`config_filename`	`str`	(Default value = default_config_filename)	`default_config_filename`

Source code in june/distributors/school_distributor.py

@classmethod
def from_geography(
    cls, geography: Geography, config_filename: str = default_config_filename
):
    """Build a distributor for the schools attached to ``geography``.

    Delegates to ``cls.from_file`` with ``geography.schools``.

    Args:
        geography (Geography): geography providing the ``schools`` attribute.
        config_filename (str, optional): (Default value = default_config_filename)

    Returns:
        whatever ``cls.from_file`` returns for these schools and config file.

    """
    schools = geography.schools
    return cls.from_file(schools, config_filename)

`limit_classroom_sizes()`

Limit subgroup sizes that represent classrooms to a maximum number of students. If the maximum number is exceeded, create new subgroups to distribute students homogeneously.

Source code in june/distributors/school_distributor.py

def limit_classroom_sizes(
    self,
):
    """Cap classroom sizes in every school and print the resulting layout.

    Each school's own ``limit_classroom_sizes`` is called with
    ``self.max_classroom_size``. Afterwards a table with one row per
    subgroup of index 1 or higher (index 0 holds the teachers) is printed,
    showing the head count and up to 5 randomly sampled student ids.

    """
    size_cap = self.max_classroom_size
    for school in self.schools:
        school.limit_classroom_sizes(size_cap)

    # Collect one row per classroom, across all schools.
    rows = []
    for school in self.schools:
        classrooms = school.subgroups[1:]  # index 0 is for teachers
        for number, classroom in enumerate(classrooms, start=1):
            pupil_ids = [pupil.id for pupil in classroom.people]
            # Show at most 5 ids per classroom.
            shown_ids = random.sample(pupil_ids, min(5, len(pupil_ids)))
            row = {
                "| School ID": school.id,
                "| Classroom": str(number),
                "| Total Students": len(classroom.people),
                "| Sample of IDs of Persons (Students)": ", ".join(
                    str(pupil_id) for pupil_id in shown_ids
                ),
            }
            rows.append(row)

    layout = pd.DataFrame(rows)

    print("\n===== Classroom Distribution =====")
    print(layout)

School distributor