Skip to content

Hobbies

HobbyGenerator

A class to manage hobbies

The class manages how hobbies are assigned, including loading the hobby data, precomputing probabilities, and assigning hobbies based on a person's sex and age.

Source code in june/demography/hobbies.py
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
class HobbyGenerator:
    """Assign hobbies to people from sex- and age-dependent probabilities.

    On construction the hobby definitions are read from a YAML file and
    collapsed into a flat lookup table keyed by ``(sex, age_bin)``; only that
    table is kept. ``assign_hobbies`` then draws hobbies from it.

    Attributes:
        precomputed_data (dict): ``{(sex, age_bin): {hobby: weight}}`` where
            ``sex`` is ``"m"`` or ``"f"``. The weights of each non-empty entry
            sum to 1; an entry is empty when no hobby has a positive weight.
    """

    # Maps the sex labels used in the YAML file to the short codes used in keys.
    _SEX_CODES = {"male": "m", "female": "f"}

    # (inclusive upper age bound, bin label) for every bounded bin, ascending.
    _AGE_BINS = (
        (4, "0-4"),
        (9, "5-9"),
        (14, "10-14"),
        (19, "15-19"),
        (29, "20-29"),
        (39, "30-39"),
        (49, "40-49"),
        (59, "50-59"),
    )
    # Label used for every age above the last bounded bin.
    _OLDEST_AGE_BIN = "60+"

    def __init__(self, yaml_file=None):
        """Load the hobby data and precompute the lookup table.

        Parameters:
          yaml_file (str, optional):
            Path to the YAML file containing hobby data. If not provided, uses the default.
        """
        if yaml_file is None:
            yaml_file = paths.DefaultPaths.get_default_hobbies_path()

        # The raw YAML content is not stored; only the derived table is kept.
        self.precomputed_data = self._precompute_data(yaml_file)

    def _precompute_data(self, file_path) -> dict:
        """Read the YAML file and turn it into the flat probability table.

        Args:
            file_path (str): Path to the YAML file.

        Returns:
            precomputed (dict): ``{(sex, age_bin): {hobby: normalised_weight}}``.

        """
        with open(file_path, "r", encoding="utf-8") as file:
            hobby_data = yaml.safe_load(file)

        return self._build_probability_table(hobby_data)

    @classmethod
    def _build_probability_table(cls, hobby_data) -> dict:
        """Combine category and sub-hobby probabilities into one table.

        For every hobby category, sex and age bin, each sub-hobby receives the
        raw weight ``category_prob * sub_prob``. The raw weights of all
        categories are accumulated per ``(sex, age_bin)`` and normalised once
        at the end, so that the category probability actually influences the
        result (normalising inside each category would cancel it out).
        A sub-hobby listed in several categories gets the sum of its weights.

        Args:
            hobby_data (dict): Parsed YAML content. ``hobby_data["hobbies"]``
                maps category names to dicts with a ``"probability"`` entry
                (``{"male"/"female": {age_bin: category_prob}}``) and an
                ``"options"`` entry (``{sub_hobby: sub_prob}``).

        Returns:
            (dict): ``{(sex, age_bin): {hobby: normalised_weight}}``. Hobbies
            without a positive weight are left out; a key whose hobbies all
            have zero weight maps to an empty dict.

        """
        raw_weights = {}

        for details in hobby_data["hobbies"].values():
            options = details["options"]
            for long_sex, short_sex in cls._SEX_CODES.items():
                age_probs = details["probability"].get(long_sex, {})
                for age_bin, category_prob in age_probs.items():
                    # Create the key even if nothing ends up with weight > 0.
                    bucket = raw_weights.setdefault((short_sex, age_bin), {})
                    for sub_hobby, sub_prob in options.items():
                        weight = category_prob * sub_prob
                        if weight > 0:
                            bucket[sub_hobby] = bucket.get(sub_hobby, 0.0) + weight

        precomputed = {}
        for key, bucket in raw_weights.items():
            total_weight = sum(bucket.values())
            precomputed[key] = (
                {hobby: weight / total_weight for hobby, weight in bucket.items()}
                if total_weight > 0
                else {}
            )

        return precomputed

    def assign_hobbies(self, sex, age):
        """Draw hobbies for a person using the precomputed probabilities.

        One or two hobbies are drawn without replacement, each draw weighted
        by the precomputed probabilities of the hobbies still available.

        Args:
            sex (str): "m" or "f".
            age (int): Age of the person.

        Returns:
            (list): A list of 1-2 unique hobbies, or an empty list when no
            hobby with a positive weight exists for this sex and age bin.

        """
        age_bin = self._determine_age_bin(age)
        sub_hobby_probs = self.precomputed_data.get((sex, age_bin), {})

        # Zero-weight hobbies must never be drawn, so drop them up front.
        hobbies = []
        weights = []
        for hobby, weight in sub_hobby_probs.items():
            if weight > 0:
                hobbies.append(hobby)
                weights.append(weight)

        if not hobbies:
            return []  # No hobbies available for this combination

        # Ensure we don't select more hobbies than available
        num_hobbies = min(random.randint(1, 2), len(hobbies))

        # Weighted sampling without replacement: random.sample ignores
        # weights, so draw one index at a time and remove what was drawn.
        selected = []
        for _ in range(num_hobbies):
            index = random.choices(range(len(hobbies)), weights=weights, k=1)[0]
            selected.append(hobbies.pop(index))
            weights.pop(index)
        return selected

    def _determine_age_bin(self, age):
        """Determine the age bin for a given age.

        Args:
            age (int): The age of the person.

        Returns:
            (str): The label of the first bin whose upper bound is not below
            ``age``, or "60+" when ``age`` exceeds every bound.

        """
        for upper_bound, label in self._AGE_BINS:
            if age <= upper_bound:
                return label
        return self._OLDEST_AGE_BIN

__init__(yaml_file=None)

Initialise the HobbyGenerator by loading hobby data from a YAML file and precomputing the hobby probabilities. Only the precomputed table is kept; the intermediate data is discarded.

Parameters:

Name Type Description Default
yaml_file str

Path to the YAML file containing hobby data. If not provided, uses the default.

None
Source code in june/demography/hobbies.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
def __init__(self, yaml_file=None):
    """
    Set up the generator: resolve the hobby data file, then build and keep
    the precomputed probability table (nothing else is stored).

    Parameters:
      yaml_file (str, optional):
        Path to the YAML file containing hobby data. If not provided, uses the default.
    """
    # Fall back to the project's default hobbies file only when no path is given.
    source_path = (
        paths.DefaultPaths.get_default_hobbies_path()
        if yaml_file is None
        else yaml_file
    )

    # Loading and precomputation both happen inside _precompute_data.
    self.precomputed_data = self._precompute_data(source_path)

assign_hobbies(sex, age)

Assign hobbies to a person based on their sex and age, drawing directly from the precomputed probabilities.

Parameters:

Name Type Description Default
sex str

"m" or "f".

required
age int

Age of the person.

required

Returns:

Type Description
list

A list of 1-2 unique hobbies assigned to the person, or an empty list if no hobbies are available for the given sex and age bin.

Source code in june/demography/hobbies.py
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
def assign_hobbies(self, sex, age):
    """Draw hobbies for a person using the precomputed probabilities.

    One or two hobbies are drawn without replacement, each draw weighted by
    the precomputed probabilities of the hobbies still available.

    Args:
        sex (str): "m" or "f".
        age (int): Age of the person.

    Returns:
        (list): A list of 1-2 unique hobbies, or an empty list when no hobby
        with a positive weight exists for this sex and age bin.

    """
    # Determine the age bin
    age_bin = self._determine_age_bin(age)

    # Retrieve precomputed probabilities
    sub_hobby_probs = self.precomputed_data.get((sex, age_bin), {})

    # Zero-weight hobbies must never be drawn, so drop them up front.
    hobbies = []
    weights = []
    for hobby, weight in sub_hobby_probs.items():
        if weight > 0:
            hobbies.append(hobby)
            weights.append(weight)

    if not hobbies:
        return []  # No hobbies available for this combination

    # Ensure we don't select more hobbies than available
    num_hobbies = min(random.randint(1, 2), len(hobbies))

    # Weighted sampling without replacement: random.sample ignores weights,
    # so draw one index at a time and remove what was drawn.
    selected = []
    for _ in range(num_hobbies):
        index = random.choices(range(len(hobbies)), weights=weights, k=1)[0]
        selected.append(hobbies.pop(index))
        weights.pop(index)
    return selected