Skip to content

Parse probabilities

convert_comorbidities_prevalence_to_dict(prevalence_female, prevalence_male)

Parameters:

Name Type Description Default
prevalence_female
required
prevalence_male
required
Source code in june/utils/parse_probabilities.py
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
def convert_comorbidities_prevalence_to_dict(prevalence_female, prevalence_male):
    """Combine per-sex comorbidity prevalence tables into one nested dictionary.

    Args:
        prevalence_female: table whose ``columns`` are the comorbidity names;
            each selected column must provide ``to_dict()`` (presumably a
            pandas DataFrame -- confirm against callers).
        prevalence_male: table that can be indexed by the same comorbidity
            names as ``prevalence_female``.

    Returns:
        dict: ``{comorbidity: {"f": ..., "m": ...}}`` where each inner value is
        the ``to_dict()`` output of the matching female / male column.
    """
    # The female table alone decides which comorbidities appear in the result.
    return {
        name: {
            "f": prevalence_female[name].to_dict(),
            "m": prevalence_male[name].to_dict(),
        }
        for name in prevalence_female.columns
    }

parse_age_probabilities(age_dict, fill_value=0)

Parses the age probability dictionaries into an array.

Parameters:

Name Type Description Default
age_dict dict
required
fill_value

(Default value = 0)

0
Source code in june/utils/parse_probabilities.py
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
def parse_age_probabilities(age_dict: dict, fill_value=0):
    """Expand an age-range probability mapping into one value per year of age.

    Args:
        age_dict (dict): maps ``"<low>-<high>"`` strings to a probability.
            An age ``a`` falls into a range when ``low <= a < high``.
        fill_value: value used for ages not covered by any range
            (Default value = 0)

    Returns:
        list: 100 entries, one for each age 0..99. When ``age_dict`` is
        ``None`` the pair ``([0], [0])`` is returned instead.

    Raises:
        NotImplementedError: if a key contains no ``"-"`` separator.
    """
    if age_dict is None:
        return [0], [0]

    intervals = []
    values = []
    for label, value in age_dict.items():
        pieces = label.split("-")
        if len(pieces) == 1:
            raise NotImplementedError("Please give age ranges as intervals")
        intervals.append((int(pieces[0]), int(pieces[1])))
        values.append(value)

    # Order the intervals by their lower bound and flatten them into one
    # list of edges: [low_0, high_0, low_1, high_1, ...].
    order = np.argsort([low for low, _ in intervals])
    edges = [edge for position in order for edge in intervals[position]]
    ordered_values = np.array(values)[order]

    # Lookup table aligned with the searchsorted index over ``edges``:
    # even slots are gaps (fill_value), odd slots are the interval values.
    lookup = [fill_value]
    for value in ordered_values:
        lookup.extend((value, fill_value))

    # Searching for age + 1 makes the lower boundary of each range inclusive.
    return [lookup[np.searchsorted(edges, age + 1)] for age in range(100)]

parse_opens(dict, fill_value=0)

Parses the opening time dictionary into a dictionary that maps each day type to its [opening hour, closing hour] pair

Parameters:

Name Type Description Default
dict dict
required
fill_value

(Default value = 0)

0
Source code in june/utils/parse_probabilities.py
68
69
70
71
72
73
74
75
76
77
78
def parse_opens(dict: dict, fill_value=0):
    """Parse an opening-times mapping through the cached parser.

    Args:
        dict (dict): opening-times mapping; its items must be sortable and
            hashable so they can serve as a cache key.
        fill_value: forwarded unchanged to ``parse_opens_cached``
            (Default value = 0)

    Returns:
        Whatever ``parse_opens_cached`` returns for the sorted items.
    """
    # A dict is not hashable, so hand the cache a sorted tuple of its items;
    # sorting makes the key independent of insertion order.
    return parse_opens_cached(tuple(sorted(dict.items())), fill_value)

parse_opens_cached(dict_tuple, fill_value=0) cached

Cached version of parse_opens that works with hashable dict representation

Parameters:

Name Type Description Default
dict_tuple
required
fill_value

(Default value = 0)

0
Source code in june/utils/parse_probabilities.py
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
@lru_cache(maxsize=1024)
def parse_opens_cached(dict_tuple, fill_value=0):
    """Turn hashable ``(day type, "<start>-<end>")`` pairs into hour bounds.

    Args:
        dict_tuple: iterable of ``(key, value)`` pairs accepted by ``dict()``;
            every value is a string of the form ``"<start>-<end>"``.
        fill_value: not used by the parsing itself, but part of the cache key
            (Default value = 0)

    Returns:
        dict: maps each key to ``[start, end]`` as integers. Because of the
        ``lru_cache`` the same dict object is handed back on repeated calls
        with equal arguments.

    Raises:
        NotImplementedError: if a value contains no ``"-"`` separator.
    """
    opening_hours = {}
    # Rebuild the mapping from its hashable representation.
    for day_type, hour_range in dict(dict_tuple).items():
        pieces = hour_range.split("-")
        if len(pieces) == 1:
            raise NotImplementedError("Please give open times as intervals")
        opening_hours[day_type] = [int(pieces[0]), int(pieces[1])]
    return opening_hours

parse_prevalence_comorbidities_in_reference_population(comorbidity_prevalence_reference_population)

Parameters:

Name Type Description Default
comorbidity_prevalence_reference_population
required
Source code in june/utils/parse_probabilities.py
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
def parse_prevalence_comorbidities_in_reference_population(
    comorbidity_prevalence_reference_population,
):
    """Parse the per-sex age prevalence of every comorbidity.

    Args:
        comorbidity_prevalence_reference_population: mapping from comorbidity
            name to a mapping with ``"f"`` and ``"m"`` entries, each of which
            is passed to ``parse_age_probabilities``.

    Returns:
        dict: ``{comorbidity: {"f": ..., "m": ...}}`` holding the result of
        ``parse_age_probabilities`` for each sex.
    """
    return {
        name: {
            "f": parse_age_probabilities(by_sex["f"]),
            "m": parse_age_probabilities(by_sex["m"]),
        }
        for name, by_sex in comorbidity_prevalence_reference_population.items()
    }

read_comorbidity_csv(filename)

Parameters:

Name Type Description Default
filename str
required
Source code in june/utils/parse_probabilities.py
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
def read_comorbidity_csv(filename: str):
    """Read a comorbidity prevalence table and rescale it per age range.

    The CSV is read with its first column as the index; one of the rows must
    be labelled ``"no_condition"``. The column headers are treated as the
    upper bounds of consecutive age ranges and are relabelled
    ``"<lower>-<upper>"``, with the first range starting at 0. Within each
    age range, every row except the last one is multiplied by a common
    factor so that the rows other than ``"no_condition"`` sum to
    ``1 - no_condition``.

    NOTE: the rescaling is positional (all rows but the last), so the
    ``"no_condition"`` row is expected to be the final row of the file.

    Args:
        filename (str): location of the CSV, passed to ``pandas.read_csv``.

    Returns:
        The rescaled table transposed, i.e. age ranges as index and the CSV
        row labels as columns.

    Raises:
        KeyError: if the table has no ``"no_condition"`` row.
    """
    comorbidity_df = pd.read_csv(filename, index_col=0)

    # Pair each upper bound with the previous one (0 for the first range).
    upper_bounds = list(comorbidity_df.columns)
    lower_bounds = [0] + upper_bounds[:-1]
    comorbidity_df.columns = [
        f"{lower}-{upper}" for lower, upper in zip(lower_bounds, upper_bounds)
    ]

    for position in range(len(comorbidity_df.columns)):
        values = comorbidity_df.iloc[:, position]
        no_comorbidity = values.loc["no_condition"]
        should_have_comorbidity = 1 - no_comorbidity
        has_comorbidity = values.sum() - no_comorbidity
        # Assign through a single indexer on the frame itself. The chained
        # form ``df[column].iloc[:-1] *= ...`` only updates a temporary
        # Series under pandas Copy-on-Write, leaving the frame unscaled.
        comorbidity_df.iloc[:-1, position] = values.iloc[:-1] * (
            should_have_comorbidity / has_comorbidity
        )

    return comorbidity_df.T