
Infection saver

load_infections_from_hdf5(hdf5_file_path, chunk_size=50000)

Loads infections data from hdf5.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `hdf5_file_path` | `str` | hdf5 path to load from | *required* |
| `chunk_size` | `int` | number of hdf5 chunks to use while loading | `50000` |

Source code in june/hdf5_savers/infection_savers/infection_saver.py
def load_infections_from_hdf5(hdf5_file_path: str, chunk_size=50000):
    """Loads infections data from hdf5.

    Args:
        hdf5_file_path (str): hdf5 path to load from
        chunk_size: number of hdf5 chunks to use while loading (Default value = 50000)

    """
    infections = []
    with h5py.File(hdf5_file_path, "r") as f:
        infections_group = f["infections"]
        n_infections = infections_group.attrs["n_infections"]
        if n_infections == 0:
            return []
        symptoms_list = load_symptoms_from_hdf5(
            hdf5_file_path=hdf5_file_path, chunk_size=chunk_size
        )
        transmissions = load_transmissions_from_hdf5(
            hdf5_file_path=hdf5_file_path, chunk_size=chunk_size
        )
        trans_symp_index = 0
        n_chunks = int(np.ceil(n_infections / chunk_size))
        for chunk in range(n_chunks):
            idx1 = chunk * chunk_size
            idx2 = min((chunk + 1) * chunk_size, n_infections)
            attribute_dict = {}
            for attribute_name in infections_group.keys():
                if attribute_name in ["symptoms", "transmissions", "infection_class"]:
                    continue
                attribute_dict[attribute_name] = read_dataset(
                    infections_group[attribute_name], idx1, idx2
                )
            for index in range(idx2 - idx1):
                infection_class_str = infections_group["infection_class"][
                    trans_symp_index
                ].decode()
                infection_class = getattr(infection_module, infection_class_str)
                infection = infection_class(
                    transmission=transmissions[trans_symp_index],
                    symptoms=symptoms_list[trans_symp_index],
                )
                trans_symp_index += 1
                for attribute_name in attribute_dict:
                    attribute_value = attribute_dict[attribute_name][index]
                    # np.nan == np.nan is always False; use np.isnan to detect
                    # the NaN placeholder written for missing (None) attributes.
                    if np.isnan(attribute_value):
                        attribute_value = None
                    setattr(infection, attribute_name, attribute_value)
                infections.append(infection)
    return infections
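
A minimal usage sketch, assuming the file was previously written with `save_infections_to_hdf5`; `"checkpoint.hdf5"` is a placeholder path:

```python
# Sketch: reload infections that were saved earlier.
# "checkpoint.hdf5" is a placeholder file name.
from june.hdf5_savers.infection_savers.infection_saver import (
    load_infections_from_hdf5,
)

infections = load_infections_from_hdf5("checkpoint.hdf5", chunk_size=50000)
print(len(infections))  # number of reconstructed Infection objects
```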

save_infection_classes_to_hdf5(hdf5_file_path, infections, chunk_size=50000)

Saves the class name of each infection to hdf5. Normally called internally by save_infections_to_hdf5.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `hdf5_file_path` | `str` | hdf5 path to save to | *required* |
| `infections` | `List[Infection]` | infections whose class names are saved | *required* |
| `chunk_size` | `int` | number of hdf5 chunks to use while saving | `50000` |

Source code in june/hdf5_savers/infection_savers/infection_saver.py
def save_infection_classes_to_hdf5(
    hdf5_file_path: str, infections: List[Infection], chunk_size: int = 50000
):
    """

    Args:
        hdf5_file_path (str): 
        infections (List[Infection]): 
        chunk_size (int, optional): (Default value = 50000)

    """
    n_infections = len(infections)
    n_chunks = int(np.ceil(n_infections / chunk_size))
    with h5py.File(hdf5_file_path, "a") as f:
        for chunk in range(n_chunks):
            idx1 = chunk * chunk_size
            idx2 = min((chunk + 1) * chunk_size, n_infections)
            tosave = []
            for index in range(idx1, idx2):
                infection = infections[index]
                tosave.append(infection.__class__.__name__.encode("ascii", "ignore"))
            data = np.array(tosave, dtype="S20")
            write_dataset(
                group=f["infections"],
                dataset_name="infection_class",
                data=data,
                index1=idx1,
                index2=idx2,
            )
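
This function writes only the `infection_class` dataset and expects the `infections` group to already exist; in normal use it is invoked at the end of `save_infections_to_hdf5`. A standalone sketch, with `infections` standing for a hypothetical list of `Infection` objects and `"checkpoint.hdf5"` a placeholder path:

```python
# Sketch: write class names into a file whose "infections" group already exists.
# "checkpoint.hdf5" and "infections" are placeholders.
import h5py
from june.hdf5_savers.infection_savers.infection_saver import (
    save_infection_classes_to_hdf5,
)

with h5py.File("checkpoint.hdf5", "a") as f:
    f.require_group("infections")  # the function assumes this group exists

save_infection_classes_to_hdf5(
    hdf5_file_path="checkpoint.hdf5", infections=infections, chunk_size=50000
)
```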

save_infections_to_hdf5(hdf5_file_path, infections, chunk_size=50000)

Saves infections data to hdf5.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `hdf5_file_path` | `str` | hdf5 path to save infections to | *required* |
| `infections` | `List[Infection]` | infections to save | *required* |
| `chunk_size` | `int` | number of hdf5 chunks to use while saving | `50000` |

Source code in june/hdf5_savers/infection_savers/infection_saver.py
def save_infections_to_hdf5(
    hdf5_file_path: str, infections: List[Infection], chunk_size: int = 50000
):
    """Saves infections data to hdf5.

    Args:
        hdf5_file_path (str): hdf5 path to save infections to
        infections (List[Infection]): infections to save
        chunk_size (int, optional): number of hdf5 chunks to use while saving (Default value = 50000)

    """
    with h5py.File(hdf5_file_path, "a") as f:
        f.create_group("infections")
        n_infections = len(infections)
        f["infections"].attrs["n_infections"] = n_infections
        if n_infections == 0:
            return
        symptoms_list = [infection.symptoms for infection in infections]
        transmission_list = [infection.transmission for infection in infections]
        save_symptoms_to_hdf5(
            symptoms_list=symptoms_list,
            hdf5_file_path=hdf5_file_path,
            chunk_size=chunk_size,
        )
        save_transmissions_to_hdf5(
            transmissions=transmission_list,
            hdf5_file_path=hdf5_file_path,
            chunk_size=chunk_size,
        )
        attributes_to_save = ["start_time"]
        n_chunks = int(np.ceil(n_infections / chunk_size))
        for chunk in range(n_chunks):
            idx1 = chunk * chunk_size
            idx2 = min((chunk + 1) * chunk_size, n_infections)
            attribute_dict = defaultdict(list)
            for index in range(idx1, idx2):
                infection = infections[index]
                for attribute_name in attributes_to_save:
                    attribute = getattr(infection, attribute_name)
                    if attribute is None:
                        attribute_dict[attribute_name].append(np.nan)
                    else:
                        attribute_dict[attribute_name].append(attribute)
            for attribute_name in attributes_to_save:
                data = np.array(attribute_dict[attribute_name], dtype=np.float64)
                write_dataset(
                    group=f["infections"],
                    dataset_name=attribute_name,
                    data=data,
                    index1=idx1,
                    index2=idx2,
                )
    save_infection_classes_to_hdf5(
        hdf5_file_path=hdf5_file_path, infections=infections, chunk_size=chunk_size
    )
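
A round-trip sketch; `infections` is a placeholder for a list of `Infection` objects produced by a simulation, and `"checkpoint.hdf5"` is a placeholder path. Since the function calls `create_group("infections")`, the target file should not already contain that group:

```python
# Sketch: save infections and read them back.
# "infections" and "checkpoint.hdf5" are placeholders.
from june.hdf5_savers.infection_savers.infection_saver import (
    save_infections_to_hdf5,
    load_infections_from_hdf5,
)

save_infections_to_hdf5(
    hdf5_file_path="checkpoint.hdf5", infections=infections, chunk_size=50000
)
restored = load_infections_from_hdf5("checkpoint.hdf5")
assert len(restored) == len(infections)
```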