Skip to content

Symptoms saver

load_symptoms_from_hdf5(hdf5_file_path, chunk_size=50000)

Loads symptoms data from hdf5.

Parameters:

Name Type Description Default
hdf5_file_path str

hdf5 path to load from

required
chunk_size

number of records loaded per hdf5 chunk (Default value = 50000)

50000
Source code in june/hdf5_savers/infection_savers/symptoms_saver.py
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
def load_symptoms_from_hdf5(hdf5_file_path: str, chunk_size=50000):
    """Loads symptoms data from hdf5.

    Reads the "infections/symptoms" group chunk by chunk and rebuilds one
    Symptoms object per stored record.

    Args:
        hdf5_file_path (str): hdf5 path to load from
        chunk_size: number of records read per hdf5 chunk (Default value = 50000)

    Returns:
        list of reconstructed Symptoms objects, in on-disk order.
    """
    loaded_symptoms = []
    with h5py.File(hdf5_file_path, "r") as f:
        group = f["infections"]["symptoms"]
        total = group.attrs["n_symptoms"]
        number_of_chunks = int(np.ceil(total / chunk_size))
        for chunk_index in range(number_of_chunks):
            start = chunk_index * chunk_size
            stop = min(start + chunk_size, total)
            # Pull one contiguous slice of every per-symptom dataset.
            tags = read_dataset(group["tag"], start, stop)
            max_tags = read_dataset(group["max_tag"], start, stop)
            stages = read_dataset(group["stage"], start, stop)
            max_severities = read_dataset(group["max_severity"], start, stop)
            onset_times = read_dataset(
                group["time_of_symptoms_onset"], start, stop
            )
            trajectory_times = read_dataset(
                group["trajectory_times"], start, stop
            )
            trajectory_tags = read_dataset(
                group["trajectory_symptoms"], start, stop
            )
            for i in range(stop - start):
                symptom = Symptoms()
                symptom.tag = SymptomTag(tags[i])
                symptom.max_tag = SymptomTag(max_tags[i])
                symptom.stage = stages[i]
                symptom.max_severity = max_severities[i]
                symptom.time_of_symptoms_onset = onset_times[i]
                # Re-pair each trajectory step with its enum tag.
                symptom.trajectory = tuple(
                    (time, SymptomTag(tag))
                    for time, tag in zip(trajectory_times[i], trajectory_tags[i])
                )
                loaded_symptoms.append(symptom)
    return loaded_symptoms

save_symptoms_to_hdf5(hdf5_file_path, symptoms_list, chunk_size=50000)

Saves symptoms data to hdf5.

Parameters:

Name Type Description Default
hdf5_file_path str

hdf5 path to save symptoms

required
symptoms_list List[Symptoms]

list of symptoms objects to save

required
chunk_size int

number of records saved per hdf5 chunk (Default value = 50000)

50000
Source code in june/hdf5_savers/infection_savers/symptoms_saver.py
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
def save_symptoms_to_hdf5(
    hdf5_file_path: str, symptoms_list: List[Symptoms], chunk_size: int = 50000
):
    """Saves symptoms data to hdf5.

    Scalar attributes are written chunk by chunk; the (possibly ragged)
    trajectories are written in one pass at the end.

    Args:
        hdf5_file_path (str): hdf5 path to save symptoms
        symptoms_list (List[Symptoms]): symptoms objects to serialise
        chunk_size (int, optional): number of records written per hdf5 chunk
            (Default value = 50000)
    """
    n_symptoms = len(symptoms_list)
    with h5py.File(hdf5_file_path, "a") as f:
        if "infections" not in f:
            f.create_group("infections")
        # NOTE(review): raises if "infections/symptoms" already exists —
        # presumably each save targets a fresh group; confirm with callers.
        symptoms_group = f["infections"].create_group("symptoms")
        symptoms_group.attrs["n_symptoms"] = n_symptoms
        number_of_chunks = int(np.ceil(n_symptoms / chunk_size))
        for chunk_index in range(number_of_chunks):
            start = chunk_index * chunk_size
            stop = min(start + chunk_size, n_symptoms)
            batch = symptoms_list[start:stop]
            # Column-major view of this batch's scalar attributes.
            columns = {
                "max_tag": np.array(
                    [s.max_tag for s in batch], dtype=np.int64
                ),
                "tag": np.array([s.tag for s in batch], dtype=np.int64),
                "max_severity": np.array(
                    [s.max_severity for s in batch], dtype=np.float64
                ),
                "stage": np.array([s.stage for s in batch], dtype=np.int64),
                "time_of_symptoms_onset": np.array(
                    [s.time_of_symptoms_onset for s in batch], dtype=np.float64
                ),
            }
            for dataset_name, column in columns.items():
                write_dataset(
                    group=symptoms_group,
                    dataset_name=dataset_name,
                    data=column,
                    index1=start,
                    index2=stop,
                )
        times_per_symptom = []
        tags_per_symptom = []
        lengths = []
        for symptoms in symptoms_list:
            trajectory = symptoms.trajectory
            times_per_symptom.append(
                np.array([time for time, _ in trajectory], dtype=np.float64)
            )
            tags_per_symptom.append(
                np.array([tag for _, tag in trajectory], dtype=np.int64)
            )
            lengths.append(len(trajectory))
        if len(np.unique(lengths)) == 1:
            # Every trajectory has the same length: a plain 2-D array suffices.
            write_dataset(
                group=symptoms_group,
                dataset_name="trajectory_times",
                data=np.array(times_per_symptom, dtype=float),
            )
            write_dataset(
                group=symptoms_group,
                dataset_name="trajectory_symptoms",
                data=np.array(tags_per_symptom, dtype=int),
            )
        else:
            # Ragged trajectories require the h5py variable-length dtypes.
            write_dataset(
                group=symptoms_group,
                dataset_name="trajectory_times",
                data=np.array(times_per_symptom, dtype=float_vlen_type),
            )
            write_dataset(
                group=symptoms_group,
                dataset_name="trajectory_symptoms",
                data=np.array(tags_per_symptom, dtype=int_vlen_type),
            )