Skip to content

Transmission saver

load_transmissions_from_hdf5(hdf5_file_path, chunk_size=50000)

Loads transmissions data from hdf5.

Parameters:

Name Type Description Default
hdf5_file_path str

hdf5 path to load from

required
chunk_size

number of transmissions to read per chunk while loading (Default value = 50000)

50000
Source code in june/hdf5_savers/infection_savers/transmission_saver.py
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
def load_transmissions_from_hdf5(hdf5_file_path: str, chunk_size=50000):
    """Loads transmissions data from hdf5.

    Reads the ``infections/transmissions`` group chunk by chunk and rebuilds one
    transmission object per stored entry. The class to instantiate is looked up
    in ``str_to_class`` using the group's ``transmission_type`` attribute, and
    every dataset in the group is set as an attribute of the same name on each
    object. NaN entries are mapped back to ``None``.

    Args:
        hdf5_file_path (str): hdf5 path to load from
        chunk_size: number of transmissions to read per chunk (Default value = 50000)

    Returns:
        list: the reconstructed transmission objects, in stored order.

    """
    transmissions = []
    with h5py.File(hdf5_file_path, "r") as f:
        transmissions_group = f["infections"]["transmissions"]
        n_transsmissions = transmissions_group.attrs["n_transsmissions"]
        transmission_type = transmissions_group.attrs["transmission_type"]
        transmission_class = str_to_class[transmission_type]
        # The set of datasets does not change between chunks; list it once.
        attribute_names = list(transmissions_group.keys())
        n_chunks = int(np.ceil(n_transsmissions / chunk_size))
        for chunk in range(n_chunks):
            idx1 = chunk * chunk_size
            idx2 = min((chunk + 1) * chunk_size, n_transsmissions)
            attribute_dict = {
                attribute_name: read_dataset(
                    transmissions_group[attribute_name], idx1, idx2
                )
                for attribute_name in attribute_names
            }
            for index in range(idx2 - idx1):
                transmission = transmission_class()
                for attribute_name, values in attribute_dict.items():
                    attribute_value = values[index]
                    # NaN never compares equal to anything (including itself),
                    # so an ``== np.nan`` test can never succeed; np.isnan is
                    # required. The type guard keeps non-float values away
                    # from np.isnan, which rejects e.g. strings.
                    if isinstance(
                        attribute_value, (float, np.floating)
                    ) and np.isnan(attribute_value):
                        attribute_value = None
                    setattr(transmission, attribute_name, attribute_value)
                transmissions.append(transmission)
    return transmissions

save_transmissions_to_hdf5(hdf5_file_path, transmissions, chunk_size=50000)

Saves transmissions data to hdf5. The transmission type is inferred from the first element of the list.

Parameters:

Name Type Description Default
hdf5_file_path str

hdf5 path to save transmissions

required
transmissions List[Transmission]

list of transmission objects

required
chunk_size int

number of transmissions to write per chunk while saving (Default value = 50000)

50000
Source code in june/hdf5_savers/infection_savers/transmission_saver.py
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
def save_transmissions_to_hdf5(
    hdf5_file_path: str, transmissions: List[Transmission], chunk_size: int = 50000
):
    """Saves transmissions data to hdf5. The transmission type is inferred from the first
    element of the list.

    The data is written to a newly created ``infections/transmissions`` group:
    the number of transmissions and the class name are stored as group
    attributes, and each attribute listed in ``attributes_to_save_dict`` for
    that class is written as a float64 dataset, with ``None`` stored as NaN.

    Args:
        hdf5_file_path (str): hdf5 path to save transmissions
        transmissions (List[Transmission]): list of transmission objects
        chunk_size (int, optional): number of transmissions to write per chunk (Default value = 50000)

    Raises:
        IndexError: if ``transmissions`` is empty, since the type cannot be inferred.
        KeyError: if the transmission class has no entry in ``attributes_to_save_dict``.

    """
    n_transsmissions = len(transmissions)
    # Validate before opening the file: it is opened in append mode, so failing
    # after the group has been created would leave a half-written group behind.
    # IndexError is kept because that is what indexing an empty list raises.
    if n_transsmissions == 0:
        raise IndexError(
            "cannot infer the transmission type from an empty list of transmissions"
        )
    transmission_type = transmissions[0].__class__.__name__
    attributes_to_save = attributes_to_save_dict[transmission_type]
    n_chunks = int(np.ceil(n_transsmissions / chunk_size))
    with h5py.File(hdf5_file_path, "a") as f:
        if "infections" not in f:
            f.create_group("infections")
        f["infections"].create_group("transmissions")
        transmissions_group = f["infections"]["transmissions"]
        transmissions_group.attrs["n_transsmissions"] = n_transsmissions
        transmissions_group.attrs["transmission_type"] = transmission_type
        for chunk in range(n_chunks):
            idx1 = chunk * chunk_size
            idx2 = min((chunk + 1) * chunk_size, n_transsmissions)
            chunk_transmissions = transmissions[idx1:idx2]
            for attribute_name in attributes_to_save:
                values = [
                    getattr(transmission, attribute_name)
                    for transmission in chunk_transmissions
                ]
                # None cannot be stored in a float dataset; NaN stands in for it.
                data = np.array(
                    [np.nan if value is None else value for value in values],
                    dtype=np.float64,
                )
                write_dataset(
                    group=transmissions_group,
                    dataset_name=attribute_name,
                    data=data,
                    index1=idx1,
                    index2=idx2,
                )