
Domain decomposition

DomainSplitter

Class used to split the world into n domains, each containing an equal number of super areas contiguous to each other.

In non-MPI mode or with a single MPI process, all super areas are assigned to domain 0.
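
A minimal usage sketch, assuming a saved world at the hypothetical path "world.hdf5" (the import path follows the source location shown below; both return values are dictionaries keyed by domain index):

from june.domains.domain_decomposition import DomainSplitter

# Split the saved world into 4 domains; "world.hdf5" is a hypothetical path.
super_areas_per_domain, score_per_domain = DomainSplitter.generate_world_split(
    number_of_domains=4,
    world_path="world.hdf5",
)
for domain_id, super_areas in super_areas_per_domain.items():
    print(domain_id, len(super_areas), score_per_domain[domain_id])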

Source code in june/domains/domain_decomposition.py
class DomainSplitter:
    """Class used to split the world into ``n`` domains containing an equal number
    of super areas continuous to each other.

    In non-MPI mode or with a single MPI process, all super areas are assigned to domain 0.

    """

    def __init__(
        self,
        number_of_domains: int,
        super_area_data: dict,
        super_area_centroids_path: str = default_super_area_centroids_path,
        super_area_adjacency_graph_path: str = default_super_area_adjaceny_graph_path,
        weights=default_weights,
    ):
        """
        Parameters
        ----------
        number_of_domains
            number of domains to split the world into
        super_area_data
            dictionary specifying the number of people, workers, pupils and commuters
            per super area
        """
        self.number_of_domains = number_of_domains

        # In non-MPI mode or single process, create a simple split
        if not mpi_available or number_of_domains <= 1:
            self.simple_split = True
            self.super_area_data = super_area_data
            return

        self.simple_split = False
        with open(super_area_adjacency_graph_path, "r") as f:
            self.adjacency_graph = json.load(f)
        self.super_area_data = super_area_data
        self.super_area_df = pd.read_csv(super_area_centroids_path, index_col=0)
        self.super_area_df = self.super_area_df.loc[super_area_data.keys()]
        super_area_scores = list(
            map(lambda x: self.get_score(x, weights=weights), self.super_area_df.index)
        )
        self.super_area_df.loc[:, "score"] = super_area_scores

    @classmethod
    def generate_world_split(
        cls,
        number_of_domains: int,
        world_path: str,
        weights=default_weights,
        super_area_centroids_path: str = default_super_area_centroids_path,
        super_area_adjacency_graph_path: str = default_super_area_adjaceny_graph_path,
        maxiter=100,
    ):
        """

        Args:
            number_of_domains (int): 
            world_path (str): 
            weights: (Default value = default_weights)
            super_area_centroids_path (str, optional): (Default value = default_super_area_centroids_path)
            super_area_adjacency_graph_path (str, optional): (Default value = default_super_area_adjaceny_graph_path)
            maxiter: (Default value = 100)

        """
        super_area_data = load_data_for_domain_decomposition(world_path)

        # If not using MPI or only one domain is needed, create a simple split
        if not mpi_available or number_of_domains <= 1:
            # Assign all super areas to domain 0
            super_areas_per_domain = {0: list(super_area_data.keys())}

            # Calculate total score for domain 0
            total_score = sum(
                weights["population"] * data["n_people"]
                + weights["workers"] * (data["n_workers"] + data["n_pupils"])
                + weights["commuters"] * data["n_commuters"]
                for data in super_area_data.values()
            )
            score_per_domain = {0: total_score}

            return super_areas_per_domain, score_per_domain

        # Otherwise, use the normal clustering approach
        ds = cls(
            number_of_domains=number_of_domains,
            super_area_data=super_area_data,
            super_area_centroids_path=super_area_centroids_path,
            super_area_adjacency_graph_path=super_area_adjacency_graph_path,
            weights=weights,
        )
        return ds.generate_domain_split(maxiter=maxiter)

    def get_score(self, super_area, weights=default_weights):
        """

        Args:
            super_area: 
            weights: (Default value = default_weights)

        """
        data = self.super_area_data[super_area]
        return (
            weights["population"] * data["n_people"]
            + weights["workers"] * (data["n_workers"] + data["n_pupils"])
            + weights["commuters"] * data["n_commuters"]
        )

    def generate_domain_split(self, maxiter=100):
        """

        Args:
            maxiter: (Default value = 100)

        """
        # For single-domain case
        if self.simple_split:
            super_areas_per_domain = {0: list(self.super_area_data.keys())}

            # Calculate total score for domain 0
            total_score = sum(
                default_weights["population"] * data["n_people"]
                + default_weights["workers"] * (data["n_workers"] + data["n_pupils"])
                + default_weights["commuters"] * data["n_commuters"]
                for data in self.super_area_data.values()
            )
            score_per_domain = {0: total_score}

            return super_areas_per_domain, score_per_domain

        # For multi-domain case
        points = list(
            self.super_area_df.apply(
                lambda row: Point(row["X"], row["Y"], row["score"], row.name), axis=1
            ).values
        )
        # Create a mapping from super area name to point for fast lookups
        name_to_point = {point.name: point for point in points}

        for point in points:
            point.neighbors = []
            # Get neighbor area codes from the new adjacency list format
            if point.name in self.adjacency_graph:
                neighbor_codes = self.adjacency_graph[point.name]
                # Convert neighbor codes to actual point objects that exist in our dataset
                point.neighbors = [
                    name_to_point[neighbor_code]
                    for neighbor_code in neighbor_codes
                    if neighbor_code in name_to_point
                ]
            # If super area not in adjacency graph, it has no neighbors
        sc = ScoreClustering(n_clusters=self.number_of_domains)
        clusters = sc.fit(points, maxiter=maxiter)
        super_areas_per_domain = {}
        score_per_domain = {}
        for (i, cluster) in enumerate(clusters):
            super_areas_per_domain[i] = [point.name for point in cluster.points]
            score_per_domain[i] = cluster.score
        print(f"Score is {sc.calculate_score_unbalance(clusters)}")
        return super_areas_per_domain, score_per_domain
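
The constructor expects two data files whose shape can be inferred from the code above: a centroids CSV indexed by super area code with "X" and "Y" columns, and a JSON adjacency list mapping each super area code to the codes of its neighbours. A hedged sketch with invented, MSOA-style codes:

# Hedged sketch of the two inputs read in __init__; all codes are hypothetical.
# Centroids CSV (index_col=0 is the super area code):
#   super_area,X,Y
#   E02000001,532000,181000
#   E02000002,530500,182500
# Adjacency graph JSON (code -> list of neighbouring codes):
adjacency_graph = {
    "E02000001": ["E02000002", "E02000003"],
    "E02000002": ["E02000001"],
    "E02000003": ["E02000001"],
}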

__init__(number_of_domains, super_area_data, super_area_centroids_path=default_super_area_centroids_path, super_area_adjacency_graph_path=default_super_area_adjaceny_graph_path, weights=default_weights)

Parameters:

    number_of_domains: number of domains to split the world into
    super_area_data: dictionary specifying the number of people, workers, pupils and commuters per super area

Source code in june/domains/domain_decomposition.py
def __init__(
    self,
    number_of_domains: int,
    super_area_data: dict,
    super_area_centroids_path: str = default_super_area_centroids_path,
    super_area_adjacency_graph_path: str = default_super_area_adjaceny_graph_path,
    weights=default_weights,
):
    """
    Parameters
    ----------
    number_of_domains
        number of domains to split the world into
    super_area_data
        dictionary specifying the number of people, workers, pupils and commuters
        per super area
    """
    self.number_of_domains = number_of_domains

    # In non-MPI mode or single process, create a simple split
    if not mpi_available or number_of_domains <= 1:
        self.simple_split = True
        self.super_area_data = super_area_data
        return

    self.simple_split = False
    with open(super_area_adjacency_graph_path, "r") as f:
        self.adjacency_graph = json.load(f)
    self.super_area_data = super_area_data
    self.super_area_df = pd.read_csv(super_area_centroids_path, index_col=0)
    self.super_area_df = self.super_area_df.loc[super_area_data.keys()]
    super_area_scores = list(
        map(lambda x: self.get_score(x, weights=weights), self.super_area_df.index)
    )
    self.super_area_df.loc[:, "score"] = super_area_scores
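
A hedged sketch of constructing the splitter directly from pre-assembled data (the per-area keys n_people, n_workers, n_pupils and n_commuters follow get_score below; codes and counts are invented). With a single domain the simple-split path is taken, so no centroid or adjacency files are read:

from june.domains.domain_decomposition import DomainSplitter

# All codes and counts below are invented for illustration.
super_area_data = {
    "E02000001": {"n_people": 8000, "n_workers": 3500, "n_pupils": 1500, "n_commuters": 600},
    "E02000002": {"n_people": 6000, "n_workers": 2400, "n_pupils": 1100, "n_commuters": 400},
}
splitter = DomainSplitter(number_of_domains=1, super_area_data=super_area_data)
super_areas_per_domain, score_per_domain = splitter.generate_domain_split()
# super_areas_per_domain == {0: ["E02000001", "E02000002"]}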

generate_domain_split(maxiter=100)

Parameters:

    maxiter: maximum number of clustering iterations (default: 100)
Source code in june/domains/domain_decomposition.py
def generate_domain_split(self, maxiter=100):
    """

    Args:
        maxiter: (Default value = 100)

    """
    # For single-domain case
    if self.simple_split:
        super_areas_per_domain = {0: list(self.super_area_data.keys())}

        # Calculate total score for domain 0
        total_score = sum(
            default_weights["population"] * data["n_people"]
            + default_weights["workers"] * (data["n_workers"] + data["n_pupils"])
            + default_weights["commuters"] * data["n_commuters"]
            for data in self.super_area_data.values()
        )
        score_per_domain = {0: total_score}

        return super_areas_per_domain, score_per_domain

    # For multi-domain case
    points = list(
        self.super_area_df.apply(
            lambda row: Point(row["X"], row["Y"], row["score"], row.name), axis=1
        ).values
    )
    # Create a mapping from super area name to point for fast lookups
    name_to_point = {point.name: point for point in points}

    for point in points:
        point.neighbors = []
        # Get neighbor area codes from the new adjacency list format
        if point.name in self.adjacency_graph:
            neighbor_codes = self.adjacency_graph[point.name]
            # Convert neighbor codes to actual point objects that exist in our dataset
            point.neighbors = [
                name_to_point[neighbor_code]
                for neighbor_code in neighbor_codes
                if neighbor_code in name_to_point
            ]
        # If super area not in adjacency graph, it has no neighbors
    sc = ScoreClustering(n_clusters=self.number_of_domains)
    clusters = sc.fit(points, maxiter=maxiter)
    super_areas_per_domain = {}
    score_per_domain = {}
    for (i, cluster) in enumerate(clusters):
        super_areas_per_domain[i] = [point.name for point in cluster.points]
        score_per_domain[i] = cluster.score
    print(f"Score is {sc.calculate_score_unbalance(clusters)}")
    return super_areas_per_domain, score_per_domain
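
The return values are two plain dictionaries keyed by domain index; a hypothetical result for a two-domain split might look like this (codes and scores are invented):

# Hypothetical output for a two-domain split.
super_areas_per_domain = {
    0: ["E02000001", "E02000004"],
    1: ["E02000002", "E02000003"],
}
score_per_domain = {0: 27100.0, 1: 26850.0}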

generate_world_split(number_of_domains, world_path, weights=default_weights, super_area_centroids_path=default_super_area_centroids_path, super_area_adjacency_graph_path=default_super_area_adjaceny_graph_path, maxiter=100) classmethod

Parameters:

    number_of_domains (int): number of domains to split the world into (required)
    world_path (str): path to the saved world file (required)
    weights: weights for population, workers and commuters (default: default_weights)
    super_area_centroids_path (str): path to the super area centroids CSV (default: default_super_area_centroids_path)
    super_area_adjacency_graph_path (str): path to the super area adjacency graph JSON (default: default_super_area_adjaceny_graph_path)
    maxiter: maximum number of clustering iterations (default: 100)
Source code in june/domains/domain_decomposition.py
@classmethod
def generate_world_split(
    cls,
    number_of_domains: int,
    world_path: str,
    weights=default_weights,
    super_area_centroids_path: str = default_super_area_centroids_path,
    super_area_adjacency_graph_path: str = default_super_area_adjaceny_graph_path,
    maxiter=100,
):
    """

    Args:
        number_of_domains (int): 
        world_path (str): 
        weights: (Default value = default_weights)
        super_area_centroids_path (str, optional): (Default value = default_super_area_centroids_path)
        super_area_adjacency_graph_path (str, optional): (Default value = default_super_area_adjaceny_graph_path)
        maxiter: (Default value = 100)

    """
    super_area_data = load_data_for_domain_decomposition(world_path)

    # If not using MPI or only one domain is needed, create a simple split
    if not mpi_available or number_of_domains <= 1:
        # Assign all super areas to domain 0
        super_areas_per_domain = {0: list(super_area_data.keys())}

        # Calculate total score for domain 0
        total_score = sum(
            weights["population"] * data["n_people"]
            + weights["workers"] * (data["n_workers"] + data["n_pupils"])
            + weights["commuters"] * data["n_commuters"]
            for data in super_area_data.values()
        )
        score_per_domain = {0: total_score}

        return super_areas_per_domain, score_per_domain

    # Otherwise, use the normal clustering approach
    ds = cls(
        number_of_domains=number_of_domains,
        super_area_data=super_area_data,
        super_area_centroids_path=super_area_centroids_path,
        super_area_adjacency_graph_path=super_area_adjacency_graph_path,
        weights=weights,
    )
    return ds.generate_domain_split(maxiter=maxiter)

get_score(super_area, weights=default_weights)

Parameters:

    super_area: code of the super area to score (required)
    weights: weights for population, workers and commuters (default: default_weights)
Source code in june/domains/domain_decomposition.py
def get_score(self, super_area, weights=default_weights):
    """

    Args:
        super_area: 
        weights: (Default value = default_weights)

    """
    data = self.super_area_data[super_area]
    return (
        weights["population"] * data["n_people"]
        + weights["workers"] * (data["n_workers"] + data["n_pupils"])
        + weights["commuters"] * data["n_commuters"]
    )
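
As a worked example of the formula above, with illustrative unit weights (the real values live in default_weights) and invented counts:

weights = {"population": 1.0, "workers": 1.0, "commuters": 1.0}  # illustrative only
data = {"n_people": 8000, "n_workers": 3500, "n_pupils": 1500, "n_commuters": 600}
score = (
    weights["population"] * data["n_people"]
    + weights["workers"] * (data["n_workers"] + data["n_pupils"])
    + weights["commuters"] * data["n_commuters"]
)
print(score)  # 8000 + (3500 + 1500) + 600 = 13600.0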