17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
class HouseholdDistributor:
    """Distribute people from area objects into households.

    Households are created area by area from household composition data.
    People who cannot be placed are reported back to the caller and can
    afterwards be pushed into expandable (or newly created flexible)
    households via the ``allocate_*`` methods.
    """

    def __init__(self):
        """Wire up the collaborating helpers and load all composition data."""
        self.household_stats = []
        # Default initialization; the area names are supplied later by
        # distribute_people_to_households().
        self.data_loader = HouseholdDataLoader()
        self.compatibility_matcher = CompatibilityMatcher(self.data_loader)
        # NOTE(review): "CompositionMaanger" looks like a misspelling of
        # "CompositionManager", but it has to match the class this module
        # imports, so it is deliberately left as-is here.
        self.composition_manager = CompositionMaanger()
        self.stats_reporter = HouseholdStatsReporter(self.composition_manager)
        self.household_factory = HouseholdFactory(
            self.stats_reporter, self.compatibility_matcher
        )
        self.flexible_household_manager = FlexibleHouseholdManager(
            self.household_factory, self.composition_manager
        )
        self.excess_people_allocator = ExcessPeopleAllocator()

        # Load all data upfront
        self.data_loader.load_all_data()

    def distribute_people_to_households(self, areas: "Areas"):
        """Distribute people into households based on household composition data.

        Args:
            areas (Areas): Iterable, sized collection of area objects. Each
                area is identified by its ``name`` attribute, falling back
                to ``str(area.id)``.

        Returns:
            dict: {
                'households': Households object containing created household objects,
                'unallocated_people': dict with unallocated people by area/age/sex
            }
            Only areas that still have unallocated people appear in
            'unallocated_people'.
        """
        logger.info("Allocating people to initial households...")

        # Update data loader with area names for proper regional file loading
        area_names = [getattr(area, 'name', str(area.id)) for area in areas]
        self.data_loader.area_names = area_names

        people_by_area = self.data_loader.group_people_by_age_sex_area(areas)

        created_households = []
        all_unallocated_people = {}
        total_areas = len(areas)
        logger.info(f"Processing {total_areas:,} areas for household allocation...")

        # area_names already holds the identifier of every area, in order.
        for i, (area, area_id) in enumerate(zip(areas, area_names)):
            # Progress logging - every 100 areas and on the very last one
            if i % 100 == 0 or i == total_areas - 1:
                percent_complete = ((i + 1) / total_areas) * 100
                logger.info(
                    f"Processing area {i+1:,}/{total_areas:,} ({percent_complete:.1f}%): {area_id}"
                    f" | Total households created: {len(created_households):,}"
                )

            area_households, area_unallocated = self._process_area(
                area, area_id, people_by_area
            )
            created_households.extend(area_households)
            # _collect_unallocated_people() yields None for fully allocated areas
            if area_unallocated:
                all_unallocated_people[area_id] = area_unallocated

        # Create Households object and return both households and unallocated people
        households_object = Households(created_households)

        total_unallocated = sum(
            len(people)
            for area_data in all_unallocated_people.values()
            for age_group in area_data.values()
            for people in age_group.values()
        )
        logger.info(
            f"Completed initial household allocation: {len(created_households):,} households created"
            f" across {total_areas:,} areas, {total_unallocated:,} people unallocated"
        )

        return {
            'households': households_object,
            'unallocated_people': all_unallocated_people,
        }

    def _process_area(self, area, area_id: str, people_by_area: dict):
        """Process a single area, creating households for all compositions.

        Args:
            area (Area): the area object to be processed
            area_id (str): identifier of the area
            people_by_area (dict): {area_id: {age_group: {sex: deque([people])}}},
                as produced by the data loader's group_people_by_age_sex_area().

        Returns:
            tuple: (area_households, area_unallocated_people), where the
            second element is None when nobody is left over in this area.
        """
        # Initialize area stats tracking
        area_stats = self.stats_reporter.initialize_area_stats(area, area_id, people_by_area)

        # Get household composition data for this area
        area_compositions = self.data_loader.get_area_household_compositions(area_id)
        area_stats['original_requirements'] = area_compositions.copy()

        area_households = []

        # Per the original author's note, the manager orders compositions so
        # that families with kids come first, then by number of kids (desc),
        # then by adults (desc).
        prioritized_compositions = self.composition_manager.prioritize_compositions(
            area_compositions
        )

        for composition, count in prioritized_compositions:
            # Skip compositions we don't want to process yet
            if composition == "0 >=0 >=0 >=0":
                logger.debug(f"Skipping composition '{composition}' - not implemented yet")
                continue

            area_households.extend(
                self._handle_composition(
                    composition, count, area, area_id, people_by_area, area_stats
                )
            )

        # Final stats display is currently disabled:
        # self.stats_reporter.finalize_and_display_area_stats(area_stats, area_households, people_by_area, area_id)

        # Collect unallocated people for this area
        area_unallocated = self._collect_unallocated_people(area_id, people_by_area)
        return area_households, area_unallocated

    def _handle_composition(self, composition: str, count: int, area, area_id: str,
                            people_by_area: dict, area_stats: dict):
        """Handle creation and demotion for a single composition type.

        Args:
            composition (str): Composition string as found in the data, e.g.
                "2 0 2 0" or ">=2 >=0 2 0". The four fields are kids,
                young adults, adults and old adults.
            count (int): Number of households of this composition requested.
            area: The area object the households belong to.
            area_id (str): Identifier of the area.
            people_by_area (dict): Pool of not-yet-allocated people, keyed by
                area, age group and sex.
            area_stats (dict): Per-area statistics; 'creation_results' is
                updated here.

        Returns:
            list: Households created for this composition (including demoted ones)
        """
        composition_households = []

        # Check if this composition needs assumption expansion
        if self.composition_manager.needs_assumption_expansion(composition):
            logger.debug(f"Composition '{composition}' has children but no guaranteed adults - expanding with multigenerational assumption")
            # Use the expanded composition for household creation, but keep
            # the original for tracking.
            working_composition = self.composition_manager.expand_with_assumptions(composition)
            kids, young_adults, adults, old_adults = (
                self.composition_manager.parse_composition_string(working_composition)
            )
        else:
            # Parse composition normally (e.g., "2 0 2 0" or ">=2 >=0 2 0" -> [2, 0, 2, 0])
            kids, young_adults, adults, old_adults = (
                self.composition_manager.parse_composition_string(composition)
            )
            # Create working composition without >= symbols for consistent tracking
            working_composition = f"{kids} {young_adults} {adults} {old_adults}"

        successful_creations = 0

        # Try to create the requested number of households
        for _ in range(count):
            household = self.household_factory.create_household_from_composition(
                area, area_id, kids, young_adults, adults, old_adults, people_by_area,
                original_composition=working_composition,  # normalized composition for demotion tracking
            )
            if not household:
                continue

            # Store the literal composition for expandability logic (preserve >= symbols)
            household.literal_composition = composition
            composition_households.append(household)
            successful_creations += 1

            # A household whose actual composition differs from the requested
            # one (e.g. 2 adults -> 1 adult, single parent) is tracked as a
            # partial demotion.
            if (
                hasattr(household, 'original_composition')
                and hasattr(household, 'actual_composition')
                and household.original_composition
                and household.original_composition != household.actual_composition
            ):
                self.stats_reporter.track_demotion(
                    area_stats, household.original_composition, household.actual_composition, 1
                )
                logger.debug(f"Tracked partial demotion: {household.original_composition} -> {household.actual_composition} (single parent household)")

        # Track success/failure rates
        area_stats['creation_results'][composition] = {
            'requested': count,
            'created': successful_creations,
            'success_rate': successful_creations / count if count > 0 else 0,
        }

        # Handle failed household creations by demoting to simpler compositions
        failed_count = count - successful_creations
        if failed_count > 0:
            composition_households.extend(
                self._handle_demotion(
                    composition, working_composition, failed_count, area, area_id,
                    people_by_area, area_stats
                )
            )

        return composition_households

    def _handle_demotion(self, original_composition: str, working_composition: str,
                         failed_count: int, area, area_id: str, people_by_area: dict,
                         area_stats: dict):
        """Handle demotion of failed household creations.

        Args:
            original_composition (str): Literal composition string from the data.
            working_composition (str): Normalized (or assumption-expanded)
                composition that was actually attempted.
            failed_count (int): Number of households that could not be created.
            area: The area object the households belong to.
            area_id (str): Identifier of the area.
            people_by_area (dict): Pool of not-yet-allocated people.
            area_stats (dict): Per-area statistics; 'demotions' and
                'creation_results' are updated here.

        Returns:
            list: Successfully created demoted households
        """
        demoted_households = []

        # Use working_composition for demotion (the expanded one if applicable)
        demoted_compositions = self.composition_manager.demote_failed_households(
            working_composition, failed_count, people_by_area, area_id
        )

        for demoted_comp, demoted_count in demoted_compositions.items():
            if working_composition != original_composition:
                logger.debug(f"Demoting {demoted_count} failed '{original_composition}' (expanded to '{working_composition}') households to '{demoted_comp}'")
            else:
                logger.debug(f"Demoting {demoted_count} failed '{original_composition}' households to '{demoted_comp}'")

            # Track demotion in stats
            area_stats['demotions'].setdefault(original_composition, {})[demoted_comp] = demoted_count

            # Try to create the demoted households
            demoted_kids, demoted_ya, demoted_adults, demoted_old = (
                self.composition_manager.parse_composition_string(demoted_comp)
            )

            # Check if demoted composition is feasible
            if not self.household_factory.check_household_feasibility(
                area_id, demoted_kids, demoted_ya, demoted_adults, demoted_old, people_by_area
            ):
                logger.debug(f"Demoted composition '{demoted_comp}' not feasible - insufficient people")
                continue

            demoted_successful = 0
            for attempt in range(1, demoted_count + 1):
                demoted_household = self.household_factory.create_household_from_composition(
                    area, area_id, demoted_kids, demoted_ya, demoted_adults, demoted_old, people_by_area,
                    original_composition=demoted_comp,  # demoted composition is the baseline for further tracking
                )
                if demoted_household:
                    # Preserve the original literal composition for expandability
                    demoted_household.literal_composition = original_composition
                    demoted_households.append(demoted_household)
                    demoted_successful += 1
                else:
                    logger.debug(f"Failed to create demoted household {attempt}/{demoted_count} of type '{demoted_comp}'")

            # Track demoted households in stats
            results = area_stats['creation_results'].setdefault(
                demoted_comp, {'requested': 0, 'created': 0, 'success_rate': 0}
            )
            results['requested'] += demoted_count
            results['created'] += demoted_successful
            # Keep the rate in step with the counters; otherwise it would stay
            # at its initial value after the counts above have changed.
            results['success_rate'] = (
                results['created'] / results['requested'] if results['requested'] > 0 else 0
            )

        return demoted_households

    def _collect_unallocated_people(self, area_id: str, people_by_area: dict):
        """Collect unallocated people for this area.

        Args:
            area_id (str): Identifier of the area.
            people_by_area (dict): {area_id: {age_group: {sex: iterable of people}}};
                whatever is still in there counts as unallocated.

        Returns:
            dict or None: {age_group: {sex: [people]}} covering all four age
            groups and both sexes if anyone is left over, None otherwise.
        """
        remaining = people_by_area.get(area_id, {})
        # Snapshot the remaining people as plain lists for external use
        area_unallocated = {
            age_group: {
                sex: list(remaining.get(age_group, {}).get(sex, ()))
                for sex in ("m", "f")
            }
            for age_group in ("kids", "young_adults", "adults", "old_adults")
        }

        total_unallocated = sum(
            len(people)
            for by_sex in area_unallocated.values()
            for people in by_sex.values()
        )
        # Only return areas that have unallocated people
        if total_unallocated == 0:
            return None

        if logger.isEnabledFor(logging.DEBUG):
            # Only collect IDs when debug logging is enabled
            unallocated_ids = [
                getattr(person, 'id', str(person))
                for by_sex in area_unallocated.values()
                for people in by_sex.values()
                for person in people
            ]
            logger.debug(f"Area {area_id}: {total_unallocated} unallocated people with IDs: {unallocated_ids}")

        return area_unallocated

    def allocate_remaining_people_to_expandable_households(self, households, unallocated_people, mode):
        """Place leftover people of one age group into expandable households.

        Args:
            households: Collection of already created households.
            unallocated_people: {area_id: {age_group: {sex: [people]}}}; may
                be empty or None.
            mode: Age group to process: 'kids', 'young_adults', 'adults' or
                'old_adults'.

        Returns:
            The working copy of ``unallocated_people`` after allocation, or
            the ``unallocated_people`` argument itself, unchanged, when there
            is nothing to do (no households, nobody unallocated, or no
            expandable household for this mode).
        """
        # Count total unallocated people for this mode. None is tolerated
        # here because the emptiness guard only runs after this count.
        total_unallocated = 0
        areas_with_unallocated = 0
        for area_data in (unallocated_people or {}).values():
            if mode in area_data:
                area_count = sum(len(people) for people in area_data[mode].values())
                if area_count > 0:
                    total_unallocated += area_count
                    areas_with_unallocated += 1

        logger.info(f"Allocating remaining {mode} to expandable households...")
        logger.info(f"Found {total_unallocated:,} unallocated {mode} across {areas_with_unallocated:,} areas")

        if not households or not unallocated_people:
            logger.debug(f"No households or unallocated people available for {mode} allocation")
            return unallocated_people

        # Identify expandable households by area
        expandable_households_by_area = self.excess_people_allocator.identify_expandable_households(
            households, mode
        )
        if not expandable_households_by_area:
            logger.debug(f"No households with composition for {mode} found for {mode} allocation")
            return unallocated_people

        total_expandable = sum(
            len(area_households) for area_households in expandable_households_by_area.values()
        )
        logger.info(f"Found {total_expandable:,} expandable households across {len(expandable_households_by_area):,} areas")

        updated_unallocated = self.excess_people_allocator.create_working_copy(unallocated_people)

        # First allocation round - allocate to existing expandable households
        total_people_allocated = self.excess_people_allocator.allocate_people_to_areas(
            updated_unallocated, expandable_households_by_area, households, mode
        )

        # Check for overcrowding and handle second allocation round if needed
        promoted_households = self.excess_people_allocator.promote_households_if_overcrowded(
            households, expandable_households_by_area, updated_unallocated, mode
        )
        if promoted_households > 0:
            total_people_allocated += self.excess_people_allocator.handle_second_allocation_round(
                expandable_households_by_area, updated_unallocated, mode
            )

        if mode in ('young_adults', 'adults', 'old_adults'):
            # Create flexible households for any remaining non-kids who couldn't be placed
            total_people_allocated += (
                self.flexible_household_manager.create_flexible_households_for_remaining_non_kids(
                    households, expandable_households_by_area, updated_unallocated, mode,
                    self.data_loader
                )
            )

        # Log final allocation summary
        remaining_unallocated = sum(
            len(people)
            for area_data in updated_unallocated.values()
            if mode in area_data
            for people in area_data[mode].values()
        )
        if total_unallocated > 0:
            allocation_success_rate = (
                (total_unallocated - remaining_unallocated) / total_unallocated * 100
            )
        else:
            allocation_success_rate = 100
        logger.info(f"Completed {mode} allocation: {total_people_allocated:,} allocated, {remaining_unallocated:,} remaining ({allocation_success_rate:.1f}% success rate)")

        # Comprehensive summary is currently disabled:
        # self.stats_reporter.log_unassigned_people_allocation_summary(
        #     expandable_households_by_area, unallocated_people, updated_unallocated,
        #     total_people_allocated, mode
        # )

        return updated_unallocated

    def allocate_all_remaining_people_to_expandable_households(self, households, unallocated_people):
        """Allocate all remaining people to expandable households in 4 rounds with different modes.

        The rounds run in the order kids, young_adults, adults, old_adults;
        each round receives the result of the previous one.

        Args:
            households (Households): The households object containing all created households
            unallocated_people (dict): Dictionary of unallocated people by area/age_group/sex

        Returns:
            dict: Updated unallocated_people dictionary after all 4 allocation rounds
        """
        modes = ['kids', 'young_adults', 'adults', 'old_adults']

        logger.info(f"Starting allocation of remaining unallocated people across {len(modes)} age groups...")

        for i, mode in enumerate(modes, 1):
            logger.info(f"Processing age group {i}/{len(modes)}: {mode}")
            unallocated_people = self.allocate_remaining_people_to_expandable_households(
                households, unallocated_people, mode
            )

        # Log final summary
        final_unallocated = sum(
            len(people)
            for area_data in unallocated_people.values()
            for age_group in area_data.values()
            for people in age_group.values()
        )
        logger.info(f"Completed all remaining people allocation - {final_unallocated:,} people still unallocated")

        return unallocated_people
|