@@ -1012,6 +1012,137 @@ def add_ssn_card_type(cps: h5py.File, person: pd.DataFrame) -> None:
10121012 f"Condition 14 (SSI recipients): { condition_14_count :,} people moved to code 3"
10131013 )
10141014
# ============================================================================
# DISTRIBUTION AFTER ASEC CONDITIONS
# ============================================================================

# Tally how many people currently hold each SSN card type code, ordered by
# code so the report below always reads 0 -> 3.
final_counts = pd.Series(ssn_card_type).value_counts().sort_index()

# Human-readable label for each code, in the order it is reported.
code_labels = {
    0: "NONE - likely undocumented",
    1: "CITIZEN",
    2: "NON_CITIZEN_VALID_EAD",
    3: "OTHER_NON_CITIZEN",
}

print("\n Distribution after ASEC conditions:")
for code, label in code_labels.items():
    # A code absent from the data is reported as 0 rather than raising.
    print(f"Code {code} ({label}): {final_counts.get(code, 0):,} ")
1025+
# ============================================================================
# FAMILY CORRELATION ADJUSTMENT
# ============================================================================
#
# For every child whose code (0 or 3) disagrees with the code-0 status of
# their co-resident parents, align the child with the parents with
# probability `correlation_probability`:
#   * a parent has code 0 and the child has code 3 -> child becomes 0
#   * the child has code 0 and no parent has code 0 -> child becomes 3
# Only people holding code 0 or 3 take part; codes 1 and 2 are never touched.
# `ssn_card_type` is modified in place (indexed by row position).

print("\n --- Family Correlation Adjustment ---")

correlation_probability = 0.8
rng_family = np.random.default_rng(seed=123)

# Create a DataFrame for easier family processing
family_df = pd.DataFrame(
    {
        "person_id": person.PH_SEQ * 100 + person.P_SEQ,
        "household_id": person.PH_SEQ,
        "family_id": person.PH_SEQ * 10 + person.PF_SEQ,
        "age": person.A_AGE,
        "parent1_line": person.PEPAR1,  # Line number of first parent
        "parent2_line": person.PEPAR2,  # Line number of second parent
        "line_number": person.A_LINENO,
        "ssn_code": ssn_card_type,
    }
)

# Identify children (those with parent pointers)
has_parent_pointer = (
    (family_df.parent1_line > 0) | (family_df.parent2_line > 0)
).to_numpy()
children = family_df[has_parent_pointer]
child_positions = np.flatnonzero(has_parent_pointer).tolist()

# Plain Python lists keep the per-person lookups below cheap.
household_of = family_df.household_id.to_numpy().tolist()
line_of = family_df.line_number.to_numpy().tolist()
parent1_of = family_df.parent1_line.to_numpy().tolist()
parent2_of = family_df.parent2_line.to_numpy().tolist()

# (household, line number) -> row positions of the people on that line.
# Built once so each parent lookup is a dict access instead of a scan of the
# whole frame for every child.
positions_by_line = {}
for position, key in enumerate(zip(household_of, line_of)):
    positions_by_line.setdefault(key, []).append(position)


def _parent_positions(child_position):
    """Return row positions of the child's parents within its household."""
    household = household_of[child_position]
    first_line = parent1_of[child_position]
    second_line = parent2_of[child_position]
    found = list(positions_by_line.get((household, first_line), ()))
    if second_line != first_line:  # avoid listing the same parent twice
        found.extend(positions_by_line.get((household, second_line), ()))
    return found


def _eligible_pairs(codes):
    """Yield (child_position, child_is_code_0, any_parent_is_code_0).

    `codes` is a position-indexed sequence of SSN card type codes. Only
    children holding code 0 or 3 that have at least one co-resident parent
    holding code 0 or 3 are yielded, in row order.
    """
    for child_position in child_positions:
        child_code = codes[child_position]
        if child_code not in (0, 3):
            continue
        parent_codes = [
            codes[parent_position]
            for parent_position in _parent_positions(child_position)
            if codes[parent_position] in (0, 3)
        ]
        if not parent_codes:
            continue
        yield child_position, child_code == 0, 0 in parent_codes


# Decisions are taken against a snapshot of the codes as they were before any
# adjustment, so a parent who is also somebody's child is judged by their
# original code. NOTE(review): this means a change does not cascade to
# grandchildren in three-generation households - confirm that is intended.
codes_before = np.asarray(ssn_card_type).tolist()

families_adjusted = 0
for child_position, child_is_code_0, parent_is_code_0 in _eligible_pairs(
    codes_before
):
    if child_is_code_0 == parent_is_code_0:
        continue  # already aligned; no random draw is consumed
    # One scalar draw per mismatched child, in row order, keeps the random
    # stream (and therefore the result) reproducible for the fixed seed.
    if rng_family.random() >= correlation_probability:
        continue
    # The row position is used directly; this assumes person_id is unique per
    # row, in which case it is the row the previous person_id search found.
    ssn_card_type[child_position] = 0 if parent_is_code_0 else 3
    families_adjusted += 1

print(
    f"Family correlation adjustments: {families_adjusted:,} people affected"
)

# Calculate actual correlation (only among eligible families).
# The codes are re-read from `ssn_card_type` here: `family_df.ssn_code` still
# holds the pre-adjustment values, so measuring from it would report the
# correlation before the adjustment rather than the one achieved.
codes_after = np.asarray(ssn_card_type).tolist()
children_with_parents = [
    (child_is_code_0, parent_is_code_0)
    for _, child_is_code_0, parent_is_code_0 in _eligible_pairs(codes_after)
]

if children_with_parents:
    matches = sum(
        child_code == parent_code
        for child_code, parent_code in children_with_parents
    )
    correlation = matches / len(children_with_parents)
    print(f"Achieved parent-child code 0 correlation: {correlation:.1%} ")
    print(
        f"Eligible parent-child pairs analyzed: {len(children_with_parents):,} "
    )
else:
    print(
        "No eligible parent-child relationships found for correlation calculation"
    )
1145+
10151146 # ============================================================================
10161147 # RANDOM REFINEMENT OF REMAINING CODE 0s
10171148 # ============================================================================
0 commit comments