|
# Validation settings for this config file.
config_validation:
  # Switches validation on or off.
  validate: true
  # Schema file path; presumably the schema this config is checked against - confirm in the loader.
  path: src/dev_config_schema.yaml
dev_global:
  # Logging settings
  logging_level: "DEBUG"
  # Environment settings
  dev_test: false
  # Where to load data from: hdfs, network (Windows) or s3 (CDP).
  platform: network
  load_from_feather: true
# Output targets for the run log.
runlog_writer:
  write_csv: true  # Write the runlog to a CSV file
  write_hdf5: false  # Write the runlog to an HDF5 file
  write_sql: false  # Write the runlog to a SQL database
  display: false  # Display the runlog in the terminal
  log_path: "/bat/res_dev/project_data/logs"
# Paths used when the platform is s3 (absolute, unlike the relative
# `logs_foldername` under `network_paths`).
s3_paths:
  logs_foldername: "/bat/res_dev/project_data/logs/run_logs"
# Folder and sub-path names for the staging step.
# NOTE(review): sub-paths look relative to `folder` - confirm in the path-building code.
staging_paths:
  folder: "01_staging"
  feather_output: "feather"
  staging_output_path: "staging_qa/full_responses_qa"
  pcode_val_path: "staging_qa/postcode_validation"
# Folder and sub-path names for the freezing step.
# The two `frozen_data_staged*` keys share one value, as do the
# amendments and additions keys.
freezing_paths:
  folder: "02_freezing"
  frozen_data_staged_output_path: "frozen_data_staged"
  frozen_data_staged_path: "frozen_data_staged"
  freezing_changes_to_review_path: "changes_to_review"
  freezing_amendments_path: "freezing_updates"
  freezing_additions_path: "freezing_updates"
# Folder and QA sub-path names for steps 03 to 09, one mapping per step.
ni_paths:
  folder: "03_northern_ireland"
  ni_staging_output_path: "ni_staging_qa"

construction_paths:
  folder: "04_construction"
  qa_path: "construction_qa"

mapping_paths:
  folder: "05_mapping"
  qa_path: "mapping_qa"

imputation_paths:
  folder: "06_imputation"
  qa_path: "imputation_qa"
  manual_trimming_path: "manual_trimming"
  backdata_out_path: "backdata_output"

outliers_paths:
  folder: "07_outliers"
  qa_path: "outliers_qa"
  auto_outliers_path: "auto_outliers"

estimation_paths:
  folder: "08_estimation"
  qa_path: "estimation_qa"

apportionment_paths:
  folder: "09_apportionment"
  qa_path: "apportionment_qa"
# Folder name for the outputs step.
outputs_paths:
  folder: "10_outputs"
  # TODO: add all the output subpaths
  outputs_master: ""
# PNP staging QA location.
pnp_paths:
  staging_qa_path: "01_staging/pnp_staging_qa"

# Folder name used for exports.
export_paths:
  export_folder: "outgoing_export"
# Paths used when the platform is the network drive.
# Entries are either absolute (starting "R:/") or relative; the relative ones
# are presumably resolved against `root` or a survey folder - confirm in the
# path-building code.
network_paths:
  root: "R:/BERD Results System Development 2023/DAP_emulation/"
  logs_foldername: "logs/run_logs"

  # Snapshot inputs (2023). An alternative snapshot is kept commented out.
  # snapshot_path: "R:/BERD Results System Development 2023/DAP_emulation/spp_snapshots/2023_snapshots/snapshot-202312-002-b9b6048a-51c9-4669-919a-e92fc6e9c433.json"
  snapshot_path: "R:/BERD Results System Development 2023/DAP_emulation/spp_snapshots/2023_snapshots/snapshot-202312-002-85ae5659-7147-42c3-a5dd-d69beccc9e09.json"
  updated_snapshot_path: "R:/BERD Results System Development 2023/DAP_emulation/spp_snapshots/2023_snapshots/snapshot-202312-002-b9b6048a-51c9-4669-919a-e92fc6e9c433.json"
  ni_full_responses_path: "R:/BERD Results System Development 2023/DAP_emulation/2023_surveys/BERD/03_northern_ireland/2023/ONS_Data_RD2022_Revised_Dataset_Weighted_Unrounded_NISRA.csv"

  # 2022 paths (commented out)
  # snapshot_path: "R:/BERD Results System Development 2023/DAP_emulation/spp_snapshots/2022_snapshots/snapshot-202212-002-83b5bacd-7c99-45cf-b989-d43d762dd054.json"
  # updated_snapshot_path: "R:/BERD Results System Development 2023/DAP_emulation/spp_snapshots/2022_snapshots/snapshot-202212-002-83b5bacd-7c99-45cf-b989-d43d762dd054.json"

  # Freezing data paths
  frozen_data_staged_output_path: "02_freezing/frozen_data_staged/"
  berd_frozen_data_staged_path: "02_freezing/frozen_data_staged/2023_FROZEN_staged_full_responses_25-04-29_v104.csv"
  pnp_frozen_data_staged_path: "02_freezing/frozen_data_staged/PNP_2023_FROZEN_staged_full_responses_25-01-29_v598.csv"
  freezing_changes_to_review_path: "02_freezing/changes_to_review/"
  freezing_additions_path: "02_freezing/freezing_updates/2023_freezing_additions_to_review_25-04-29_v108_all_true.csv"
  freezing_deletions_path: "02_freezing/freezing_updates/2023_freezing_deletions_to_review_25-04-29_v108_all_true.csv"
  freezing_amendments_path: "02_freezing/freezing_updates/2023_freezing_amendments_to_review_25-04-29_v108_all_true.csv"

  # Imputation and outliers input paths
  # backdata_path: "R:/BERD Results System Development 2023/DAP_emulation/2021_surveys/BERD/06_imputation/backdata_output/2021_backdata_oct_24.csv"
  backdata_path: "R:/BERD Results System Development 2023/DAP_emulation/2022_surveys/BERD/06_imputation/backdata_output/2022_backdata_published_v347.csv"
  pnp_backdata_path: "R:/BERD Results System Development 2023/DAP_emulation/2021_surveys/PNP/06_imputation/backdata_output/PNP_2021_backdata_with_pg.csv"
  manual_imp_trim_path: "06_imputation/manual_trimming/2023_manual_trimming_v1.csv"
  manual_outliers_path: "07_outliers/manual_outliers/2023_manual_outliers_v1.csv"

  # Construction paths
  all_data_construction_file_path: "04_construction/manual_construction/2023_test_construction_file_v3.csv"
  postcode_construction_file_path: "04_construction/manual_construction/2023_test_postcode_construction_file.csv"
  construction_file_path_ni: "04_construction/manual_construction/test_construction_ni_file.csv"

  # Postcode paths
  postcode_masterlist: "R:/BERD Results System Development 2023/DAP_emulation/ONS_Postcode_Reference/postcodes_pcd2_itl.csv"
  pcode_val_path: "01_staging/staging_qa/postcode_validation"
# Schema paths: one TOML schema file per entry, all under config/output_schemas/.
schema_paths:
  manual_trimming_schema: "config/output_schemas/manual_trimming_qa_schema.toml"
  short_form_schema: "config/output_schemas/short_form_schema.toml"
  long_form_schema: "config/output_schemas/long_form_schema.toml"
  tau_schema: "config/output_schemas/tau_schema.toml"
  gb_sas_schema: "config/output_schemas/gb_sas_schema.toml"
  ni_sas_schema: "config/output_schemas/ni_sas_schema.toml"
  intram_by_pg_gb_schema: "config/output_schemas/intram_by_pg_gb_schema.toml"
  intram_by_pg_uk_schema: "config/output_schemas/intram_by_pg_uk_schema.toml"
  intram_gb_itl1_schema: "config/output_schemas/intram_gb_itl1_schema.toml"
  intram_gb_itl2_schema: "config/output_schemas/intram_gb_itl2_schema.toml"
  intram_uk_itl1_schema: "config/output_schemas/intram_uk_itl1_schema.toml"
  intram_uk_itl2_schema: "config/output_schemas/intram_uk_itl2_schema.toml"
  intram_by_sic_schema: "config/output_schemas/intram_by_sic_schema.toml"
  status_filtered_qa_schema: "config/output_schemas/status_filtered_qa_schema.toml"
  fte_total_qa_schema: "config/output_schemas/fte_total_qa_schema.toml"
  frozen_group_schema: "config/output_schemas/frozen_group_schema.toml"
  full_estimation_qa_schema: "config/output_schemas/full_estimation_qa_schema.toml"
  full_responses_imputed_schema: "config/output_schemas/full_responses_imputed_schema.toml"
  staged_full_responses_schema: "config/output_schemas/staged_full_responses_schema.toml"
  invalid_unrecognised_postcodes_schema: "config/output_schemas/invalid_unrecognised_postcodes_schema.toml"
  full_responses_mapped_schema: "config/output_schemas/full_responses_mapped_schema.toml"
  pnp_national_accounts_schema: "config/output_schemas/pnp_national_accounts_schema.toml"
| 118 | + |
# Export config for users
mappers:
  geo_cols:
    - "ITL221CD"
    - "ITL221NM"
    - "ITL121CD"
    - "ITL121NM"
  gb_itl: "LAU121CD"
  ni_itl: "N92000002"
outliers:
  # NOT for user config. Columns to flag for outliers.
  flag_cols: ["701", "702", "703", "704", "705", "706", "707"]

devtest:
  # Flow list wrapped to stay within the line-length limit; values unchanged.
  seltype_list: [1, 2, 3, 5, 6, 7, 9, 10, 11, 13,
                 14, 15, 17, 18, 19, 21, 22, 23, 25, 26,
                 27, 29, 30, 31, 33, 34, 35, 37, 38, 39]
# File names for the CSV run logs.
log_filenames:
  main: "main_runlog.csv"
  configs: "configs_runlog.csv"
  logs: "logs_runlog.csv"

# Settings for the SQL run log.
run_log_sql:
  log_db: "test_runlog"
  log_mode: "append"
estimation:
  # Column names are quoted so they load as strings, not integers.
  numeric_cols:
    - "701"
    - "702"
    - "703"
    - "704"
    - "705"
    - "706"
    - "707"
    - "709"
    - "710"
    - "711"

imputation:
  lf_target_vars:
    - "211"
    - "305"
    - "emp_researcher"
    - "emp_technician"
    - "emp_other"
    - "headcount_res_m"
    - "headcount_res_f"
    - "headcount_tec_m"
    - "headcount_tec_f"
    - "headcount_oth_m"
    - "headcount_oth_f"
  sum_cols:
    - "emp_total"
    - "headcount_tot_m"
    - "headcount_tot_f"
    - "headcount_total"
# Maps each key column to a list of columns.
# NOTE(review): presumably each key is a total and the list its components - confirm with the consumer.
# Numeric-looking keys stay quoted so they load as strings.
breakdowns:
  "211":
    - "202"
    - "203"
    - "204"
    - "205"
    - "206"
    - "207"
    - "209"
    - "210"
    - "212"
    - "214"
    - "216"
    - "218"
    - "219"
    - "220"
    - "221"
    - "222"
    - "223"
    - "225"
    - "226"
    - "227"
    - "228"
    - "229"
    - "237"
    - "242"
    - "243"
    - "244"
    - "245"
    - "246"
    - "247"
    - "248"
    - "249"
    - "250"
  "305":
    - "302"
    - "303"
    - "304"
  emp_total:
    - "emp_researcher"
    - "emp_technician"
    - "emp_other"
  headcount_total:
    - "headcount_res_m"
    - "headcount_res_f"
    - "headcount_tec_m"
    - "headcount_tec_f"
    - "headcount_oth_m"
    - "headcount_oth_f"
# Column groups used by the consistency checks, one mapping per `*_totals` group.
# NOTE(review): the nesting under each `*_totals` key is reconstructed from a
# flattened source (otherwise `headcount_tot_m` / `headcount_tot_f` would be
# duplicate keys) - confirm against the original file.
# All column names use double quotes, matching the rest of this file.
consistency_checks:
  2xx_totals:
    purchases_split: ["222", "223", "203"]
    sal_oth_expend: ["202", "203", "204"]
    research_expend: ["205", "206", "207", "204"]
    capex: ["219", "220", "209", "210"]
    intram: ["204", "210", "211"]
    funding: ["212", "214", "216", "242", "250", "243", "244", "245", "246", "247", "248", "249", "218"]
    ownership: ["225", "226", "227", "228", "229", "237", "218"]
    equality: ["211", "218"]
    inequality: ["221"]
  3xx_totals:
    purchases: ["302", "303", "304", "305"]
  4xx_totals:
    emp_civil: ["405", "407", "409", "411"]
    emp_defence: ["406", "408", "410", "412"]
  5xx_totals:
    headcount_tot_m: ["501", "503", "505", "507"]
    headcount_tot_f: ["502", "504", "506", "508"]
  emp_xx_totals:
    employment: ["emp_researcher", "emp_technician", "emp_other", "emp_total"]
  hc_xx_totals:
    headcount_tot_m: ["headcount_res_m", "headcount_tec_m", "headcount_oth_m", "headcount_tot_m"]
    headcount_tot_f: ["headcount_res_f", "headcount_tec_f", "headcount_oth_f", "headcount_tot_f"]
    headcount_total: ["headcount_tot_m", "headcount_tot_f", "headcount_total"]
  6xx_totals:
    site_percentage: ["602"]
  7xx_a_totals:
    sf_expend: ["701", "702", "709"]
    sf_purchases: ["703", "704", "710"]
  7xx_b_totals:
    sf_fte: ["706", "707", "711"]
    sf_headcount: ["705"]
| 237 | + |
# S3 connection settings.
s3:
  ssl_file: "/etc/pki/tls/certs/ca-bundle.crt"
  s3_bucket: "onscdp-dev-data01-5320d6ca"
  # Alternative bucket (commented out):
  # s3_bucket: "onscdp-mig-data01-0221a8af"
0 commit comments