From 640896cac8a2bc7d359f9a0f6116f42098c4a709 Mon Sep 17 00:00:00 2001 From: risa Date: Fri, 15 Nov 2024 16:46:01 -0600 Subject: [PATCH 1/8] add yml properties for models in this folder --- dbt/models/staging/_staging_properties.yml | 718 +++++++++++++++++++++ 1 file changed, 718 insertions(+) create mode 100644 dbt/models/staging/_staging_properties.yml diff --git a/dbt/models/staging/_staging_properties.yml b/dbt/models/staging/_staging_properties.yml new file mode 100644 index 0000000..d86cd3a --- /dev/null +++ b/dbt/models/staging/_staging_properties.yml @@ -0,0 +1,718 @@ +version: 2 + +models: + - name: stg_all_candidates + description: | + "A candidate is an individual seeking nomination for election to a federal office. People become candidates when they (or agents working on their behalf) + raise contributions or make expenditures that exceed $5,000." + columns: + - name: cand_id + data_type: varchar + description: | + "A unique identifier assigned to each candidate registered with the FEC. If a person runs for several offices, that person will have separate candidate IDs for each office. + First character indicates office - [P]residential, [H]ouse, [S]enate]. + Second character is the last digit of the two-year period the ID was created. + Third and fourth is the candidate state. Presidential IDs don't have state. + Fifth and sixth is the district when the candidate first ran. This does not change if the candidate/member's district changes during re-districting. + Presidential IDs don't have districts. The rest is sequence." + + - name: cand_name + data_type: varchar + description: "" + + - name: cand_ici + data_type: varchar + description: "" + + - name: pty_cd + data_type: varchar + description: "Three-letter code for the party affiliated with a candidate or committee. For example, DEM for Democratic Party and REP for Republican Party." 
+ + - name: cand_pty_affiliation + data_type: varchar + description: "" + + - name: ttl_receipts + data_type: decimal(14,2) + description: "" + + - name: trans_from_auth + data_type: decimal(14,2) + description: "" + + - name: ttl_disb + data_type: decimal(14,2) + description: "" + + - name: trans_to_auth + data_type: decimal(14,2) + description: "" + + - name: coh_bop + data_type: decimal(14,2) + description: "" + + - name: coh_cop + data_type: decimal(14,2) + description: "" + + - name: cand_contrib + data_type: decimal(14,2) + description: "" + + - name: cand_loans + data_type: decimal(14,2) + description: "" + + - name: other_loans + data_type: decimal(14,2) + description: "" + + - name: cand_loan_repay + data_type: decimal(14,2) + description: "" + + - name: other_loan_repay + data_type: decimal(14,2) + description: "" + + - name: debts_owed_by + data_type: decimal(14,2) + description: "" + + - name: ttl_indiv_contrib + data_type: decimal(14,2) + description: "" + + - name: cand_office_st + data_type: varchar + description: "" + + - name: cand_office_district + data_type: varchar + description: "Two-digit US House distirict of the office the candidate is running for. Presidential, Senate and House at-large candidates will have District 00." 
+ + - name: spec_election + data_type: varchar + description: "" + + - name: prim_election + data_type: varchar + description: "" + + - name: run_election + data_type: varchar + description: "" + + - name: gen_election + data_type: varchar + description: "" + + - name: gen_election_precent + data_type: decimal(7,4) + description: "" + + - name: other_pol_cmte_contrib + data_type: decimal(14,2) + description: "" + + - name: pol_pty_contrib + data_type: decimal(14,2) + description: "" + + - name: cvg_end_dt + data_type: varchar + description: "" + + - name: indiv_refunds + data_type: decimal(14,2) + description: "" + + - name: cmte_refunds + data_type: decimal(14,2) + description: "" + + - name: stg_candidate_committee_linkage + description: "" + columns: + - name: cand_id + data_type: varchar + description: "" + + - name: cand_election_yr + data_type: varchar + description: "" + + - name: fec_election_yr + data_type: varchar + description: "" + + - name: cmte_id + data_type: varchar + description: "" + + - name: cmte_tp + data_type: varchar + description: "" + + - name: cmte_dsgn + data_type: varchar + description: "" + + - name: linkage_id + data_type: varchar + description: "" + + - name: stg_candidate_master + description: "" + columns: + - name: cand_id + data_type: varchar + description: "" + + - name: cand_name + data_type: varchar + description: "" + + - name: cand_pty_affiliation + data_type: varchar + description: "" + + - name: cand_election_yr + data_type: varchar + description: "" + + - name: cand_office_st + data_type: varchar + description: "" + + - name: cand_office + data_type: varchar + description: "" + + - name: cand_office_district + data_type: varchar + description: "" + + - name: cand_ici + data_type: varchar + description: "" + + - name: cand_status + data_type: varchar + description: "" + + - name: cand_pcc + data_type: varchar + description: "" + + - name: cand_st1 + data_type: varchar + description: "" + + - name: cand_st2 + data_type: 
varchar + description: "" + + - name: cand_city + data_type: varchar + description: "" + + - name: cand_st + data_type: varchar + description: "" + + - name: cand_zip + data_type: varchar + description: "" + + - name: stg_committee_master + description: "" + columns: + - name: cmte_id + data_type: varchar + description: "" + + - name: cmte_nm + data_type: varchar + description: "" + + - name: tres_nm + data_type: varchar + description: "" + + - name: cmte_st1 + data_type: varchar + description: "" + + - name: cmte_st2 + data_type: varchar + description: "" + + - name: cmte_city + data_type: varchar + description: "" + + - name: cmte_st + data_type: varchar + description: "" + + - name: cmte_zip + data_type: varchar + description: "" + + - name: cmte_dsgn + data_type: varchar + description: "" + + - name: cmte_tp + data_type: varchar + description: "" + + - name: cmte_pty_affiliation + data_type: varchar + description: "" + + - name: cmte_filing_freq + data_type: varchar + description: "" + + - name: org_tp + data_type: varchar + description: "" + + - name: connected_org_nm + data_type: varchar + description: "" + + - name: cand_id + data_type: varchar + description: "" + + - name: stg_contributions_from_committees_to_candidates + description: "" + columns: + - name: cmte_id + data_type: varchar + description: "" + + - name: amndt_ind + data_type: varchar + description: "" + + - name: rpt_tp + data_type: varchar + description: "" + + - name: transaction_pgi + data_type: varchar + description: "" + + - name: image_num + data_type: varchar + description: "" + + - name: transaction_tp + data_type: varchar + description: "" + + - name: entity_tp + data_type: varchar + description: "" + + - name: name + data_type: varchar + description: "" + + - name: city + data_type: varchar + description: "" + + - name: state + data_type: varchar + description: "" + + - name: zip_code + data_type: varchar + description: "" + + - name: employer + data_type: varchar + description: "" + 
+ - name: occupation + data_type: varchar + description: "" + + - name: transaction_dt + data_type: varchar + description: "" + + - name: transaction_amt + data_type: decimal(14,2) + description: "" + + - name: other_id + data_type: varchar + description: "" + + - name: cand_id + data_type: varchar + description: "" + + - name: tran_id + data_type: varchar + description: "" + + - name: file_num + data_type: varchar + description: "" + + - name: memo_cd + data_type: varchar + description: "" + + - name: memo_text + data_type: varchar + description: "" + + - name: sub_id + data_type: varchar + description: "" + + - name: stg_house_senate + description: "" + columns: + - name: cand_id + data_type: varchar + description: "" + + - name: cand_name + data_type: varchar + description: "" + + - name: cand_ici + data_type: varchar + description: "" + + - name: pty_cd + data_type: varchar + description: "" + + - name: cand_pty_affiliation + data_type: varchar + description: "" + + - name: ttl_receipts + data_type: decimal(14,2) + description: "" + + - name: trans_from_auth + data_type: decimal(14,2) + description: "" + + - name: ttl_disb + data_type: decimal(14,2) + description: "" + + - name: trans_to_auth + data_type: decimal(14,2) + description: "" + + - name: coh_bop + data_type: decimal(14,2) + description: "" + + - name: coh_cop + data_type: decimal(14,2) + description: "" + + - name: cand_contrib + data_type: decimal(14,2) + description: "" + + - name: cand_loans + data_type: decimal(14,2) + description: "" + + - name: other_loans + data_type: decimal(14,2) + description: "" + + - name: cand_loan_repay + data_type: decimal(14,2) + description: "" + + - name: other_loan_repay + data_type: decimal(14,2) + description: "" + + - name: debts_owed_by + data_type: decimal(14,2) + description: "" + + - name: ttl_indiv_contrib + data_type: decimal(14,2) + description: "" + + - name: cand_office_st + data_type: varchar + description: "" + + - name: cand_office_district + 
data_type: varchar + description: "" + + - name: spec_election + data_type: varchar + description: "" + + - name: prim_election + data_type: varchar + description: "" + + - name: run_election + data_type: varchar + description: "" + + - name: gen_election + data_type: varchar + description: "" + + - name: gen_election_precent + data_type: decimal(7,4) + description: "" + + - name: other_pol_cmte_contrib + data_type: decimal(14,2) + description: "" + + - name: pol_pty_contrib + data_type: decimal(14,2) + description: "" + + - name: cvg_end_dt + data_type: varchar + description: "" + + - name: indiv_refunds + data_type: decimal(14,2) + description: "" + + - name: cmte_refunds + data_type: decimal(14,2) + description: "" + + - name: stg_operating_expenditures + description: "" + columns: + - name: cmte_id + data_type: varchar + description: "" + + - name: amndt_ind + data_type: varchar + description: "" + + - name: rpt_yr + data_type: varchar + description: "" + + - name: rpt_tp + data_type: varchar + description: "" + + - name: image_num + data_type: varchar + description: "" + + - name: line_num + data_type: varchar + description: "" + + - name: form_tp_cd + data_type: varchar + description: "" + + - name: sched_tp_cd + data_type: varchar + description: "" + + - name: name + data_type: varchar + description: "" + + - name: city + data_type: varchar + description: "" + + - name: state + data_type: varchar + description: "" + + - name: zip_code + data_type: varchar + description: "" + + - name: transaction_dt + data_type: varchar + description: "" + + - name: transaction_amt + data_type: decimal(14,2) + description: "" + + - name: transaction_pgi + data_type: varchar + description: "" + + - name: purpose + data_type: varchar + description: "" + + - name: category + data_type: varchar + description: "" + + - name: category_desc + data_type: varchar + description: "" + + - name: memo_cd + data_type: varchar + description: "" + + - name: memo_text + data_type: varchar + 
description: "" + + - name: entity_tp + data_type: varchar + description: "" + + - name: sub_id + data_type: varchar + description: "" + + - name: file_num + data_type: varchar + description: "" + + - name: tran_id + data_type: varchar + description: "" + + - name: back_ref_tran_id + data_type: varchar + description: "" + + - name: stg_pac_summary + description: "" + columns: + - name: cmte_id + data_type: varchar + description: "" + + - name: cmte_nm + data_type: varchar + description: "" + + - name: cmte_tp + data_type: varchar + description: "" + + - name: cmte_dsgn + data_type: varchar + description: "" + + - name: cmte_filing_freq + data_type: varchar + description: "" + + - name: ttl_receipts + data_type: decimal(14,2) + description: "" + + - name: trans_from_aff + data_type: decimal(14,2) + description: "" + + - name: indv_contrib + data_type: decimal(14,2) + description: "" + + - name: other_pol_cmte_contrib + data_type: decimal(14,2) + description: "" + + - name: cand_contrib + data_type: decimal(14,2) + description: "" + + - name: cand_loans + data_type: decimal(14,2) + description: "" + + - name: ttl_loans_received + data_type: decimal(14,2) + description: "" + + - name: ttl_disb + data_type: decimal(14,2) + description: "" + + - name: tranf_to_aff + data_type: decimal(14,2) + description: "" + + - name: indv_refunds + data_type: decimal(14,2) + description: "" + + - name: other_pol_cmte_refunds + data_type: decimal(14,2) + description: "" + + - name: cand_loan_repay + data_type: decimal(14,2) + description: "" + + - name: loan_repay + data_type: decimal(14,2) + description: "" + + - name: coh_bop + data_type: decimal(14,2) + description: "" + + - name: coh_cop + data_type: decimal(14,2) + description: "" + + - name: debts_owed_by + data_type: decimal(14,2) + description: "" + + - name: nonfed_trans_received + data_type: decimal(14,2) + description: "" + + - name: contrib_to_other_cmte + data_type: decimal(14,2) + description: "" + + - name: ind_exp + 
data_type: decimal(14,2) + description: "" + + - name: pty_coord_exp + data_type: decimal(14,2) + description: "" + + - name: nonfed_share_exp + data_type: decimal(14,2) + description: "" + + - name: cvg_end_dt + data_type: varchar + description: "" From 76a8421210ac3e3fd99ee826fb82c543b33f849a Mon Sep 17 00:00:00 2001 From: risa Date: Fri, 15 Nov 2024 16:59:25 -0600 Subject: [PATCH 2/8] started to document models --- dbt/models/staging/_staging_properties.yml | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/dbt/models/staging/_staging_properties.yml b/dbt/models/staging/_staging_properties.yml index d86cd3a..e0a0eab 100644 --- a/dbt/models/staging/_staging_properties.yml +++ b/dbt/models/staging/_staging_properties.yml @@ -133,7 +133,7 @@ models: description: "" - name: stg_candidate_committee_linkage - description: "" + description: "Bridge data source connecting Candidate ID to Committee ID" columns: - name: cand_id data_type: varchar @@ -159,12 +159,12 @@ models: data_type: varchar description: "" - - name: linkage_id + - name: linkage_id # unique key data_type: varchar description: "" - name: stg_candidate_master - description: "" + description: "Detailed records for each candidate, including address, office they are running for, party affiliation, etc." columns: - name: cand_id data_type: varchar @@ -200,7 +200,12 @@ models: - name: cand_status data_type: varchar - description: "" + description: | + "One-letter code explaining if the candidate is: + - C present candidate + - F future candidate + - N not yet a candidate + - P prior candidate" - name: cand_pcc data_type: varchar @@ -227,11 +232,13 @@ models: description: "" - name: stg_committee_master - description: "" + description: | + "All FEC filers, even if not registered as a committee, are included. Officially, committees include the committees and organizations that file with the FEC. 
Several different types of organizations file financial reports with the FEC: + Campaign committees authorized by particular candidates to raise and spend funds in their campaigns. Non-party committees (e.g., PACs), some of which may be sponsored by corporations, unions, trade or membership groups, etc. Political party committees at the national, state, and local levels. Groups and individuals making only independent expenditures Corporations, unions, and other organizations making internal communicationsThe committee endpoints primarily use data from FEC registration Form 1 and Form 2." columns: - name: cmte_id data_type: varchar - description: "" + description: "A unique identifier assigned to each committee or filer registered with the FEC. In general a committee id begins with the letter C which is followed by eight digits." - name: cmte_nm data_type: varchar From 8317ad4c2b790b54309ecf9eecfa086a7dfad453 Mon Sep 17 00:00:00 2001 From: risa Date: Tue, 19 Nov 2024 21:20:54 -0600 Subject: [PATCH 3/8] fix long-form and fill in descriptions from website --- dbt/models/staging/_staging_properties.yml | 41 ++++++++++------------ 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/dbt/models/staging/_staging_properties.yml b/dbt/models/staging/_staging_properties.yml index e0a0eab..889eebe 100644 --- a/dbt/models/staging/_staging_properties.yml +++ b/dbt/models/staging/_staging_properties.yml @@ -3,18 +3,17 @@ version: 2 models: - name: stg_all_candidates description: | - "A candidate is an individual seeking nomination for election to a federal office. People become candidates when they (or agents working on their behalf) - raise contributions or make expenditures that exceed $5,000." + A candidate is an individual seeking nomination for election to a federal office. People become candidates when they (or agents working on their behalf) raise contributions or make expenditures that exceed $5,000. 
The all candidate summary file contains one record including summary financial information for all candidates who raised or spent money during the period no matter when they are up for election. columns: - name: cand_id data_type: varchar description: | - "A unique identifier assigned to each candidate registered with the FEC. If a person runs for several offices, that person will have separate candidate IDs for each office. - First character indicates office - [P]residential, [H]ouse, [S]enate]. - Second character is the last digit of the two-year period the ID was created. - Third and fourth is the candidate state. Presidential IDs don't have state. - Fifth and sixth is the district when the candidate first ran. This does not change if the candidate/member's district changes during re-districting. - Presidential IDs don't have districts. The rest is sequence." + A unique identifier assigned to each candidate registered with the FEC. If a person runs for several offices, that person will have separate candidate IDs for each office. + First character indicates office - [P]residential, [H]ouse, [S]enate]. + Second character is the last digit of the two-year period the ID was created. + Third and fourth is the candidate state. Presidential IDs don't have state. + Fifth and sixth is the district when the candidate first ran. This does not change if the candidate/member's district changes during re-districting. + Presidential IDs don't have districts. The rest is sequence. - name: cand_name data_type: varchar @@ -164,7 +163,7 @@ models: description: "" - name: stg_candidate_master - description: "Detailed records for each candidate, including address, office they are running for, party affiliation, etc." + description: "The candidate master file contains one record for each candidate who has either registered with the Federal Election Commission or appeared on a ballot list prepared by a state elections office. 
Includes address, office they are running for, party affiliation, etc." columns: - name: cand_id data_type: varchar @@ -201,11 +200,11 @@ models: - name: cand_status data_type: varchar description: | - "One-letter code explaining if the candidate is: - - C present candidate - - F future candidate - - N not yet a candidate - - P prior candidate" + One-letter code explaining if the candidate is: + - C present candidate + - F future candidate + - N not yet a candidate + - P prior candidate - name: cand_pcc data_type: varchar @@ -232,9 +231,7 @@ models: description: "" - name: stg_committee_master - description: | - "All FEC filers, even if not registered as a committee, are included. Officially, committees include the committees and organizations that file with the FEC. Several different types of organizations file financial reports with the FEC: - Campaign committees authorized by particular candidates to raise and spend funds in their campaigns. Non-party committees (e.g., PACs), some of which may be sponsored by corporations, unions, trade or membership groups, etc. Political party committees at the national, state, and local levels. Groups and individuals making only independent expenditures Corporations, unions, and other organizations making internal communicationsThe committee endpoints primarily use data from FEC registration Form 1 and Form 2." + description: "The committee master file contains one record for each committee registered with the Federal Election Commission. This includes federal political action committees and party committees, campaign committees for presidential, house and senate candidates, as well as groups or organizations who are spending money for or against candidates for federal office." 
columns: - name: cmte_id data_type: varchar @@ -297,7 +294,7 @@ models: description: "" - name: stg_contributions_from_committees_to_candidates - description: "" + description: "Contains records of contributions from Committees to Candidates at sub ID level." columns: - name: cmte_id data_type: varchar @@ -383,12 +380,12 @@ models: data_type: varchar description: "" - - name: sub_id + - name: sub_id # unique key data_type: varchar description: "" - name: stg_house_senate - description: "" + description: "Shows total aggregated contributions to Congressional candidates. One record per Congressional candidate." columns: - name: cand_id data_type: varchar @@ -511,7 +508,7 @@ models: description: "" - name: stg_operating_expenditures - description: "" + description: "A master file (oppexp) listing committees' operating expenditures, which are found on Schedule B." columns: - name: cmte_id data_type: varchar @@ -614,7 +611,7 @@ models: description: "" - name: stg_pac_summary - description: "" + description: "This file gives overall receipts and disbursements for each PAC and party committee registered with the commission, along with a breakdown of overall receipts by source and totals for contributions to other committees, independent expenditures made and other information." 
columns: - name: cmte_id data_type: varchar From 3e5ca7ea7ea3deaaf3bce9645b9d5fe396676763 Mon Sep 17 00:00:00 2001 From: risa Date: Tue, 19 Nov 2024 22:12:03 -0600 Subject: [PATCH 4/8] add markdown file for common docs --- dbt/docs/common_fields.md | 38 ++++++++++ dbt/models/staging/_staging_properties.yml | 85 ++++++++++------------ dbt_project.yml | 1 + 3 files changed, 79 insertions(+), 45 deletions(-) create mode 100644 dbt/docs/common_fields.md diff --git a/dbt/docs/common_fields.md b/dbt/docs/common_fields.md new file mode 100644 index 0000000..a13478c --- /dev/null +++ b/dbt/docs/common_fields.md @@ -0,0 +1,38 @@ +{% docs cand_id %} + A 9-character alpha-numeric code assigned to a candidate by the Federal Election Commission. The candidate ID for a specific candidate remains the same across election cycles as long as the candidate is running for the same office. +{% enddocs %} + +{% docs cand_ici %} + C = Challenger + I = Incumbent + O = Open Seat is used to indicate an open seat; Open seats are defined as seats where the incumbent never sought re-election. +{% enddocs %} + +{% docs cand_office_st %} + House = state of race + President = US + Senate = state of race +{% enddocs %} + +{% docs cand_pcc %} + The ID assigned by the Federal Election Commission to the candidate's PCC (principal campaign committee) for a given election cycle. + {% enddocs %} + +{% docs cand_status %} + C = Statutory candidate + F = Statutory candidate for future election + N = Not yet a statutory candidate + P = Statutory candidate in prior cycle +{% enddocs %} + +{% docs cmte_id %} + A 9-character alpha-numeric code assigned to a committee by the Federal Election Commission. Committee IDs are unique and an ID for a specific committee always remains the same. +{% enddocs %} + +{% docs office_district %} + Two-digit US House district of the office the candidate is running for. Presidential, Senate and House at-large candidates will have District 00. 
+{% enddocs %} + +{% docs pty_affiliation %} + The political party affiliation reported by the candidate. For more information about political party affiliation codes [see this list of political party codes](https://www.fec.gov/campaign-finance-data/party-code-descriptions/). +{% enddocs %} diff --git a/dbt/models/staging/_staging_properties.yml b/dbt/models/staging/_staging_properties.yml index 889eebe..ef3c40c 100644 --- a/dbt/models/staging/_staging_properties.yml +++ b/dbt/models/staging/_staging_properties.yml @@ -3,17 +3,12 @@ version: 2 models: - name: stg_all_candidates description: | - A candidate is an individual seeking nomination for election to a federal office. People become candidates when they (or agents working on their behalf) raise contributions or make expenditures that exceed $5,000. The all candidate summary file contains one record including summary financial information for all candidates who raised or spent money during the period no matter when they are up for election. + A candidate is an individual seeking nomination for election to a federal office. People become candidates when they (or agents working on their behalf) raise contributions + or make expenditures that exceed $5,000. The all candidate summary file contains one record including summary financial information for all candidates who raised or spent money during the period no matter when they are up for election. columns: - name: cand_id data_type: varchar - description: | - A unique identifier assigned to each candidate registered with the FEC. If a person runs for several offices, that person will have separate candidate IDs for each office. - First character indicates office - [P]residential, [H]ouse, [S]enate]. - Second character is the last digit of the two-year period the ID was created. - Third and fourth is the candidate state. Presidential IDs don't have state. - Fifth and sixth is the district when the candidate first ran. 
This does not change if the candidate/member's district changes during re-districting. - Presidential IDs don't have districts. The rest is sequence. + description: "{{ doc('cand_id') }}" - name: cand_name data_type: varchar @@ -21,15 +16,15 @@ models: - name: cand_ici data_type: varchar - description: "" + description: "{{ doc('cand_ici') }}" - name: pty_cd data_type: varchar - description: "Three-letter code for the party affiliated with a candidate or committee. For example, DEM for Democratic Party and REP for Republican Party." + description: "" - name: cand_pty_affiliation data_type: varchar - description: "" + description: "{{ doc('pty_affiliation') }}" - name: ttl_receipts data_type: decimal(14,2) @@ -85,11 +80,11 @@ models: - name: cand_office_st data_type: varchar - description: "" + description: ""{{ doc('cand_office_st') }}"" - name: cand_office_district data_type: varchar - description: "Two-digit US House distirict of the office the candidate is running for. Presidential, Senate and House at-large candidates will have District 00." + description: ""{{ doc('office_district') }}"" - name: spec_election data_type: varchar @@ -136,7 +131,7 @@ models: columns: - name: cand_id data_type: varchar - description: "" + description: "{{ doc('cand_id') }}" - name: cand_election_yr data_type: varchar @@ -148,7 +143,7 @@ models: - name: cmte_id data_type: varchar - description: "" + description: "{{ doc('cmte_id') }}" - name: cmte_tp data_type: varchar @@ -163,11 +158,12 @@ models: description: "" - name: stg_candidate_master - description: "The candidate master file contains one record for each candidate who has either registered with the Federal Election Commission or appeared on a ballot list prepared by a state elections office. Includes address, office they are running for, party affiliation, etc." 
+ description: "The candidate master file contains one record for each candidate who has either registered with the Federal Election Commission or appeared + on a ballot list prepared by a state elections office. Includes address, office they are running for, party affiliation, etc." columns: - name: cand_id data_type: varchar - description: "" + description: "{{ doc('cand_id') }}" - name: cand_name data_type: varchar @@ -175,67 +171,65 @@ models: - name: cand_pty_affiliation data_type: varchar - description: "" + description: "{{ doc('pty_affiliation') }}" - name: cand_election_yr data_type: varchar - description: "" + description: "Candidate's election year from a Statement of Candidacy or state ballot list" - name: cand_office_st data_type: varchar - description: "" + description: "{{ doc('cand_office_st') }}" - name: cand_office data_type: varchar - description: "" + description: | + H = House + P = President + S = Senate - name: cand_office_district data_type: varchar - description: "" + description: "{{ doc('office_district') }}" - name: cand_ici data_type: varchar - description: "" + description: "{{ doc('cand_ici') }}" - name: cand_status data_type: varchar - description: | - One-letter code explaining if the candidate is: - - C present candidate - - F future candidate - - N not yet a candidate - - P prior candidate + description: "{{ doc('cand_status') }}" - name: cand_pcc data_type: varchar - description: "" + description: "{{ doc('cand_pcc') }}" - name: cand_st1 data_type: varchar - description: "" + description: "Mailing address - street" - name: cand_st2 data_type: varchar - description: "" + description: "Mailing address - street2" - name: cand_city data_type: varchar - description: "" + description: "Mailing address - city" - name: cand_st data_type: varchar - description: "" + description: "Mailing address - state" - name: cand_zip data_type: varchar - description: "" + description: "Mailing address - ZIP code" - name: stg_committee_master 
description: "The committee master file contains one record for each committee registered with the Federal Election Commission. This includes federal political action committees and party committees, campaign committees for presidential, house and senate candidates, as well as groups or organizations who are spending money for or against candidates for federal office." columns: - name: cmte_id data_type: varchar - description: "A unique identifier assigned to each committee or filer registered with the FEC. In general a committee id begins with the letter C which is followed by eight digits." + description: "{{ doc('cmte_id') }}" - name: cmte_nm data_type: varchar @@ -275,7 +269,7 @@ models: - name: cmte_pty_affiliation data_type: varchar - description: "" + description: "{{ doc('pty_affiliation') }}" - name: cmte_filing_freq data_type: varchar @@ -291,14 +285,14 @@ models: - name: cand_id data_type: varchar - description: "" + description: "{{ doc('cand_id') }}" - name: stg_contributions_from_committees_to_candidates description: "Contains records of contributions from Committees to Candidates at sub ID level." 
columns: - name: cmte_id data_type: varchar - description: "" + description: "{{ doc('cmte_id') }}" - name: amndt_ind data_type: varchar @@ -362,7 +356,7 @@ models: - name: cand_id data_type: varchar - description: "" + description: "{{ doc('cand_id') }}" - name: tran_id data_type: varchar @@ -389,7 +383,7 @@ models: columns: - name: cand_id data_type: varchar - description: "" + description: "{{ doc('cand_id') }}" - name: cand_name data_type: varchar @@ -405,7 +399,7 @@ models: - name: cand_pty_affiliation data_type: varchar - description: "" + description: "{{ doc('pty_affiliation') }}" - name: ttl_receipts data_type: decimal(14,2) @@ -465,7 +459,7 @@ models: - name: cand_office_district data_type: varchar - description: "" + description: "{{ doc('office_district') }}" - name: spec_election data_type: varchar @@ -512,7 +506,7 @@ models: columns: - name: cmte_id data_type: varchar - description: "" + description: "{{ doc('cmte_id') }}" - name: amndt_ind data_type: varchar @@ -615,7 +609,7 @@ models: columns: - name: cmte_id data_type: varchar - description: "" + description: "{{ doc('cmte_id') }}" - name: cmte_nm data_type: varchar @@ -720,3 +714,4 @@ models: - name: cvg_end_dt data_type: varchar description: "" + diff --git a/dbt_project.yml b/dbt_project.yml index 2b51c65..9cf00c8 100644 --- a/dbt_project.yml +++ b/dbt_project.yml @@ -17,6 +17,7 @@ test-paths: ["dbt/tests"] seed-paths: ["dbt/seeds"] macro-paths: ["dbt/macros"] snapshot-paths: ["dbt/snapshots"] +docs-paths: ["dbt/docs"] clean-targets: # directories to be removed by `dbt clean` - "target" From 54fba3a3f91f24df3273db22b3ebd91f7c0a969d Mon Sep 17 00:00:00 2001 From: risa Date: Tue, 19 Nov 2024 22:13:37 -0600 Subject: [PATCH 5/8] fix syntax issue --- dbt/models/staging/_staging_properties.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbt/models/staging/_staging_properties.yml b/dbt/models/staging/_staging_properties.yml index ef3c40c..5b5e32d 100644 --- 
a/dbt/models/staging/_staging_properties.yml +++ b/dbt/models/staging/_staging_properties.yml @@ -80,11 +80,11 @@ models: - name: cand_office_st data_type: varchar - description: ""{{ doc('cand_office_st') }}"" + description: "{{ doc('cand_office_st') }}" - name: cand_office_district data_type: varchar - description: ""{{ doc('office_district') }}"" + description: "{{ doc('office_district') }}" - name: spec_election data_type: varchar From af25c78ddc045702c19920579168b0c451b19d39 Mon Sep 17 00:00:00 2001 From: risa Date: Thu, 5 Dec 2024 15:52:21 -0600 Subject: [PATCH 6/8] fix spacing --- dbt/models/staging/_staging_properties.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/dbt/models/staging/_staging_properties.yml b/dbt/models/staging/_staging_properties.yml index 5b5e32d..c0935e2 100644 --- a/dbt/models/staging/_staging_properties.yml +++ b/dbt/models/staging/_staging_properties.yml @@ -3,8 +3,7 @@ version: 2 models: - name: stg_all_candidates description: | - A candidate is an individual seeking nomination for election to a federal office. People become candidates when they (or agents working on their behalf) raise contributions - or make expenditures that exceed $5,000. The all candidate summary file contains one record including summary financial information for all candidates who raised or spent money during the period no matter when they are up for election. + A candidate is an individual seeking nomination for election to a federal office. People become candidates when they (or agents working on their behalf) raise contributions or make expenditures that exceed $5,000. The all candidate summary file contains one record including summary financial information for all candidates who raised or spent money during the period no matter when they are up for election. 
columns: - name: cand_id data_type: varchar @@ -158,8 +157,7 @@ models: description: "" - name: stg_candidate_master - description: "The candidate master file contains one record for each candidate who has either registered with the Federal Election Commission or appeared - on a ballot list prepared by a state elections office. Includes address, office they are running for, party affiliation, etc." + description: "The candidate master file contains one record for each candidate who has either registered with the Federal Election Commission or appeared on a ballot list prepared by a state elections office. Includes address, office they are running for, party affiliation, etc." columns: - name: cand_id data_type: varchar From d7b71ae3fbf937c85e452292dd50bdbaa6d29dab Mon Sep 17 00:00:00 2001 From: risa Date: Thu, 5 Dec 2024 20:10:47 -0600 Subject: [PATCH 7/8] add more column defs --- dbt/docs/common_fields.md | 39 +++++++++++++++- dbt/models/staging/_staging_properties.yml | 52 ++++++++++++---------- 2 files changed, 66 insertions(+), 25 deletions(-) diff --git a/dbt/docs/common_fields.md b/dbt/docs/common_fields.md index a13478c..79b0f72 100644 --- a/dbt/docs/common_fields.md +++ b/dbt/docs/common_fields.md @@ -1,3 +1,7 @@ +{% docs cand_election_yr %} + Candidate's election year from a Statement of Candidacy or state ballot list +{% enddocs %} + {% docs cand_id %} A 9-character alpha-numeric code assigned to a candidate by the Federal Election Commission. The candidate ID for a specific candidate remains the same across election cycles as long as the candidate is running for the same office. 
{% enddocs %} @@ -21,14 +25,40 @@ {% docs cand_status %} C = Statutory candidate F = Statutory candidate for future election - N = Not yet a statutory candidate - P = Statutory candidate in prior cycle + N = Not yet a statutory candidate + P = Statutory candidate in prior cycle +{% enddocs %} + +{% docs cmte_dsgn %} + A = Authorized by a candidate + B = Lobbyist/Registrant PAC + D = Leadership PAC + J = Joint fundraiser + P = Principal campaign committee of a candidate + U = Unauthorized +{% enddocs %} + +{% docs cmte_filing_freq %} + A = Administratively terminated + D = Debt + M = Monthly filer + Q = Quarterly filer + T = Terminated + W = Waived {% enddocs %} {% docs cmte_id %} A 9-character alpha-numeric code assigned to a committee by the Federal Election Commission. Committee IDs are unique and an ID for a specific committee always remains the same. {% enddocs %} +{% docs cmte_tp %} + Committee type. See codes: https://www.fec.gov/campaign-finance-data/committee-type-code-descriptions/ +{% enddocs %} + +{% docs cvg_end_dt %} + Coverage end date. Through date. +{% enddocs %} + {% docs office_district %} Two-digit US House distirict of the office the candidate is running for. Presidential, Senate and House at-large candidates will have District 00. {% enddocs %} @@ -36,3 +66,8 @@ {% docs pty_affiliation %} The political party affiliation reported by the candidate. For more information about political party affiliation codes [see this list of political party codes](https://www.fec.gov/campaign-finance-data/party-code-descriptions/). {% enddocs %} + +{% docs tres_nm %} + The officially registered treasurer for the committee. 
+{% enddocs %} + diff --git a/dbt/models/staging/_staging_properties.yml b/dbt/models/staging/_staging_properties.yml index c0935e2..9b82e5e 100644 --- a/dbt/models/staging/_staging_properties.yml +++ b/dbt/models/staging/_staging_properties.yml @@ -87,23 +87,23 @@ models: - name: spec_election data_type: varchar - description: "" + description: "Special election status. Election result data included in 1996-2006 files only." - name: prim_election data_type: varchar - description: "" + description: "Primary election status. Election result data included in 1996-2006 files only." - name: run_election data_type: varchar - description: "" + description: "Runoff election status. Election result data included in 1996-2006 files only." - name: gen_election data_type: varchar - description: "" + description: "General election status. Election result data included in 1996-2006 files only." - name: gen_election_precent data_type: decimal(7,4) - description: "" + description: "General election percent. Election result data included in 1996-2006 files only." - name: other_pol_cmte_contrib data_type: decimal(14,2) @@ -115,7 +115,7 @@ models: - name: cvg_end_dt data_type: varchar - description: "" + description: "{{ doc('cvg_end_dt') }}" - name: indiv_refunds data_type: decimal(14,2) @@ -134,11 +134,11 @@ models: - name: cand_election_yr data_type: varchar - description: "" + description: "{{ doc('cand_election_yr') }}" - name: fec_election_yr data_type: varchar - description: "" + description: "Active 2-year period." 
- name: cmte_id data_type: varchar @@ -146,15 +146,15 @@ models: - name: cmte_tp data_type: varchar - description: "" + description: "{{ doc('cmte_tp') }}" - name: cmte_dsgn data_type: varchar - description: "" + description: "{{ doc('cmte_dsgn') }}" - name: linkage_id # unique key data_type: varchar - description: "" + description: "Unique link ID" - name: stg_candidate_master description: "The candidate master file contains one record for each candidate who has either registered with the Federal Election Commission or appeared on a ballot list prepared by a state elections office. Includes address, office they are running for, party affiliation, etc." @@ -173,7 +173,7 @@ models: - name: cand_election_yr data_type: varchar - description: "Candidate's election year from a Statement of Candidacy or state ballot list" + description: "{{ doc('cand_election_yr') }}" - name: cand_office_st data_type: varchar @@ -235,7 +235,7 @@ models: - name: tres_nm data_type: varchar - description: "" + description: "{{ doc('tres_nm') }}" - name: cmte_st1 data_type: varchar @@ -259,11 +259,11 @@ models: - name: cmte_dsgn data_type: varchar - description: "" + description: "{{ doc('cmte_dsgn') }}" - name: cmte_tp data_type: varchar - description: "" + description: "{{ doc('cmte_tp') }}" - name: cmte_pty_affiliation data_type: varchar @@ -271,11 +271,17 @@ models: - name: cmte_filing_freq data_type: varchar - description: "" + description: "{{ doc('cmte_filing_freq') }}" - name: org_tp data_type: varchar - description: "" + description: | + C = Corporation + L = Labor organization + M = Membership organization + T = Trade association + V = Cooperative + W = Corporation without capital stock - name: connected_org_nm data_type: varchar @@ -389,7 +395,7 @@ models: - name: cand_ici data_type: varchar - description: "" + description: "{{ doc('cand_ici') }}" - name: pty_cd data_type: varchar @@ -489,7 +495,7 @@ models: - name: cvg_end_dt data_type: varchar - description: "" + description: 
"{{ doc('cvg_end_dt') }}" - name: indiv_refunds data_type: decimal(14,2) @@ -615,15 +621,15 @@ models: - name: cmte_tp data_type: varchar - description: "" + description: "{{ doc('cmte_tp') }}" - name: cmte_dsgn data_type: varchar - description: "" + description: "{{ doc('cmte_dsgn') }}" - name: cmte_filing_freq data_type: varchar - description: "" + description: "{{ doc('cmte_filing_freq') }}" - name: ttl_receipts data_type: decimal(14,2) @@ -711,5 +717,5 @@ models: - name: cvg_end_dt data_type: varchar - description: "" + description: "{{ doc('cvg_end_dt') }}" From 3226a6f150a5abacb502ceff8e32b18593e4dd96 Mon Sep 17 00:00:00 2001 From: risa Date: Thu, 5 Dec 2024 21:10:13 -0600 Subject: [PATCH 8/8] add some more pac columns --- dbt/docs/common_fields.md | 52 +++++++++++++++++++ dbt/models/staging/_staging_properties.yml | 60 +++++++++++----------- 2 files changed, 83 insertions(+), 29 deletions(-) diff --git a/dbt/docs/common_fields.md b/dbt/docs/common_fields.md index 79b0f72..9bd7a66 100644 --- a/dbt/docs/common_fields.md +++ b/dbt/docs/common_fields.md @@ -1,3 +1,7 @@ +{% docs amndt_ind %} + Amendment indicator. Indicates if the report being filed is new (N), an amendment (A) to a previous report, or a termination (T) report. +{% enddocs %} + {% docs cand_election_yr %} Candidate's election year from a Statement of Candidacy or state ballot list {% enddocs %} @@ -59,6 +63,38 @@ Coverage end date. Through date. {% enddocs %} +{% docs entity_tp %} + ONLY VALID FOR ELECTRONIC FILINGS received after April 2002. + CAN = Candidate + CCM = Candidate committee + COM = Committee + IND = Individual (a person) + ORG = Organization (not a committee and not a person) + PAC = Political action committee + PTY = Party organization +{% enddocs %} + +{% docs file_num %} + File number/report id. Unique report id. 
+{% enddocs %} + +{% docs image_num %} + 11-digit Image Number Format + YYOORRRFFFF + YY - scanning year + OO - office (01 - House, 02 - Senate, 03 - FEC Paper, 90-99 - FEC Electronic) + RRR - reel number + FFFF- frame number + + 18-digit Image Number Format (June 29, 2015) + YYYYMMDDSSPPPPPPPP + YYYY - scanning year + MM - scanning month + DD - scanning day + SS - source (02 - Senate, 03 - FEC Paper, 90-99 - FEC Electronic) + PPPPPPPP - page (reset to zero every year on January 1) +{% enddocs %} + {% docs office_district %} Two-digit US House distirict of the office the candidate is running for. Presidential, Senate and House at-large candidates will have District 00. {% enddocs %} @@ -67,6 +103,22 @@ The political party affiliation reported by the candidate. For more information about political party affiliation codes [see this list of political party codes](https://www.fec.gov/campaign-finance-data/party-code-descriptions/). {% enddocs %} +{% docs rpt_tp %} + Report type. See report type codes: https://www.fec.gov/campaign-finance-data/report-type-code-descriptions/ +{% enddocs %} + +{% docs sub_id %} + FEC record number and unique row ID +{% enddocs %} + +{% docs tran_id %} + ONLY VALID FOR ELECTRONIC FILINGS. A unique identifier associated with each itemization or transaction appearing in an FEC electronic file. A transaction ID is unique for a specific committee for a specific report. In other words, if committee, C1, files a Q3 New with transaction SA123 and then files 3 amendments to the Q3 transaction SA123 will be identified by transaction ID SA123 in all 4 filings. +{% enddocs %} + +{% docs transaction_pgi %} + Primary general indicator. +{% enddocs %} + {% docs tres_nm %} The officially registered treasurer for the committee. 
{% enddocs %} diff --git a/dbt/models/staging/_staging_properties.yml b/dbt/models/staging/_staging_properties.yml index 9b82e5e..0104d5d 100644 --- a/dbt/models/staging/_staging_properties.yml +++ b/dbt/models/staging/_staging_properties.yml @@ -300,27 +300,28 @@ models: - name: amndt_ind data_type: varchar - description: "" + description: "{{ doc('amndt_ind') }}" - name: rpt_tp data_type: varchar - description: "" + description: "{{ doc('rpt_tp') }}" - name: transaction_pgi data_type: varchar - description: "" + description: "{{ doc('transaction_pgi') }}" - name: image_num data_type: varchar - description: "" + description: "{{ doc('image_num') }}" - name: transaction_tp data_type: varchar - description: "" + description: "Transaction types 24A, 24C, 24E, 24F, 24H, 24K, 24N, 24P, 24R, 24Z are included in the PAS2 file. +For more information about transaction type codes see this list of transaction type codes: https://www.fec.gov/campaign-finance-data/transaction-type-code-descriptions" - name: entity_tp data_type: varchar - description: "" + description: "{{ doc('entity_tp') }}" - name: name data_type: varchar @@ -356,7 +357,7 @@ models: - name: other_id data_type: varchar - description: "" + description: "For contributions from individuals this column is null. For contributions from candidates or other committees this column will contain the recipient's FEC ID." - name: cand_id data_type: varchar @@ -364,23 +365,23 @@ models: - name: tran_id data_type: varchar - description: "" + description: "{{ doc('tran_id') }}" - name: file_num data_type: varchar - description: "" + description: "{{ doc('file_num') }}" - name: memo_cd data_type: varchar - description: "" + description: "Memo code. 'X' indicates that the amount of the transaction is not incorporated into the total figure disclosed on the detailed summary page of the committee’s report. 
'X' may also indicate that the amount was received as part of a joint fundraising transfer or other lump sum contribution required to be attributed to individual contributors. Memo items may be used to denote that a transaction was previously reported or in the case of an independent expenditure, that the amount represents activity that has occurred but has not yet been paid by the committee. When using the bulk data file these memo items should be included in your analysis." - name: memo_text data_type: varchar - description: "" + description: "A description of the activity. Memo text is available on itemized amounts on Schedules A and B. These transactions are included in the itemization total." - name: sub_id # unique key data_type: varchar - description: "" + description: "{{ doc('sub_id') }}" - name: stg_house_senate description: "Shows total aggregated contributions to Congressional candidates. One record per Congressional candidate." @@ -514,7 +515,7 @@ models: - name: amndt_ind data_type: varchar - description: "" + description: "{{ doc('amndt_ind') }}" - name: rpt_yr data_type: varchar @@ -522,27 +523,27 @@ models: - name: rpt_tp data_type: varchar - description: "" + description: "{{ doc('rpt_tp') }}" - name: image_num data_type: varchar - description: "" + description: "{{ doc('image_num') }}" - name: line_num data_type: varchar - description: "" + description: "Indicates FEC form line number" - name: form_tp_cd data_type: varchar - description: "" + description: "Indicates FEC Form" - name: sched_tp_cd data_type: varchar - description: "" + description: "Schedule B - Itemized disbursements" - name: name data_type: varchar - description: "" + description: "Contributor/Lender/Transfer Name" - name: city data_type: varchar @@ -566,7 +567,7 @@ models: - name: transaction_pgi data_type: varchar - description: "" + description: "{{ doc('transaction_pgi') }}" - name: purpose data_type: varchar @@ -574,39 +575,39 @@ models: - name: category data_type: varchar - 
description: "" + description: "Disbursement category code" - name: category_desc data_type: varchar - description: "" + description: "Disbursement Category Code Description. See link for category codes: https://www.fec.gov/campaign-finance-data/disbursement-category-code-descriptions" - name: memo_cd data_type: varchar - description: "" + description: "Memo code. 'X' indicates that the amount is NOT to be included in the itemization total." - name: memo_text data_type: varchar - description: "" + description: "A description of the activity. Memo Text is available on itemized amounts on Schedule B. These transactions are included in the itemization total." - name: entity_tp data_type: varchar - description: "" + description: "{{ doc('entity_tp') }}" - name: sub_id data_type: varchar - description: "" + description: "{{ doc('sub_id') }}" - name: file_num data_type: varchar - description: "" + description: "{{ doc('file_num') }}" - name: tran_id data_type: varchar - description: "" + description: "{{ doc('tran_id') }}" - name: back_ref_tran_id data_type: varchar - description: "" + description: "ONLY VALID FOR ELECTRONIC FILINGS. Used to associate one transaction with another transaction in the same report (using file number, transaction ID and back reference transaction ID). For example, a credit card payment and the subitemization of specific purchases. The back reference transaction ID of the specific purchases will equal the transaction ID of the payment to the credit card company." - name: stg_pac_summary description: "This file gives overall receipts and disbursements for each PAC and party committee registered with the commission, along with a breakdown of overall receipts by source and totals for contributions to other committees, independent expenditures made and other information." @@ -719,3 +720,4 @@ models: data_type: varchar description: "{{ doc('cvg_end_dt') }}" +