BudgetJustificationGenerator/generate_budget_justification.py at main · NAU-CS/BudgetJustificationGenerator · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python3
"""
NIH Budget Justification Generator for Northern Arizona University
Generates LaTeX and Microsoft Word (.docx) files from NAU budget Excel templates
Supports 3, 5, and 10-year project templates

Requirements:
    - openpyxl (Python package)
    - python-docx (Python package, for .docx formatting)
    - xelatex (for PDF compilation)
    - pandoc (optional, for .docx generation)

Usage:
    python3 generate_budget_justification.py MyBudget.xlsx
    python3 generate_budget_justification.py MyBudget.xlsx -o output_directory -v

Output:
    - MyBudget_BudgetJustification.tex (LaTeX source)
    - MyBudget_BudgetJustification.docx (Word document, if pandoc is installed)
"""

import openpyxl
import argparse
import os
import sys
import subprocess
from datetime import datetime
from docx import Document
from docx.shared import Inches, Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH

# ============================================================================
# UNIVERSITY-MANDATED VERBIAGE CONSTANTS
# ============================================================================

# Boilerplate paragraph describing how fringe benefit rates are estimated.
# The wording is university-mandated, so it is kept verbatim here.
# NOTE(review): the final sentence describes the rate as salary divided by the
# cost of benefits, whereas BudgetExtractor computes ere_rate as
# total_fringe / total_salary * 100 -- confirm the intended wording with the
# owner of this verbiage before changing the text.
FRINGE_BENEFITS_TEXT = """Fringe benefit rates are rounded estimates based on the projected cost of health, dental, life, disability, FICA and Medicare, unemployment, and retirement benefits relative to the employee's salary and/or wages, FTE, and election of benefits. The employee's fringe benefit rate is calculated by dividing their salary by the total cost of their benefits package."""

def get_indirect_costs_text(rate, mtdc_base, total_indirect):
    """Build the NAU indirect-cost (MTDC) paragraph as LaTeX text.

    Args:
        rate: Indirect cost rate, inserted as-is in front of an escaped
            percent sign.
        mtdc_base: Modified Total Direct Cost base amount.
        total_indirect: Total indirect costs for the project.

    Returns:
        A single-paragraph string; both amounts are rendered through
        format_currency, so dollar signs arrive already LaTeX-escaped.
    """
    base_amount = format_currency(mtdc_base)
    indirect_amount = format_currency(total_indirect)

    # One entry per sentence; the sentences are joined with single spaces.
    sentences = (
        f"Indirect costs are calculated at {rate}\\% of the Modified Total Direct Cost (MTDC) base, "
        "per Northern Arizona University's federally negotiated rate agreement for on-campus research.",
        f"The MTDC base of {base_amount} excludes equipment, participant support costs, "
        "tuition remission, and subaward amounts over \\$25,000.",
        f"The total indirect costs for the project are {indirect_amount}.",
    )
    return " ".join(sentences)

# ============================================================================
# UTILITY FUNCTIONS
# ============================================================================

def format_currency(value):
    """Format a number as a LaTeX-escaped dollar amount with thousands separators.

    Whole amounts are rendered without decimals (``\\$12,345``); anything else
    is rendered with two decimals (``\\$12,345.67``).

    Args:
        value: A number, or a string holding one. Strings may contain
            thousands separators, surrounding whitespace and a leading ``$``.

    Returns:
        The formatted string. ``None``, ``''``, ``0`` and anything that cannot
        be interpreted as a finite number all yield ``\\$0``.
    """
    if value is None or value == '' or value == 0:
        return '\\$0'
    try:
        # Tolerate spreadsheet text such as " $1,234.50 " instead of silently
        # reporting it as $0.
        num = float(str(value).strip().lstrip('$').replace(',', ''))
        # int() raises ValueError for NaN and OverflowError for infinity; both
        # fall through to the "$0" fallback below.
        if num == int(num):
            return f'\\${int(num):,}'
        return f'\\${num:,.2f}'
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures are treated as "no amount"; unrelated
        # exceptions (KeyboardInterrupt, SystemExit, ...) are not swallowed.
        return '\\$0'

def format_year_range(year_details_list):
    """
    Summarise per-year amounts, collapsing consecutive years that share an amount.

    Args:
        year_details_list: List of (year_num, amount) tuples, in the order
            they should be reported.

    Returns:
        Entries joined with '; ', e.g. "Years 1-3: <amount>; Year 4: <amount>",
        where each amount is rendered by format_currency. An empty input
        yields an empty string.
    """
    if not year_details_list:
        return ""

    # Each run is [first_year, last_year, amount_of_first_year].
    runs = []
    for year, amount in year_details_list:
        if runs:
            current = runs[-1]
            # Extend the open run only when the year follows on directly and
            # the amount matches the run's first amount to within a cent.
            if year == current[1] + 1 and abs(amount - current[2]) < 0.01:
                current[1] = year
                continue
        runs.append([year, year, amount])

    pieces = []
    for first, last, amount in runs:
        if first == last:
            pieces.append(f"Year {first}: {format_currency(amount)}")
        else:
            pieces.append(f"Years {first}-{last}: {format_currency(amount)}")

    return '; '.join(pieces)

def _format_docx_paragraph(paragraph, todo_color, todo_highlight):
    """Justify one paragraph, set its runs to Arial 10pt and flag TODO runs."""
    paragraph.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
    for run in paragraph.runs:
        run.font.name = 'Arial'
        run.font.size = Pt(10)

        # Make TODO items stand out: coloured text plus a highlight.
        if run.text and 'TODO' in run.text:
            run.font.color.rgb = todo_color
            run.font.highlight_color = todo_highlight


def format_docx_file(docx_path):
    """
    Format a .docx file in place to match the PDF formatting:
    - 0.5 inch margins on all sides
    - Arial 10pt font
    - Justified text alignment
    - Red text with yellow highlighting for runs containing "TODO"

    Both body paragraphs and paragraphs inside table cells are formatted.

    Args:
        docx_path: Path to the .docx file to format

    Returns:
        True when the document was formatted and saved; False when anything
        went wrong (a warning describing the failure is written to stderr).
    """
    try:
        from docx.enum.text import WD_COLOR_INDEX
        from docx.shared import RGBColor

        doc = Document(docx_path)
        todo_color = RGBColor(255, 0, 0)  # Red text
        todo_highlight = WD_COLOR_INDEX.YELLOW  # Yellow highlight

        # Set margins to 0.5 inches on all sides
        for section in doc.sections:
            section.top_margin = Inches(0.5)
            section.bottom_margin = Inches(0.5)
            section.left_margin = Inches(0.5)
            section.right_margin = Inches(0.5)

        # Body paragraphs
        for paragraph in doc.paragraphs:
            _format_docx_paragraph(paragraph, todo_color, todo_highlight)

        # Paragraphs inside table cells
        for table in doc.tables:
            for row in table.rows:
                for cell in row.cells:
                    for paragraph in cell.paragraphs:
                        _format_docx_paragraph(paragraph, todo_color, todo_highlight)

        # Save the formatted document over the original
        doc.save(docx_path)
        return True
    except Exception as e:
        # Formatting is best-effort, so the failure is reported rather than
        # raised; the caller still gets False and can decide what to do.
        print(f"Warning: could not format {docx_path}: {e}", file=sys.stderr)
        return False

# Translation table used by escape_latex. Applying every replacement in a
# single pass guarantees that characters introduced by one replacement (for
# example the braces in "\textbackslash{}") are never escaped a second time.
_LATEX_ESCAPE_TABLE = str.maketrans({
    '\\': '\\textbackslash{}',
    '#': '\\#',
    '$': '\\$',
    '%': '\\%',
    '&': '\\&',
    '_': '\\_',
    '{': '\\{',
    '}': '\\}',
    '~': '\\textasciitilde{}',
    '^': '\\textasciicircum{}',
})


def escape_latex(text):
    """Escape LaTeX special characters in plain text.

    Handles ``\\ # $ % & _ { } ~ ^``. The input is treated as plain text, so a
    backslash is rendered as a literal backslash rather than starting a LaTeX
    command; do not pass strings that already contain LaTeX markup.

    Args:
        text: The text to escape. Non-string values are converted with
            ``str()`` and returned without escaping; ``None`` becomes ``''``.

    Returns:
        The escaped string.
    """
    if not isinstance(text, str):
        return str(text) if text is not None else ''
    return text.translate(_LATEX_ESCAPE_TABLE)

def clean_numeric(value):
    """Convert a spreadsheet cell value to a number.

    Args:
        value: The raw cell value (number, string, or None). Thousands
            separators in strings are ignored.

    Returns:
        The value as a float, or 0 when the cell is empty or does not hold
        something that parses as a number.
    """
    if value is None or value == '':
        return 0
    try:
        return float(str(value).replace(',', ''))
    except (TypeError, ValueError):
        # Non-numeric cell content (labels, notes, ...) counts as zero; other
        # exceptions such as KeyboardInterrupt are deliberately not caught.
        return 0

def highlight_todo(text):
    """Return *text* wrapped in a red LaTeX ``\\textcolor`` command."""
    template = "\\textcolor{{red}}{{{}}}"
    return template.format(text)

# ============================================================================
# BUDGET DATA EXTRACTOR
# ============================================================================

class BudgetExtractor:
    """Extract budget data from NAU Excel templates"""

    def __init__(self, filepath):
        """Open the budget workbook and prepare empty result containers.

        Args:
            filepath: Path to the Excel budget template to read.

        Raises:
            KeyError: If the 'Budget Details' sheet or the personnel summary
                sheet is missing from the workbook.
        """
        self.filepath = filepath

        # Containers populated later by the extract_* methods.
        self.senior_personnel, self.other_personnel = [], []
        self.domestic_travel, self.international_travel = [], []
        self.cumulative_data = {}

        # data_only=True asks openpyxl for stored cell values rather than
        # formula text.
        workbook = openpyxl.load_workbook(filepath, data_only=True)
        self.wb = workbook
        self.years = self.detect_years()

        # The sheet name is looked up with its trailing space included.
        self.summary_sheet = workbook['Summary_of_Personnel Costs ']

    def detect_years(self):
        """Auto-detect 3, 5, or 10 year template by scanning year columns"""
        sheet = self.wb['Budget Details']
        # Scan Row 2, Columns 7-25 for year headers
        year_count = 0
        for col in range(7, 26):
            cell_value = sheet.cell(2, col).value
            if cell_value is not None:
                cell_str = str(cell_value).strip()
                # Check if it's a year label (Year 0, Year 1, etc.)
                if cell_str.startswith('Year') or cell_str.isdigit():
                    year_count += 1
        return year_count if year_count > 0 else 5  # Default to 5 if detection fails

    def extract_senior_personnel(self):
        """Extract senior personnel rows from the 'Budget Details' sheet.

        Rows 11-49 are scanned. A row with an empty or "none" name (column 2)
        is skipped, and the scan stops at the first name containing 'Total'
        or 'OTHER'. Rows without a usable role (column 4) are skipped as well.

        For each person the method reads base salary (column 5), contract
        type (column 6), person-months per year (from column 7), salary per
        year (from column 20) and total salary (column 25). Fringe is derived
        as total compensation (personnel summary sheet, column 9, row offset
        by -5 from the Budget Details row) minus total salary.

        Returns:
            list[dict]: One dict per person; the same list is stored on
            ``self.senior_personnel``.
        """
        # Start from a clean list so a repeated call does not append duplicate
        # entries (extract_other_direct_costs_items and extract_subaward_names
        # reset their results the same way).
        self.senior_personnel = []
        sheet = self.wb['Budget Details']

        # Row 11 is the first senior-personnel row; 50 is a safety limit.
        for row in range(11, 50):
            name = sheet.cell(row, 2).value
            role = sheet.cell(row, 4).value

            # Blank rows are skipped, not treated as the end of the section.
            if not name or str(name).strip().lower() == 'none':
                continue
            # The totals row or the next section header ends the section.
            if 'Total' in str(name) or 'OTHER' in str(name):
                break
            # A name without a real role is not a personnel entry.
            if not role or str(role).strip().lower() == 'none':
                continue

            person = {
                'name': str(name).strip(),
                'role': str(role).strip(),
                'base_salary': clean_numeric(sheet.cell(row, 5).value),
                'contract_type': clean_numeric(sheet.cell(row, 6).value),
            }

            # Person-months for each year (columns 7, 8, 9, 10, 11 for 5-year)
            for i in range(self.years):
                person[f'pm_y{i+1}'] = clean_numeric(sheet.cell(row, 7 + i).value)

            # Salary for each year (columns 20, 21, 22, 23, 24 for 5-year)
            for i in range(self.years):
                person[f'salary_y{i+1}'] = clean_numeric(sheet.cell(row, 20 + i).value)

            # Total salary (column 25)
            person['total_salary'] = clean_numeric(sheet.cell(row, 25).value)

            # The summary sheet lists the first senior person on row 6, which
            # corresponds to row 11 in Budget Details.
            summary_row = row - 11 + 6
            total_compensation = clean_numeric(self.summary_sheet.cell(summary_row, 9).value)  # Column I = total

            # Fringe = total compensation - total salary; ERE rate is fringe
            # as a percentage of salary.
            if total_compensation and person['total_salary']:
                person['total_fringe'] = total_compensation - person['total_salary']
                if person['total_salary'] > 0:
                    person['ere_rate'] = (person['total_fringe'] / person['total_salary']) * 100
                else:
                    person['ere_rate'] = 0
            else:
                person['total_fringe'] = 0
                person['ere_rate'] = 0

            self.senior_personnel.append(person)

        return self.senior_personnel

    def extract_other_personnel(self):
        """Extract "Other Personnel" positions from the 'Budget Details' sheet.

        The section header is located by scanning column 2 of rows 30-35 for
        text containing 'OTHER'; data rows are read starting two rows below
        it, for at most 30 rows. Rows whose role (column 3) is empty or
        "none" are skipped.

        Fringe is derived from the personnel summary sheet when a row with the
        same role text is found there (rows 30-49, column 2). Otherwise a
        blended rate computed from the 'Cumulative' sheet totals is applied.

        Returns:
            list[dict]: One dict per position (also stored on
            ``self.other_personnel``), or an empty list when the section
            header is not found.
        """
        # Start from a clean list so a repeated call does not append duplicate
        # entries (extract_other_direct_costs_items and extract_subaward_names
        # reset their results the same way).
        self.other_personnel = []
        sheet = self.wb['Budget Details']

        # Find "OTHER PERSONNEL" header (scan rows 30-35)
        start_row = None
        for row in range(30, 36):
            num_cell = sheet.cell(row, 2).value
            if num_cell and 'OTHER' in str(num_cell):
                start_row = row + 2  # Data starts 2 rows after header (skip header row with #, Role, Hours)
                break

        if not start_row:
            return []

        for row in range(start_row, start_row + 30):  # Max 30 other personnel rows
            number = sheet.cell(row, 2).value
            role = sheet.cell(row, 3).value

            # Rows without a role description are skipped, not treated as the
            # end of the section.
            if not role or str(role).strip().lower() == 'none':
                continue

            # Normalise the position number; a literal "None" counts as blank.
            num_val = str(number).strip() if number else ''
            if num_val.lower() == 'none':
                num_val = ''

            # Check if this is a GRA (monthly salary vs hourly).
            # NOTE(review): this is a substring test, so any role containing
            # "gra" (e.g. "Undergraduate", "Programmer") matches -- confirm
            # that is intended.
            hours_cell = sheet.cell(row, 4).value
            is_gra = 'gra' in str(role).lower()

            # For GRAs, hours_week column may contain semester info (e.g., "Spring")
            # Rate is monthly salary, not hourly
            if is_gra and isinstance(hours_cell, str):
                # GRA with semester designation - default to 20 hours/week
                hours_week = 20
                is_monthly = True
                semester = hours_cell
            else:
                hours_week = clean_numeric(hours_cell)
                is_monthly = False
                semester = None

            position = {
                'number': num_val,
                'role': str(role).strip(),
                'hours_week': hours_week,
                'rate': clean_numeric(sheet.cell(row, 5).value),
                'is_monthly': is_monthly,
                'semester': semester,
            }

            # For Other Personnel, ERE is not stored in columns 6-8 (those are months worked).
            # Placeholder; replaced below once fringe is known.
            position['ere_rate'] = 0

            # Salary for each year and total
            for i in range(self.years):
                position[f'salary_y{i+1}'] = clean_numeric(sheet.cell(row, 20 + i).value)

            # Total salary (column 25)
            position['total_salary'] = clean_numeric(sheet.cell(row, 25).value)

            # Find matching row in Summary sheet to get fringe
            # Summary sheet has Other Personnel starting around row 31
            total_compensation = None
            for summary_row in range(30, 50):
                summary_role = self.summary_sheet.cell(summary_row, 2).value
                if summary_role and str(summary_role).strip() == str(role).strip():
                    total_compensation = clean_numeric(self.summary_sheet.cell(summary_row, 9).value)
                    break

            # Fringe = total compensation - total salary
            if total_compensation and position['total_salary']:
                position['total_fringe'] = total_compensation - position['total_salary']
                if position['total_salary'] > 0:
                    position['ere_rate'] = (position['total_fringe'] / position['total_salary']) * 100
                else:
                    position['ere_rate'] = 0
            else:
                # Fallback to blended rate if not found in Summary.
                # clean_numeric keeps the comparison below safe when a cell
                # holds text instead of a number (a raw `value or 0` would
                # raise TypeError on `> 0`).
                cum_sheet = self.wb['Cumulative']
                total_oth_salary = clean_numeric(cum_sheet.cell(11, 36).value)
                total_oth_benefits = clean_numeric(cum_sheet.cell(14, 36).value)

                if total_oth_salary > 0:
                    blended_ere = (total_oth_benefits / total_oth_salary) * 100
                    position['ere_rate'] = blended_ere
                    position['total_fringe'] = position['total_salary'] * (blended_ere / 100)
                else:
                    position['ere_rate'] = 0
                    position['total_fringe'] = 0

            self.other_personnel.append(position)

        return self.other_personnel

    def extract_travel(self):
        """Extract trips from the Travel Calculator sheet and split them by category.

        Amounts listed under the domestic and international headings of the
        'Budget Details' travel section (rows 158-174, column 20) are
        collected first. Each trip (Travel Calculator rows 4-13) is then
        classified by looking up its cumulative cost (column 20) in those two
        sets. A trip whose cost appears in neither set, including the case
        where no amounts could be collected at all, is classified by a
        heuristic: it is international when the destination text contains
        "international" or the per-person airfare exceeds 1000.

        Returns:
            list[dict]: Domestic trips followed by international trips (also
            stored on ``self.domestic_travel`` and
            ``self.international_travel``), or an empty list when no Travel
            Calculator sheet exists.
        """
        # Start from clean lists so a repeated call does not append duplicate
        # trips.
        self.domestic_travel = []
        self.international_travel = []

        # First, get travel categories from the Budget Details sheet
        domestic_amounts = set()
        international_amounts = set()

        try:
            details_sheet = self.wb['Budget Details']
            current_category = None

            for row in range(158, 175):
                cell_b = details_sheet.cell(row, 2).value
                year1_amt = clean_numeric(details_sheet.cell(row, 20).value)

                if cell_b:
                    cell_b_str = str(cell_b).strip().lower()
                    if 'domestic' in cell_b_str:
                        current_category = 'domestic'
                    elif 'international' in cell_b_str or 'foreign' in cell_b_str:
                        current_category = 'international'
                    elif 'total' in cell_b_str:
                        # Totals rows close the current category and are not recorded.
                        current_category = None

                # If we have a Year 1 amount and a current category, record it
                if current_category and year1_amt > 0:
                    if current_category == 'domestic':
                        domestic_amounts.add(year1_amt)
                    else:
                        international_amounts.add(year1_amt)
        except Exception:
            # Best-effort: with no category amounts every trip falls through
            # to the heuristic below.
            pass

        # The sheet name may or may not carry a leading space.
        sheet = None
        for sheet_name in (' Travel Calculator', 'Travel Calculator'):
            try:
                sheet = self.wb[sheet_name]
                break
            except KeyError:
                continue
        if sheet is None:
            return []

        # Rows 4-13 contain trip data in the template
        for row_idx in range(4, 14):
            destination = sheet.cell(row_idx, 2).value
            travelers = clean_numeric(sheet.cell(row_idx, 3).value)

            if not (destination and travelers and travelers > 0):
                continue

            trip = {
                'destination': str(destination).strip(),
                'travelers': int(travelers),
                'days': clean_numeric(sheet.cell(row_idx, 4).value),
                'nights': clean_numeric(sheet.cell(row_idx, 5).value),
                'flight_pp': clean_numeric(sheet.cell(row_idx, 6).value),
                'full_meal_pp': clean_numeric(sheet.cell(row_idx, 7).value),
                'first_last_meal_pp': clean_numeric(sheet.cell(row_idx, 8).value),
                'lodging_pn': clean_numeric(sheet.cell(row_idx, 9).value),
                'transportation_pp': clean_numeric(sheet.cell(row_idx, 10).value),
                'conf_reg_pp': clean_numeric(sheet.cell(row_idx, 11).value),
                'misc_pp': clean_numeric(sheet.cell(row_idx, 12).value),  # Column 12 is miscellaneous per person
                'total_first_last_meal': clean_numeric(sheet.cell(row_idx, 13).value),
                'total_remaining_meal': clean_numeric(sheet.cell(row_idx, 14).value),
                'total_flight': clean_numeric(sheet.cell(row_idx, 15).value),
                'total_lodging': clean_numeric(sheet.cell(row_idx, 16).value),
                'total_transportation': clean_numeric(sheet.cell(row_idx, 17).value),  # Column 17 is ground transportation total
                'total_conf_reg': clean_numeric(sheet.cell(row_idx, 18).value),
                'total_misc': clean_numeric(sheet.cell(row_idx, 19).value),
                'cumulative': clean_numeric(sheet.cell(row_idx, 20).value),
            }

            # The trip's cumulative cost is used as a proxy for the amount
            # entered in Budget Details.
            trip_amount = trip['cumulative']

            if trip_amount in international_amounts:
                is_international = True
            elif trip_amount in domestic_amounts:
                is_international = False
            else:
                # No match in either category (or no category data at all):
                # fall back to the destination/airfare heuristic.
                dest_lower = trip['destination'].lower()
                is_international = 'international' in dest_lower or trip['flight_pp'] > 1000

            if is_international:
                self.international_travel.append(trip)
            else:
                self.domestic_travel.append(trip)

        return self.domestic_travel + self.international_travel

    def extract_cumulative(self):
        """Read category totals and per-year amounts from the 'Cumulative' sheet.

        Each budget category sits on a fixed row. Its project total is read
        from column 36 and its yearly amounts from columns 11, 16, 21, ...
        (one column every five, for ``self.years`` years).

        Returns:
            dict: ``self.cumulative_data``, mapping each category name to a
            dict with a 'total' key followed by 'year1', 'year2', ... keys.
        """
        cumulative = self.wb['Cumulative']

        # Sheet row -> category name.
        row_labels = {
            9: 'total_salaries',
            10: 'sr_personnel_salary',
            11: 'other_personnel_salary',
            12: 'total_benefits',
            13: 'sr_personnel_benefits',
            14: 'other_personnel_benefits',
            15: 'tuition_remission',
            16: 'total_equipment',
            17: 'total_travel',
            18: 'domestic_travel',
            19: 'international_travel',
            20: 'participant_support',
            21: 'other_direct_costs',
            22: 'materials_supplies',
            23: 'consultants',
            24: 'other_expenses',
            25: 'total_subawards',
            27: 'total_direct_costs',
            28: 'indirect_base',
            29: 'total_indirect_costs'
        }

        for sheet_row, label in row_labels.items():
            entry = {'total': clean_numeric(cumulative.cell(sheet_row, 36).value)}
            for offset in range(self.years):
                year_column = 11 + 5 * offset
                entry[f'year{offset + 1}'] = clean_numeric(cumulative.cell(sheet_row, year_column).value)
            self.cumulative_data[label] = entry

        return self.cumulative_data

    def extract_other_direct_costs_items(self):
        """Collect Other Direct Costs line items from 'Budget Details' rows 177-192.

        Column 2 carries category headings, which apply to the rows that
        follow until the next heading. Column 4 carries the line-item
        description. Yearly amounts are read starting at column 20, and only
        positive amounts are kept. Items with no positive amount are dropped.

        Returns:
            list[dict]: ``self.odc_items``; each item has 'category',
            'description', 'yearly' and 'total' keys. Extraction is
            best-effort: if anything fails, the items gathered so far are
            returned.
        """
        self.odc_items = []

        try:
            details = self.wb['Budget Details']
            category = None

            for sheet_row in range(177, 193):
                raw_label = details.cell(sheet_row, 2).value
                raw_desc = details.cell(sheet_row, 4).value

                # A non-blank label in column B starts a new category.
                label = str(raw_label).strip() if raw_label else ''
                if label and label.lower() != 'none':
                    category = label

                # Rows without a real description are not line items.
                desc = str(raw_desc).strip() if raw_desc else ''
                if not desc or desc.lower() in ('none', 'description'):
                    continue

                yearly = {}
                for offset in range(self.years):
                    amount = clean_numeric(details.cell(sheet_row, 20 + offset).value)
                    if amount > 0:
                        yearly[f'year{offset + 1}'] = amount

                # Sum the years ourselves: column 25 contains category
                # subtotals, not line item totals.
                line_total = sum(yearly.values())
                if line_total > 0:
                    self.odc_items.append({
                        'category': category if category else '',
                        'description': desc,
                        'yearly': yearly,
                        'total': line_total,
                    })

        except Exception:
            pass  # Silently fail if extraction doesn't work

        return self.odc_items

    def extract_subaward_names(self):
        """Collect subaward organization names from the 'Cumulative' sheet.

        Column 2 of rows 25-34 is scanned for text labels. A label is kept
        when it mentions neither "subaward" nor "total", is longer than two
        characters, and its row has a positive amount somewhere in columns
        11-39.

        Returns:
            list[str]: ``self.subaward_names``. Extraction is best-effort: if
            anything fails, the names gathered so far are returned.
        """
        self.subaward_names = []
        try:
            cumulative = self.wb['Cumulative']
            for sheet_row in range(25, 35):
                raw = cumulative.cell(sheet_row, 2).value
                if not (raw and isinstance(raw, str)):
                    continue

                label = raw.strip()
                lowered = label.lower()
                # Skip blanks and generic section/total labels.
                if not label or 'subaward' in lowered or 'total' in lowered:
                    continue

                # A row with budget amounts indicates an organization name.
                row_has_amount = any(
                    clean_numeric(cumulative.cell(sheet_row, col).value) > 0
                    for col in range(11, 40)
                )
                if row_has_amount and len(label) > 2:
                    self.subaward_names.append(label)
        except Exception:
            pass
        return self.subaward_names

# ============================================================================
# LATEX GENERATOR
# ============================================================================

class LaTeXGenerator:
    """Generate LaTeX budget justification from extracted data"""

    def __init__(self, extractor):
        """Keep a reference to the object that holds the extracted budget data.

        Args:
            extractor: Source of the extracted data; stored unchanged as
                ``self.data`` for the section generators to read.
        """
        self.data = extractor

    def generate_header(self):
        """Return the unnumbered LaTeX chapter heading that opens the document."""
        heading = "BUDGET JUSTIFICATION – NORTHERN ARIZONA UNIVERSITY"
        return "\\chapter*{" + heading + "}"

    def generate_personnel_section(self):
        """Section A: Senior/Key Personnel - fully dynamic"""
        total = self.data.cumulative_data.get('sr_personnel_salary', {}).get('total', 0)

        if not self.data.senior_personnel:
            section = "\\subsection*{A. Senior Personnel—N/A}\n"
            section += "No senior personnel (PI, Co-PI, or Senior Personnel) salary is requested for this project. "
            section += "Key personnel contributing to this project will do so through cost-sharing or are supported by other funding sources.\n\n"
            return section

        section = f"\\subsection*{{A. Senior Personnel—{format_currency(total)}}}\n"

        for i, person in enumerate(self.data.senior_personnel, 1):
            name = escape_latex(person['name'])
            role = escape_latex(person['role'])
            pm_y1 = person.get('pm_y1', 0)
            person_total = person.get('total_salary', 0)
            base_salary = person.get('base_salary', 0)

            # Check if person months vary across years
            pm_values = [person.get(f'pm_y{j+1}', 0) for j in range(self.data.years)]
            pm_all_same = len(set(pm_values)) <= 1  # All values are the same

            # Build the person months description for the header
            if pm_all_same:
                pm_description = f"{pm_y1} Months per year"
            else:
                # Show variation with ranges: "Person months: 12 in Year 1, 3 in Years 2-5"
                pm_year_data = [(j+1, person.get(f'pm_y{j+1}', 0)) for j in range(self.data.years) if person.get(f'pm_y{j+1}', 0) > 0]
                # Format as "X months in Year/Years Y"
                grouped = []
                i = 0
                while i < len(pm_year_data):
                    start_year, months = pm_year_data[i]
                    end_year = start_year

                    # Look ahead for sequential years with same months
                    j = i + 1
                    while j < len(pm_year_data):
                        next_year, next_months = pm_year_data[j]
                        if next_year == end_year + 1 and abs(next_months - months) < 0.01:
                            end_year = next_year
                            j += 1
                        else:
                            break

                    # Format the range
                    if start_year == end_year:
                        grouped.append(f"{months} in Year {start_year}")
                    else:
                        grouped.append(f"{months} in Years {start_year}-{end_year}")

                    i = j

                pm_description = f"Person months: {', '.join(grouped)}"

            section += f"\\subsubsection*{{A{i}. {name}, {role}: {pm_description}, {format_currency(person_total)} total}}\n"

            # Generate role-specific narrative (starts on new line after header)
            last_name = name.split()[-1]

            if 'PI' in role and 'Co' not in role:
                # Principal Investigator - detailed narrative
                section += f"{last_name} is the Principal Investigator who will provide overall leadership, direction, and coordination for all aspects of this research project. "
                section += f"{last_name} will be responsible for scientific and administrative oversight, ensuring that project milestones are met, coordinating with collaborators, managing the research team, and ensuring compliance with all institutional and funding agency requirements. "

                # Describe effort commitment based on whether it varies
                if pm_all_same:
                    section += f"{last_name} will dedicate {pm_y1} person months per year to this project, with a base salary of {format_currency(base_salary)}. "
                else:
                    # Show effort by year
                    effort_parts = [f"{person.get(f'pm_y{j+1}', 0)} person months in Year {j+1}" for j in range(self.data.years) if person.get(f'pm_y{j+1}', 0) > 0]
                    section += f"{last_name} will dedicate {', '.join(effort_parts[:-1])}, and {effort_parts[-1]} to this project, with a base salary of {format_currency(base_salary)}. " if len(effort_parts) > 1 else f"{last_name} will dedicate {effort_parts[0]} to this project, with a base salary of {format_currency(base_salary)}. "


                # Add year-by-year breakdown if available
                year_data = []
                for i in range(self.data.years):
                    salary = person.get(f'salary_y{i+1}', 0)
                    if salary > 0:
                        year_data.append((i+1, salary))
                if year_data:
                    section += f"The total salary requested for {last_name} over {self.data.years} years is {format_currency(person_total)}, allocated as follows: {format_year_range(year_data)}. "
                    section += "A 3\\% annual salary increase is included. "

                section += f"{last_name}'s expertise and leadership are essential to the success of this project.\n\n"

            elif 'Co' in role and 'PI' in role:
                # Co-PI - detailed narrative
                section += f"{last_name} serves as Co-Principal Investigator and will play a critical role in the scientific direction and execution of this research. "

                # Describe effort commitment based on whether it varies
                if pm_all_same:
                    section += f"{last_name} will contribute {pm_y1} person months per year to the project, bringing essential expertise and working in close collaboration with the PI to ensure project success. "
                else:
                    # Show effort by year
                    effort_parts = [f"{person.get(f'pm_y{j+1}', 0)} person months in Year {j+1}" for j in range(self.data.years) if person.get(f'pm_y{j+1}', 0) > 0]
                    section += f"{last_name} will contribute {', '.join(effort_parts[:-1])}, and {effort_parts[-1]} to the project, bringing essential expertise and working in close collaboration with the PI to ensure project success. " if len(effort_parts) > 1 else f"{last_name} will contribute {effort_parts[0]} to the project, bringing essential expertise and working in close collaboration with the PI to ensure project success. "

                section += f"With a base salary of {format_currency(base_salary)}, {last_name} will share responsibility for key project decisions, mentor junior team members, contribute to data analysis and interpretation, and assist in the preparation of publications and presentations. "

                # Add year-by-year breakdown
                year_data = []
                for i in range(self.data.years):
                    salary = person.get(f'salary_y{i+1}', 0)
                    if salary > 0:
                        year_data.append((i+1, salary))
                if year_data:
                    section += f"The total salary requested for {last_name} is {format_currency(person_total)}, distributed as: {format_year_range(year_data)}. "
                    section += "A 3\\% annual salary increase is included. "

                section += highlight_todo(f"[TODO: Describe {last_name}'s specific technical expertise, prior relevant experience, and unique contributions to this project.]") + "\n\n"

            else:
                # Senior Personnel - detailed narrative
                section += f"{last_name} will contribute critical expertise to this research project as Senior Personnel. "

                # Describe effort commitment based on whether it varies
                if pm_all_same:
                    section += f"{last_name} is requesting {pm_y1} months of support per year, with a base salary of {format_currency(base_salary)}. "
                else:
                    # Show effort by year
                    effort_parts = [f"{person.get(f'pm_y{j+1}', 0)} months in Year {j+1}" for j in range(self.data.years) if person.get(f'pm_y{j+1}', 0) > 0]
                    section += f"{last_name} is requesting {', '.join(effort_parts[:-1])}, and {effort_parts[-1]} of support, with a base salary of {format_currency(base_salary)}. " if len(effort_parts) > 1 else f"{last_name} is requesting {effort_parts[0]} of support, with a base salary of {format_currency(base_salary)}. "

                section += f"In this role, {last_name} will provide specialized knowledge and technical guidance, assist with specific research tasks, participate in project meetings and strategic planning, and contribute to the dissemination of research findings. "

                # Add year-by-year breakdown
                year_data = []
                for i in range(self.data.years):
                    salary = person.get(f'salary_y{i+1}', 0)
                    if salary > 0:
                        year_data.append((i+1, salary))
                if year_data:
                    section += f"The total salary requested for {last_name} is {format_currency(person_total)}, with yearly allocation: {format_year_range(year_data)}. "
                    section += "A 3\\% annual salary increase is included. "

                section += highlight_todo(f"[TODO: Describe {last_name}'s specific role, specialized qualifications, and how their expertise complements the research team.]") + "\n\n"

        return section

    def _get_years_intro(self, years_with_salary, hours, rate, position_type="position"):
        """Build the "will be hired ..." clause for an Other Personnel position.

        The clause names the active years only when the position is not funded
        in every project year: a single year reads "in Year N", three or more
        consecutive years read "in Years A-B", and anything else is a
        comma-separated list. ``position_type`` is accepted but not used.
        """
        active = years_with_salary

        if len(active) == self.data.years:
            # Funded for the whole project: no year qualifier.
            when = ""
        elif len(active) == 1:
            when = f"in Year {active[0]} "
        elif len(active) > 2 and active == list(range(active[0], active[-1] + 1)):
            when = f"in Years {active[0]}-{active[-1]} "
        else:
            when = "in Years " + ", ".join(map(str, active)) + " "

        return f"will be hired {when}to work {hours} hours per week at a rate of {format_currency(rate)} per hour"

    def generate_other_personnel_section(self):
        """Section B: Other Personnel"""
        total_salary = self.data.cumulative_data.get('other_personnel_salary', {}).get('total', 0)
        total_benefits = self.data.cumulative_data.get('other_personnel_benefits', {}).get('total', 0)
        tuition = self.data.cumulative_data.get('tuition_remission', {}).get('total', 0)

        # Calculate grand total
        grand_total = total_salary + total_benefits + tuition

        # Filter to only include personnel with budget amounts > 0
        budgeted_personnel = [p for p in self.data.other_personnel if p.get('total_salary', 0) > 0]

        if not budgeted_personnel:
            section = "\\subsection*{B. Other Personnel—N/A}\n"
            section += "No other personnel (graduate students, postdocs, research staff, etc.) are budgeted for this project.\n\n"
            return section

        section = f"\\subsection*{{B. Other Personnel—{format_currency(grand_total)}}}\n"

        for i, position in enumerate(budgeted_personnel, 1):
            role = escape_latex(position['role'])
            hours = position.get('hours_week', 0)
            position_total = position.get('total_salary', 0)
            rate = position.get('rate', 0)

            # Check which years have salary (indicating when position is active)
            years_with_salary = [j+1 for j in range(self.data.years) if position.get(f'salary_y{j+1}', 0) > 0]
            all_years_active = len(years_with_salary) == self.data.years

            # Build the effort description for the header
            if all_years_active:
                effort_description = f"{hours} hours/week"
            else:
                # Show which years: "20 hours/week (Years 1-2)" or "20 hours/week (Years 1, 3, 5)"
                if len(years_with_salary) > 2 and years_with_salary == list(range(years_with_salary[0], years_with_salary[-1] + 1)):
                    # Consecutive years
                    effort_description = f"{hours} hours/week (Years {years_with_salary[0]}-{years_with_salary[-1]})"
                else:
                    # Non-consecutive years
                    years_str = ', '.join(str(y) for y in years_with_salary)
                    effort_description = f"{hours} hours/week (Years {years_str})"

            section += f"\\subsubsection*{{B{i}. {role}: {effort_description}, {format_currency(position_total)} total}}\n"

            # Generate description based on role type with detailed narratives
            role_lower = role.lower()

            # Check for undergrad BEFORE checking for grad (to avoid matching "undergrad" as "grad")
            if 'undergrad' in role_lower:
                # Add time frame if not all years
                if all_years_active:
                    section += f"Undergraduate students will be hired to work {hours} hours per week at a rate of {format_currency(rate)} per hour. "
                else:
                    if len(years_with_salary) == 1:
                        section += f"Undergraduate students will be hired in Year {years_with_salary[0]} to work {hours} hours per week at a rate of {format_currency(rate)} per hour. "
                    elif len(years_with_salary) > 2 and years_with_salary == list(range(years_with_salary[0], years_with_salary[-1] + 1)):
                        section += f"Undergraduate students will be hired in Years {years_with_salary[0]}-{years_with_salary[-1]} to work {hours} hours per week at a rate of {format_currency(rate)} per hour. "
                    else:
                        years_str = ', '.join(str(y) for y in years_with_salary)
                        section += f"Undergraduate students will be hired in Years {years_str} to work {hours} hours per week at a rate of {format_currency(rate)} per hour. "

                section += f"Undergraduate researchers will gain hands-on research experience by assisting with data collection and entry, conducting literature searches, preparing research materials, maintaining laboratory notebooks, participating in team meetings, and contributing to specific project tasks under the supervision of senior personnel. "

                year_data = []
                for j in range(self.data.years):
                    salary = position.get(f'salary_y{j+1}', 0)
                    if salary > 0:
                        year_data.append((j+1, salary))
                if year_data:
                    section += f"Total undergraduate support is budgeted at {format_currency(position_total)}, with yearly allocation: {format_year_range(year_data)}. "

                section += r"A 3\% annual salary increase is included. "

                section += "This investment in undergraduate training aligns with the university's educational mission and provides essential research support. "
                section += highlight_todo("[TODO: Specify training objectives and anticipated student contributions]") + "\n\n"

            elif 'postdoc' in role_lower or 'post-doc' in role_lower:
                # Add time frame if not all years
                if all_years_active:
                    section += f"A postdoctoral researcher will be hired to work {hours} hours per week at a rate of {format_currency(rate)} per hour. "
                else:
                    if len(years_with_salary) == 1:
                        section += f"A postdoctoral researcher will be hired in Year {years_with_salary[0]} to work {hours} hours per week at a rate of {format_currency(rate)} per hour. "
                    elif len(years_with_salary) > 2 and years_with_salary == list(range(years_with_salary[0], years_with_salary[-1] + 1)):
                        section += f"A postdoctoral researcher will be hired in Years {years_with_salary[0]}-{years_with_salary[-1]} to work {hours} hours per week at a rate of {format_currency(rate)} per hour. "
                    else:
                        years_str = ', '.join(str(y) for y in years_with_salary)
                        section += f"A postdoctoral researcher will be hired in Years {years_str} to work {hours} hours per week at a rate of {format_currency(rate)} per hour. "

                section += f"The postdoctoral researcher will conduct independent research under the guidance of the PI, assist with experimental design and data collection, train and supervise graduate and undergraduate students, contribute to manuscript preparation and grant writing, and participate in lab meetings and professional development activities. "

                # Add year-by-year breakdown
                year_data = []
                for j in range(self.data.years):
                    salary = position.get(f'salary_y{j+1}', 0)
                    if salary > 0:
                        year_data.append((j+1, salary))
                if year_data:
                    section += f"The total salary requested for this position is {format_currency(position_total)}, distributed as: {format_year_range(year_data)}. "

                section += "This position is essential for maintaining research productivity and providing mentorship to junior team members. "
                section += highlight_todo("[TODO: Specify research focus areas and required qualifications]") + "\n\n"

            elif 'grad' in role_lower or 'gra' in role_lower:
                # Check if this is monthly or hourly pay
                is_monthly = position.get('is_monthly', False)
                rate_text = f"a monthly stipend of {format_currency(rate)}" if is_monthly else f"a rate of {format_currency(rate)} per hour"

                # Add time frame if not all years
                if all_years_active:
                    section += f"A Graduate Research Assistant will be hired to work {hours} hours per week at {rate_text}. "
                else:
                    if len(years_with_salary) == 1:
                        section += f"A Graduate Research Assistant will be hired in Year {years_with_salary[0]} to work {hours} hours per week at {rate_text}. "
                    elif len(years_with_salary) > 2 and years_with_salary == list(range(years_with_salary[0], years_with_salary[-1] + 1)):
                        section += f"A Graduate Research Assistant will be hired in Years {years_with_salary[0]}-{years_with_salary[-1]} to work {hours} hours per week at {rate_text}. "
                    else:
                        years_str = ', '.join(str(y) for y in years_with_salary)
                        section += f"A Graduate Research Assistant will be hired in Years {years_str} to work {hours} hours per week at {rate_text}. "

                section += f"The GRA will assist with literature reviews, data collection and analysis, laboratory experiments, field work as needed, maintenance of research equipment and supplies, preparation of research presentations, and participation in project team meetings. "

                year_data = []
                for j in range(self.data.years):
                    salary = position.get(f'salary_y{j+1}', 0)
                    if salary > 0:
                        year_data.append((j+1, salary))
                if year_data:
                    section += f"Total support for this position is {format_currency(position_total)}, allocated as: {format_year_range(year_data)}. "

                section += r"A 3\% annual salary increase is included. "

                section += "This position will provide valuable research training and contribute significantly to project deliverables. "
                section += highlight_todo("[TODO: Specify required academic background and specific responsibilities]") + "\n\n"

            elif 'project' in role_lower and 'manager' in role_lower:
                intro = self._get_years_intro(years_with_salary, hours, rate)
                section += f"A Project Manager {intro}. "
                section += f"The Project Manager will coordinate project activities across team members and collaborating institutions, manage the project timeline and deliverables, organize team meetings and communications, oversee budget expenditures and financial reporting, ensure compliance with institutional and funding agency requirements, maintain project documentation and databases, and facilitate dissemination of research findings. "

                year_data = []
                for j in range(self.data.years):
                    salary = position.get(f'salary_y{j+1}', 0)
                    if salary > 0:
                        year_data.append((j+1, salary))
                if year_data:
                    section += f"Total compensation for this position is {format_currency(position_total)}, distributed over {self.data.years} years as: {format_year_range(year_data)}. "

                section += r"A 3\% annual salary increase is included. "

                section += "This position is critical for ensuring efficient project execution and successful completion of all milestones. "
                section += highlight_todo("[TODO: Specify required project management experience and qualifications]") + "\n\n"

            elif 'lab' in role_lower and 'manager' in role_lower:
                intro = self._get_years_intro(years_with_salary, hours, rate)
                section += f"A Laboratory Manager {intro}. "
                section += f"The Lab Manager will maintain laboratory equipment and facilities, manage laboratory supplies and inventory, ensure compliance with safety regulations and protocols, train personnel on equipment use and safety procedures, coordinate equipment maintenance and repairs, maintain laboratory records and documentation, and support day-to-day laboratory operations. "

                year_data = []
                for j in range(self.data.years):
                    salary = position.get(f'salary_y{j+1}', 0)
                    if salary > 0:
                        year_data.append((j+1, salary))
                if year_data:
                    section += f"Total funding for this position is {format_currency(position_total)}, allocated as: {format_year_range(year_data)}. "

                section += r"A 3\% annual salary increase is included. "

                section += "This position is essential for maintaining a safe, efficient, and productive laboratory environment. "
                section += highlight_todo("[TODO: Specify required technical skills and laboratory experience]") + "\n\n"

            elif 'engineer' in role_lower or 'research' in role_lower and 'specialist' in role_lower:
                intro = self._get_years_intro(years_with_salary, hours, rate)
                section += f"A Research Engineer/Specialist {intro}. "
                section += f"This technical specialist will design and implement experimental protocols, develop and maintain research instrumentation and equipment, perform complex technical analyses, troubleshoot technical issues, provide technical training to research team members, contribute to method development and optimization, and assist with technical aspects of manuscript preparation. "

                year_data = []
                for j in range(self.data.years):
                    salary = position.get(f'salary_y{j+1}', 0)
                    if salary > 0:
                        year_data.append((j+1, salary))
                if year_data:
                    section += f"Total support is budgeted at {format_currency(position_total)}, with distribution: {format_year_range(year_data)}. "

                section += r"A 3\% annual salary increase is included. "

                section += "This specialized technical expertise is crucial for achieving the project's technical objectives. "
                section += highlight_todo("[TODO: Specify required technical specializations and experience]") + "\n\n"

            elif 'program' in role_lower and 'evaluator' in role_lower:
                intro = self._get_years_intro(years_with_salary, hours, rate)
                section += f"A Program Evaluator {intro}. "
                section += f"The Program Evaluator will design and implement evaluation frameworks and methodologies, collect and analyze program outcome data, assess project effectiveness and impact, prepare evaluation reports and recommendations, conduct stakeholder surveys and interviews, monitor progress toward project goals, and provide feedback to improve program implementation. "

                year_data = []
                for j in range(self.data.years):
                    salary = position.get(f'salary_y{j+1}', 0)
                    if salary > 0:
                        year_data.append((j+1, salary))
                if year_data:
                    section += f"Total evaluation support is {format_currency(position_total)}, allocated as: {format_year_range(year_data)}. "

                section += r"A 3\% annual salary increase is included. "

                section += "Independent evaluation is essential for assessing project impact and informing continuous improvement. "
                section += highlight_todo("[TODO: Specify evaluation methodologies and expected deliverables]") + "\n\n"

            elif 'data' in role_lower and ('analyst' in role_lower or 'scientist' in role_lower):
                intro = self._get_years_intro(years_with_salary, hours, rate)
                section += f"A Data Analyst/Scientist {intro}. "
                section += f"This position will manage and analyze research data, develop and implement data management protocols, perform statistical analyses and modeling, create data visualizations and reports, ensure data quality and integrity, maintain research databases, and contribute to data-related sections of publications and presentations. "

                year_data = []
                for j in range(self.data.years):
                    salary = position.get(f'salary_y{j+1}', 0)
                    if salary > 0:
                        year_data.append((j+1, salary))
                if year_data:
                    section += f"Total compensation is {format_currency(position_total)}, distributed as: {format_year_range(year_data)}. "