1+ import re
12from enum import StrEnum
23
34from pydantic import BaseModel , Field , model_validator
@@ -118,11 +119,36 @@ class CompanyInfo(BaseModel):
118119
119120
# First three letters of each English month name -> zero-padded month number.
_MONTH_NUMBERS = {
    "jan": "01", "feb": "02", "mar": "03", "apr": "04",
    "may": "05", "jun": "06", "jul": "07", "aug": "08",
    "sep": "09", "oct": "10", "nov": "11", "dec": "12",
}

# Spellings (after strip + lower) that mean "no date yet"; they become None.
_ONGOING_MARKERS = frozenset({"present", "current", ""})


def _normalize_year_month(raw: str) -> str | None:
    """Return *raw* rewritten as ``YYYY.MM``, or ``None`` for an ongoing marker.

    Recognized inputs (surrounding whitespace is ignored):

    * ``present`` / ``current`` / empty string -> ``None``
    * ``YYYY.MM`` (already normalized)          -> unchanged, month zero-padded
    * ``MM.YYYY`` or ``M.YYYY``                 -> ``YYYY.MM``
    * ``Mon YYYY`` / ``Month YYYY``             -> ``YYYY.MM``
    * ``YYYY``                                  -> ``YYYY.01``

    Anything else is returned exactly as given so that downstream
    validation sees the original value.
    """
    text = raw.strip()
    if text.lower() in _ONGOING_MARKERS:
        return None

    # Already in the target format. This must be checked explicitly: without
    # it, "2020.05" would fall through to the bare-year rule and be rewritten
    # to "2020.05.01", corrupting values on every re-validation.
    if found := re.fullmatch(r"(\d{4})\.(\d{1,2})", text):
        year, month = found.groups()
        return f"{year}.{int(month):02d}"

    # MM.YYYY (prefix match: trailing text after the year is tolerated).
    if found := re.match(r"(\d{1,2})\.(\d{4})", text):
        month, year = found.groups()
        return f"{year}.{int(month):02d}"

    # "Jan 2020", "Sept. 2020", "January 2020": only the first three letters
    # of the word select the month. An unrecognized word falls back to
    # January so that the year is still retained (best-effort behavior).
    if found := re.match(r"([A-Za-z]{3,})\.?\s+(\d{4})", text):
        month_name, year = found.groups()
        month = _MONTH_NUMBERS.get(month_name[:3].lower(), "01")
        return f"{year}.{month}"

    # Bare year: the whole value must be exactly four digits, otherwise
    # strings such as "2020-05" would be turned into "2020-05.01".
    if re.fullmatch(r"\d{4}", text):
        return f"{text}.01"

    return raw


class EmploymentDuration(BaseModel):
    """Time span of an employment entry with dates stored as ``YYYY.MM`` strings."""

    start: str  # Format: YYYY.MM
    end: str | None = None  # Format: YYYY.MM or None for ongoing
    duration_months: int

    @model_validator(mode="before")
    @classmethod
    def normalize_dates(cls, v: dict):
        """Normalize ``start`` and ``end`` to ``YYYY.MM`` before field validation.

        Args:
            v: Raw input handed to the model. Only ``dict`` inputs are
                processed; any other value is returned untouched.

        Returns:
            A shallow copy of ``v`` whose string-valued ``start`` / ``end``
            entries have been passed through ``_normalize_year_month``
            (``None`` for "present" / "current" / empty). The caller's dict
            is never modified.
        """
        if not isinstance(v, dict):
            return v

        # Work on a copy: a "before" validator receives the caller's own
        # object, and mutating it would leak normalization side effects.
        normalized = dict(v)
        for field in ("start", "end"):
            value = normalized.get(field)
            if isinstance(value, str):
                normalized[field] = _normalize_year_month(value)
        return normalized
151+
126152
127153class KeyPoint (BaseModel ):
128154 text : str
@@ -202,8 +228,8 @@ class EducationItem(BaseModel):
202228 field : str
203229 institution : InstitutionInfo
204230 location : Location | None = None
205- start : str | None = None
206- end : str | None = None
231+ start : str | None = None # Format: YYYY.MM
232+ end : str | None = None # Format: YYYY.MM or None for in-progress
207233 status : EducationStatus
208234 coursework : list [Coursework ] = Field (default_factory = list )
209235 extras : list [EducationExtra ] = Field (default_factory = list )
@@ -213,6 +239,27 @@ class EducationItem(BaseModel):
213239 def accept_legacy_education (cls , v : dict ):
214240 if "institution" in v and isinstance (v ["institution" ], str ):
215241 v ["institution" ] = {"name" : v ["institution" ]}
242+
243+ # Normalize dates to YYYY.MM format
244+ for field in ["start" , "end" ]:
245+ if field in v and isinstance (v [field ], str ):
246+ val = v [field ].strip ()
247+ if val .lower () in ("present" , "current" , "" ):
248+ v [field ] = None
249+ elif match := re .match (r'(\d{2})\.(\d{4})' , val ): # MM.YYYY
250+ month , year = match .groups ()
251+ v [field ] = f"{ year } .{ month } "
252+ elif match := re .match (r'([A-Za-z]{3})\s+(\d{4})' , val ): # MMM YYYY
253+ month_name , year = match .groups ()
254+ month_map = {
255+ 'jan' : '01' , 'feb' : '02' , 'mar' : '03' , 'apr' : '04' ,
256+ 'may' : '05' , 'jun' : '06' , 'jul' : '07' , 'aug' : '08' ,
257+ 'sep' : '09' , 'oct' : '10' , 'nov' : '11' , 'dec' : '12'
258+ }
259+ month = month_map .get (month_name .lower ()[:3 ], '01' )
260+ v [field ] = f"{ year } .{ month } "
261+ elif match := re .match (r'(\d{4})' , val ): # YYYY only
262+ v [field ] = f"{ val } .01"
216263 if "start_date" in v and "start" not in v :
217264 v ["start" ] = v .pop ("start_date" )
218265 if "end_date" in v and "end" not in v :
0 commit comments