-
Notifications
You must be signed in to change notification settings - Fork 3
Open
Labels
Description
When importing the data into the database, I came across different date formats/issues.
This is the function I ended up writing to cleanup stuff. If I get more time, I'll try to record the specific examples for each case (where one doesn't already exist).
function fix_date($date, $dept = NULL) {
if ($date === NULL) return NULL;
// Some departments uses these to mean no date
if (in_array($date, ['-', '--', 'N/A', '-00-00'])) return NULL;
if (strpos($date, '#') !== FALSE) return NULL;
// Apparently Health Canada has a bad date in their system
// (http://www.contracts-contrats.hc-sc.gc.ca//cfob/mssid/contractdisc.nsf/WEBbypurpose/1FF63FED7899CE3585257BB80069C522?OpenDocument&lang=eng&)
if ($date === 'Incorrect data type for operator or @Function: Time/Date expected-ed-ed') return NULL;
// PSPC also has a bad date
// (http://www.tpsgc-pwgsc.gc.ca/cgi-bin/proactive/cl.pl?lang=eng;SCR=D;Sort=0;PF=CL201415Q3.txt;LN=166)
if ($date === '376,775,70') return NULL;
// Handle encoding issues
$date = str_replace('â', '-', $date);
$date = str_replace('.', '-', $date);
// Handle made up days (return closest day, if identifiable)
if ($date === '2006-09-31') return '2006-09-30';
if ($date === '2005-19-15') return NULL; // Not sure if Sept. or Oct., so just NULLify it
// ssc - 902134bb25555d47dcf9ec4bb46308ad.json
if ($date === '2015-03-34') return '2015-03-31';
// ssc - 91d71947f58f0c5da4d4ed004c0bcb6c.json
if ($date === '2015-01-07 to 2016-01-06') return '2015-01-07';
// No idea about these dates
if (in_array($date, [
'42033', // ssc - 902134bb25555d47dcf9ec4bb46308ad.json
'2015-02-1-22', // ssc - 97cb0d6f55630e99d0b8cf8ee2c3bd20.json,
'2015-02-1-22', // ssc - bb920996e4a221780a9141993d2272ad.json
'2018-0-30', // ssc - d04880f46750e3f3303124749c49284c.json
'2015-20-24', // ssc - abbe1852768c33e0bf1c4009acd6a0a3.json
'2015-30-30', // ssc - bb920996e4a221780a9141993d2272ad.json
])) return NULL;
// Some departments have weird formats
if (in_array($dept, ['csa'])) {
$pdate = date_create_from_format('Y-d-m', $date);
if (!$pdate) throw new Exception('Unknown date format "' . $date . '"');
return $pdate->format('Y-m-d');
}
if (in_array($dept, ['pspc'])) {
$pdate = NULL;
$date = str_replace('-', '/', $date);
if (preg_match('/\d{1,2}\/\d{2}\/\d{4}/', $date)) {
$pdate = date_create_from_format('d/m/Y', $date);
} else {
$pdate = date_create_from_format('Y/m/d', $date);
}
if (!$pdate) throw new Exception('Unknown date format "' . $date . '"');
return $pdate->format('Y-m-d');
}
if (preg_match('/\d{1,2}-\d{1,2}-\d{4}/', $date)) {
$pdate = date_create_from_format('d-m-Y', $date);
if (!$pdate) throw new Exception('Unknown date format "' . $date . '"');
return $pdate->format('Y-m-d');
}
// d/m/Y format
if (preg_match('/\d{1,2}\/\d{1,2}\/\d{4}/', $date)) {
$pdate = date_create_from_format('d/m/Y', $date);
if (!$pdate) throw new Exception('Unknown date format "' . $date . '"');
return $pdate->format('Y-m-d');
}
// Handle typos
if (preg_match('/\d{6}-\d{2}/', $date)) {
$pdate = date_create_from_format('Ym-d', $date);
if (!$pdate) throw new Exception('Unknown date format "' . $date . '"');
return $pdate->format('Y-m-d');
}
if (preg_match('/\d{4}-\d{4}/', $date)) {
$pdate = date_create_from_format('Y-md', $date);
if (!$pdate) throw new Exception('Unknown date format "' . $date . '"');
return $pdate->format('Y-m-d');
}
return $date;
}nschonni