Skip to content

Commit 13a2cf3

Browse files
clanmills and piponazo authored
fix_1416_iptc_DateCreated (#1547)
* fix_1416_iptc_DateCreated * Fix unit tests * DateValue:read 2nd iteration on pre-condition * test with ISO_8601 date format * Use std::regex for ISO 8601 basic & extended date formats * Use std::regex for ISO 8601 basic & extended time formats * Add more tests & notes for DateValue & TimeValue * Comment tests using local calendar times * DateValue::write also adds padding to year field Co-authored-by: Luis Díaz Más <[email protected]>
1 parent fd84471 commit 13a2cf3

File tree

5 files changed

+282
-177
lines changed

5 files changed

+282
-177
lines changed

include/exiv2/value.hpp

Lines changed: 1 addition & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -979,7 +979,6 @@ namespace Exiv2 {
979979

980980
//! Simple Date helper structure
981981
struct EXIV2API Date {
982-
Date() = default;
983982
int year{0}; //!< Year
984983
int month{0}; //!< Month
985984
int day{0}; //!< Day
@@ -1031,6 +1030,7 @@ namespace Exiv2 {
10311030
@return Number of characters written.
10321031
*/
10331032
long copy(byte* buf, ByteOrder byteOrder = invalidByteOrder) const override;
1033+
10341034
//! Return date struct containing date information
10351035
virtual const Date& getDate() const;
10361036
long count() const override;
@@ -1150,31 +1150,6 @@ namespace Exiv2 {
11501150
//@}
11511151

11521152
private:
1153-
//! @name Manipulators
1154-
//@{
1155-
/*!
1156-
@brief Set time from \em buf if it conforms to \em format
1157-
(3 input items).
1158-
1159-
This function only sets the hour, minute and second parts of time_.
1160-
1161-
@param buf A 0 terminated C-string containing the time to parse.
1162-
@param format Format string for sscanf().
1163-
@return 0 if successful, else 1.
1164-
*/
1165-
int scanTime3(const char* buf, const char* format);
1166-
/*!
1167-
@brief Set time from \em buf if it conforms to \em format
1168-
(6 input items).
1169-
1170-
This function sets all parts of time_.
1171-
1172-
@param buf A 0 terminated C-string containing the time to parse.
1173-
@param format Format string for sscanf().
1174-
@return 0 if successful, else 1.
1175-
*/
1176-
int scanTime6(const char* buf, const char* format);
1177-
//@}
11781153

11791154
//! @name Accessors
11801155
//@{

src/value.cpp

Lines changed: 74 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,17 @@
2727
#include "unused.h"
2828

2929
// + standard includes
30-
#include <iostream>
31-
#include <iomanip>
32-
#include <sstream>
30+
#include <ctype.h>
31+
3332
#include <cassert>
34-
#include <cstring>
35-
#include <ctime>
3633
#include <cstdarg>
3734
#include <cstdio>
3835
#include <cstdlib>
39-
#include <ctype.h>
36+
#include <cstring>
37+
#include <ctime>
38+
#include <iomanip>
39+
#include <regex>
40+
#include <sstream>
4041

4142
// *****************************************************************************
4243
// class member definitions
@@ -776,13 +777,13 @@ namespace Exiv2 {
776777
}
777778

778779
int LangAltValue::read(const std::string& buf)
779-
{
780+
{
780781
std::string b = buf;
781782
std::string lang = "x-default";
782783
if (buf.length() > 5 && buf.substr(0, 5) == "lang=") {
783784
static const char* ALPHA = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
784785
static const char* ALPHA_NUM = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
785-
786+
786787
const std::string::size_type pos = buf.find_first_of(' ');
787788
if (pos == std::string::npos) {
788789
lang = buf.substr(5);
@@ -796,7 +797,7 @@ namespace Exiv2 {
796797

797798
if (lang.empty() || lang.find('"') != lang.length() - 1)
798799
throw Error(kerInvalidLangAltValue, buf);
799-
800+
800801
lang = lang.substr(0, lang.length()-1);
801802
}
802803

@@ -809,7 +810,7 @@ namespace Exiv2 {
809810
if (lang.at(charPos) != '-' || lang.find_first_not_of(ALPHA_NUM, charPos+1) != std::string::npos)
810811
throw Error(kerInvalidLangAltValue, buf);
811812
}
812-
813+
813814
b.clear();
814815
if (pos != std::string::npos) b = buf.substr(pos+1);
815816
}
@@ -906,51 +907,30 @@ namespace Exiv2 {
906907

907908
int DateValue::read(const byte* buf, long len, ByteOrder /*byteOrder*/)
908909
{
909-
// Hard coded to read Iptc style dates
910-
if (len != 8) {
911-
#ifndef SUPPRESS_WARNINGS
912-
EXV_WARNING << Error(kerUnsupportedDateFormat) << "\n";
913-
#endif
914-
return 1;
915-
}
916-
// Make the buffer a 0 terminated C-string for sscanf
917-
char b[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 };
918-
std::memcpy(b, reinterpret_cast<const char*>(buf), 8);
919-
int scanned = sscanf(b, "%4d%2d%2d",
920-
&date_.year, &date_.month, &date_.day);
921-
if ( scanned != 3
922-
|| date_.year < 0
923-
|| date_.month < 1 || date_.month > 12
924-
|| date_.day < 1 || date_.day > 31) {
925-
#ifndef SUPPRESS_WARNINGS
926-
EXV_WARNING << Error(kerUnsupportedDateFormat) << "\n";
927-
#endif
928-
return 1;
929-
}
930-
return 0;
910+
const std::string str(reinterpret_cast<const char*>(buf), len);
911+
return read(str);
931912
}
932913

933914
int DateValue::read(const std::string& buf)
934915
{
935-
// Hard coded to read Iptc style dates
936-
if (buf.length() < 8) {
937-
#ifndef SUPPRESS_WARNINGS
938-
EXV_WARNING << Error(kerUnsupportedDateFormat) << "\n";
939-
#endif
940-
return 1;
916+
// ISO 8601 date formats:
917+
// https://web.archive.org/web/20171020084445/https://www.loc.gov/standards/datetime/ISO_DIS%208601-1.pdf
918+
static const std::regex reExtended(R"(^(\d{4})-(0[1-9]|1[0-2])-(0[1-9]|[12][0-9]|3[01]))");
919+
static const std::regex reBasic(R"(^(\d{4})(0[1-9]|1[0-2])(0[1-9]|[12][0-9]|3[01]))");
920+
std::smatch sm;
921+
922+
// Note: regex_search is used here instead of regex_match because the string can be longer than expected and
923+
// also contain the time
924+
if (std::regex_search(buf, sm, reExtended) || std::regex_search(buf, sm, reBasic)) {
925+
date_.year = std::stoi(sm[1].str());
926+
date_.month = std::stoi(sm[2].str());
927+
date_.day = std::stoi(sm[3].str());
928+
return 0;
941929
}
942-
int scanned = sscanf(buf.c_str(), "%4d-%2d-%2d",
943-
&date_.year, &date_.month, &date_.day);
944-
if ( scanned != 3
945-
|| date_.year < 0
946-
|| date_.month < 1 || date_.month > 12
947-
|| date_.day < 1 || date_.day > 31) {
948930
#ifndef SUPPRESS_WARNINGS
949931
EXV_WARNING << Error(kerUnsupportedDateFormat) << "\n";
950932
#endif
951-
return 1;
952-
}
953-
return 0;
933+
return 1;
954934
}
955935

956936
void DateValue::setDate(const Date& src)
@@ -962,9 +942,11 @@ namespace Exiv2 {
962942

963943
long DateValue::copy(byte* buf, ByteOrder /*byteOrder*/) const
964944
{
945+
// \note Here the date is copied in the Basic format YYYYMMDD, as the IPTC key Iptc.Application2.DateCreated
946+
// wants it. Check https://exiv2.org/iptc.html
947+
965948
// sprintf wants to add the null terminator, so use oversized buffer
966949
char temp[9];
967-
968950
int wrote = snprintf(temp, sizeof(temp), "%04d%02d%02d", date_.year, date_.month, date_.day);
969951
assert(wrote == 8);
970952
std::memcpy(buf, temp, wrote);
@@ -993,8 +975,9 @@ namespace Exiv2 {
993975

994976
std::ostream& DateValue::write(std::ostream& os) const
995977
{
978+
// Write DateValue in ISO 8601 Extended format: YYYY-MM-DD
996979
std::ios::fmtflags f( os.flags() );
997-
os << date_.year << '-' << std::right
980+
os << std::setw(4) << std::setfill('0') << date_.year << '-' << std::right
998981
<< std::setw(2) << std::setfill('0') << date_.month << '-'
999982
<< std::setw(2) << std::setfill('0') << date_.day;
1000983
os.flags(f);
@@ -1044,83 +1027,50 @@ namespace Exiv2 {
10441027

10451028
int TimeValue::read(const byte* buf, long len, ByteOrder /*byteOrder*/)
10461029
{
1047-
// Make the buffer a 0 terminated C-string for scanTime[36]
1048-
char b[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
1049-
std::memcpy(b, reinterpret_cast<const char*>(buf), (len < 12 ? len : 11));
1050-
// Hard coded to read HHMMSS or Iptc style times
1051-
int rc = 1;
1052-
if (len == 6) {
1053-
// Try to read (non-standard) HHMMSS format
1054-
rc = scanTime3(b, "%2d%2d%2d");
1055-
}
1056-
if (len == 11) {
1057-
rc = scanTime6(b, "%2d%2d%2d%1c%2d%2d");
1058-
}
1059-
if (rc) {
1060-
rc = 1;
1061-
#ifndef SUPPRESS_WARNINGS
1062-
EXV_WARNING << Error(kerUnsupportedTimeFormat) << "\n";
1063-
#endif
1064-
}
1065-
return rc;
1030+
const std::string str(reinterpret_cast<const char*>(buf), len);
1031+
return read(str);
10661032
}
10671033

10681034
int TimeValue::read(const std::string& buf)
10691035
{
1070-
// Hard coded to read H:M:S or Iptc style times
1071-
int rc = 1;
1072-
if (buf.length() < 9) {
1073-
// Try to read (non-standard) H:M:S format
1074-
rc = scanTime3(buf.c_str(), "%d:%d:%d");
1075-
}
1076-
else {
1077-
rc = scanTime6(buf.c_str(), "%d:%d:%d%1c%d:%d");
1036+
// ISO 8601 time formats:
1037+
// https://web.archive.org/web/20171020084445/https://www.loc.gov/standards/datetime/ISO_DIS%208601-1.pdf
1038+
// Not supported formats:
1039+
// 4.2.2.4 Representations with decimal fraction: 232050,5
1040+
static const std::regex re(R"(^(2[0-3]|[01][0-9]):?([0-5][0-9])?:?([0-5][0-9])?$)");
1041+
static const std::regex reExt(R"(^(2[0-3]|[01][0-9]):?([0-5][0-9]):?([0-5][0-9])(Z|[+-](?:2[0-3]|[01][0-9])(?::?(?:[0-5][0-9]))?)$)");
1042+
1043+
std::smatch sm;
1044+
if (std::regex_match(buf, sm, re) || std::regex_match(buf, sm, reExt)) {
1045+
time_.hour = sm.length(1) ? std::stoi(sm[1].str()) : 0;
1046+
time_.minute = sm.length(2) ? std::stoi(sm[2].str()) : 0;
1047+
time_.second = sm.length(3) ? std::stoi(sm[3].str()) : 0;
1048+
if (sm.size() > 4)
1049+
{
1050+
std::string str = sm[4].str();
1051+
const auto strSize = str.size();
1052+
auto posColon = str.find(':');
1053+
1054+
if (posColon == std::string::npos) {
1055+
// No colon in the time zone offset: ISO 8601 basic form (±hh or ±hhmm)
1056+
time_.tzHour = std::stoi(str.substr(0,3));
1057+
if (strSize > 3) {
1058+
int minute = std::stoi(str.substr(3));
1059+
time_.tzMinute = time_.tzHour < 0 ? -minute : minute;
1060+
}
1061+
} else {
1062+
// Colon in the time zone offset: ISO 8601 extended form (±hh:mm)
1063+
time_.tzHour = std::stoi(str.substr(0, posColon));
1064+
int minute = std::stoi(str.substr(posColon+1));
1065+
time_.tzMinute = time_.tzHour < 0 ? -minute : minute;
1066+
}
1067+
}
1068+
return 0;
10781069
}
1079-
if (rc) {
1080-
rc = 1;
10811070
#ifndef SUPPRESS_WARNINGS
1082-
EXV_WARNING << Error(kerUnsupportedTimeFormat) << "\n";
1071+
EXV_WARNING << Error(kerUnsupportedTimeFormat) << "\n";
10831072
#endif
1084-
}
1085-
return rc;
1086-
}
1087-
1088-
int TimeValue::scanTime3(const char* buf, const char* format)
1089-
{
1090-
int rc = 1;
1091-
Time t;
1092-
int scanned = sscanf(buf, format, &t.hour, &t.minute, &t.second);
1093-
if ( scanned == 3
1094-
&& t.hour >= 0 && t.hour < 24
1095-
&& t.minute >= 0 && t.minute < 60
1096-
&& t.second >= 0 && t.second < 60) {
1097-
time_ = t;
1098-
rc = 0;
1099-
}
1100-
return rc;
1101-
}
1102-
1103-
int TimeValue::scanTime6(const char* buf, const char* format)
1104-
{
1105-
int rc = 1;
1106-
Time t;
1107-
char plusMinus = 0;
1108-
int scanned = sscanf(buf, format, &t.hour, &t.minute, &t.second,
1109-
&plusMinus, &t.tzHour, &t.tzMinute);
1110-
if ( scanned == 6
1111-
&& t.hour >= 0 && t.hour < 24
1112-
&& t.minute >= 0 && t.minute < 60
1113-
&& t.second >= 0 && t.second < 60
1114-
&& t.tzHour >= 0 && t.tzHour < 24
1115-
&& t.tzMinute >= 0 && t.tzMinute < 60) {
1116-
time_ = t;
1117-
if (plusMinus == '-') {
1118-
time_.tzHour *= -1;
1119-
time_.tzMinute *= -1;
1120-
}
1121-
rc = 0;
1122-
}
1123-
return rc;
1073+
return 1;
11241074
}
11251075

11261076
void TimeValue::setTime( const Time& src )
@@ -1130,6 +1080,8 @@ namespace Exiv2 {
11301080

11311081
long TimeValue::copy(byte* buf, ByteOrder /*byteOrder*/) const
11321082
{
1083+
// NOTE: Here the time is copied in the Basic format HHMMSS±HHMM, as the IPTC key Iptc.Application2.TimeCreated
1084+
// wants it. Check https://exiv2.org/iptc.html
11331085
char temp[12];
11341086
char plusMinus = '+';
11351087
if (time_.tzHour < 0 || time_.tzMinute < 0)
@@ -1167,8 +1119,10 @@ namespace Exiv2 {
11671119

11681120
std::ostream& TimeValue::write(std::ostream& os) const
11691121
{
1122+
// Write TimeValue in ISO 8601 Extended format: hh:mm:ss±hh:mm
11701123
char plusMinus = '+';
1171-
if (time_.tzHour < 0 || time_.tzMinute < 0) plusMinus = '-';
1124+
if (time_.tzHour < 0 || time_.tzMinute < 0)
1125+
plusMinus = '-';
11721126

11731127
std::ios::fmtflags f( os.flags() );
11741128
os << std::right

tests/bash_tests/utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -642,11 +642,11 @@ def addModTest(filename):
642642
stdin = """
643643
a Iptc.Application2.Headline The headline I am
644644
a Iptc.Application2.Keywords Yet another keyword
645-
m Iptc.Application2.DateCreated 2004-8-3
645+
m Iptc.Application2.DateCreated 2004-08-03
646646
a Iptc.Application2.Urgency 3
647647
m Iptc.Application2.SuppCategory "bla bla ba"
648648
a Iptc.Envelope.ModelVersion 2
649-
a Iptc.Envelope.TimeSent 14:41:0-05:00
649+
a Iptc.Envelope.TimeSent 14:41:00-05:00
650650
a Iptc.Application2.RasterizedCaption 230 42 34 2 90 84 23 146
651651
""".lstrip('\n').encode()
652652
Executer('iptctest {tmp}', vars(), stdin=stdin)

0 commit comments

Comments
 (0)