Skip to content

Commit 03bc045

Browse files
PaulHax authored and thewtex committed
WIP feat(image-sets-normalization): convert to UTF8 from character set
1 parent 830fafa commit 03bc045

File tree

3 files changed

+47
-41
lines changed

3 files changed

+47
-41
lines changed

packages/dicom/gdcm/DICOMTagReader.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -361,14 +361,14 @@ class CharStringToUTF8Converter
361361
}
362362

363363
std::string
364-
convertCharStringToUTF8(const std::string &str)
364+
convertCharStringToUTF8(const std::string &str) const
365365
{
366366
size_t len = str.size();
367367
return this->convertCharStringToUTF8(str.c_str(), len);
368368
}
369369

370370
std::string
371-
convertCharStringToUTF8(const char *str, size_t len)
371+
convertCharStringToUTF8(const char *str, size_t len) const
372372
{
373373
// m_charsets must always have at least 1 element prior to calling
374374
const char *initialCharset = definedTermToIconvCharset(m_charsets[0]);

packages/dicom/gdcm/Tags.h

Lines changed: 21 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,7 @@
1919
#define TAGS_H
2020

2121
#include <string>
22-
#include <unordered_set>
22+
#include <set>
2323
#include "itkGDCMImageIO.h"
2424

2525
using Tag = gdcm::Tag;
@@ -29,8 +29,13 @@ const Tag STUDY_UID(0x0020, 0x000d); // "Study Instance UID"
2929
const Tag SERIES_UID(0x0020, 0x000e); // "Series Instance UID"
3030
const Tag INSTANCE_UID(0x0008, 0x0018); // "Instance UID"
3131

32-
const Tag FRAME_OF_REFERENCE_UID(0x0020, 0x0052);
33-
const Tag IMAGE_ORIENTATION_PATIENT(0x0020, 0x0037);
32+
const Tag FRAME_OF_REFERENCE_UID(0x0020, 0x0052);
33+
const Tag IMAGE_ORIENTATION_PATIENT(0x0020, 0x0037);
34+
35+
const Tag SPECIFIC_CHARACTER_SET(0x0008, 0x0005);
36+
const Tag PIXEL_DATA_TAG(0x7fe0, 0x0010);
37+
38+
const Tags EMPTY_TAGS = {};
3439

3540
// Tag names from https://docs.aws.amazon.com/healthimaging/latest/devguide/reference-dicom-support.html
3641
const Tags PATIENT_TAGS = {
@@ -193,4 +198,17 @@ const Tags SERIES_TAGS = {
193198
Tag(0x0020, 0x1040), // "Position Reference Indicator"
194199
};
195200

201+
// Returns a (pointer, length) view of the raw bytes stored under `tag` in `ds`.
//
// The result is (nullptr, 0) when the tag is absent, when its element is
// empty, or when its value is not stored as a gdcm::ByteValue (GetByteValue()
// returns null in that case, e.g. for sequence elements).
//
// The pointer refers to storage owned by the data set's element; it is not
// copied and is not guaranteed to be NUL-terminated, so always use it together
// with the returned length.
//
// Declared `inline` because this definition lives in a header: without it,
// including Tags.h from more than one translation unit produces duplicate
// symbol definitions at link time.
inline std::pair<const char *, size_t> getTagBuffer(const gdcm::DataSet &ds, const gdcm::Tag &tag)
{
  if (!ds.FindDataElement(tag))
  {
    return std::make_pair(nullptr, 0);
  }

  // Bind by reference: no need to copy the element just to inspect it.
  const gdcm::DataElement &de = ds.GetDataElement(tag);
  if (de.IsEmpty())
  {
    return std::make_pair(nullptr, 0);
  }

  const gdcm::ByteValue *bv = de.GetByteValue();
  if (bv == nullptr)
  {
    // Value exists but is not a plain byte buffer; nothing to hand back.
    return std::make_pair(nullptr, 0);
  }

  const char *tagValue = bv->GetPointer();
  const size_t len = bv->GetLength();
  return std::make_pair(tagValue, len);
}
213+
196214
#endif // TAGS_H

packages/dicom/gdcm/image-sets-normalization.cxx

Lines changed: 24 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -108,7 +108,7 @@ namespace gdcm
108108
return false;
109109
}
110110

111-
void dataElementToJSONArray(const VR::VRType vr, const DataElement &de, rapidjson::Value &jsonArray, rapidjson::Document::AllocatorType &allocator)
111+
void dataElementToJSONArray(const VR::VRType vr, const DataElement &de, rapidjson::Value &jsonArray, const CharStringToUTF8Converter toUtf8, rapidjson::Document::AllocatorType &allocator)
112112
{
113113
jsonArray.SetArray();
114114
if (de.IsEmpty())
@@ -169,7 +169,7 @@ namespace gdcm
169169
assert(str2 && (size_t)(str2 - component.c_str()) <= len2);
170170
const char *sep2 = strchr(str2, '=');
171171
const size_t llen2 = (sep2 != NULL) ? (sep2 - str2) : (component.c_str() + len2 - str2);
172-
const std::string group(str2, llen2);
172+
const std::string group = toUtf8.convertCharStringToUTF8(str2, llen2);
173173
// const char *thekey = Keys[idx++];
174174

175175
// rapidjson::Value nameType(thekey, allocator);
@@ -235,9 +235,9 @@ namespace gdcm
235235
assert(str1 && (size_t)(str1 - value) <= len);
236236
const char *sep = strchr(str1, '\\');
237237
const size_t llen = (sep != NULL) ? (sep - str1) : (value + len - str1);
238-
// json_object_array_add(my_array, json_object_new_string_len(str1, llen));
238+
const std::string valueUtf8 = toUtf8.convertCharStringToUTF8(str1, llen);
239239
rapidjson::Value valueString;
240-
valueString.SetString(str1, llen, allocator);
240+
valueString.SetString(valueUtf8.c_str(), valueUtf8.size(), allocator);
241241
jsonArray.PushBack(valueString, allocator);
242242
if (sep == NULL)
243243
break;
@@ -246,14 +246,14 @@ namespace gdcm
246246
}
247247
else // default
248248
{
249+
const std::string valueUtf8 = toUtf8.convertCharStringToUTF8(value, len);
249250
rapidjson::Value valueString;
250-
valueString.SetString(value, len, allocator);
251+
valueString.SetString(valueUtf8.c_str(), valueUtf8.size(), allocator);
251252
jsonArray.PushBack(valueString, allocator);
252253
}
253254
}
254255

255-
const gdcm::Tag PIXEL_DATA_TAG = gdcm::Tag(0x7fe0, 0x0010);
256-
rapidjson::Value *toJson(const gdcm::DataSet &dataSet, const Tags pickTags, const Tags skipTags, rapidjson::Value &dicomTagsObject, rapidjson::Document::AllocatorType &allocator)
256+
rapidjson::Value *toJson(const gdcm::DataSet &dataSet, const Tags &pickTags, const Tags &skipTags, const CharStringToUTF8Converter &toUtf8, rapidjson::Value &dicomTagsObject, rapidjson::Document::AllocatorType &allocator)
257257
{
258258
for (gdcm::DataSet::ConstIterator it = dataSet.Begin(); it != dataSet.End(); ++it)
259259
{
@@ -293,7 +293,7 @@ namespace gdcm
293293
const DataSet &nested = item.GetNestedDataSet();
294294
rapidjson::Value sequenceObject(rapidjson::kObjectType);
295295
// grab all nested tags, empty pick and skip tag sets
296-
toJson(nested, {}, {}, sequenceObject, allocator);
296+
toJson(nested, EMPTY_TAGS, EMPTY_TAGS, toUtf8, sequenceObject, allocator);
297297
tagValue.PushBack(sequenceObject, allocator);
298298
}
299299
}
@@ -312,7 +312,7 @@ namespace gdcm
312312
}
313313
else if (VR::IsASCII(vr))
314314
{
315-
dataElementToJSONArray(vr, de, tagValue, allocator);
315+
dataElementToJSONArray(vr, de, tagValue, toUtf8, allocator);
316316
}
317317
else
318318
{
@@ -466,6 +466,14 @@ namespace gdcm
466466
}
467467
}
468468

469+
// Convenience overload of toJson that builds the UTF-8 converter itself.
//
// Reads the Specific Character Set tag from `dataSet` (an empty string is used
// when getTagBuffer yields no buffer), constructs a CharStringToUTF8Converter
// from it, and forwards everything to the overload that takes a converter.
// Returns whatever that overload returns.
rapidjson::Value *toJson(const gdcm::DataSet &dataSet, const Tags &pickTags, const Tags &skipTags, rapidjson::Value &dicomTagsObject, rapidjson::Document::AllocatorType &allocator)
{
  const std::pair<const char *, size_t> charSetBuffer = getTagBuffer(dataSet, SPECIFIC_CHARACTER_SET);
  const bool hasCharSet = charSetBuffer.first != nullptr;
  const std::string charSet = hasCharSet ? std::string(charSetBuffer.first, charSetBuffer.second) : std::string("");
  const CharStringToUTF8Converter decoder = CharStringToUTF8Converter(charSet);
  return toJson(dataSet, pickTags, skipTags, decoder, dicomTagsObject, allocator);
}
476+
469477
using FileName = std::string;
470478

471479
struct DicomFile
@@ -476,13 +484,6 @@ struct DicomFile
476484
DicomFile(const FileName &fileName)
477485
: fileName(fileName)
478486
{
479-
itk::DICOMTagReader tagReader;
480-
if (!tagReader.CanReadFile(fileName))
481-
{
482-
throw std::runtime_error("Can not read the input DICOM file: " + fileName);
483-
}
484-
tagReader.SetFileName(fileName);
485-
486487
gdcm::ImageReader reader;
487488
reader.SetFileName(fileName.c_str());
488489
if (!reader.Read())
@@ -520,22 +521,9 @@ DicomFiles loadFiles(const std::vector<FileName> &fileNames)
520521
}
521522

522523
using Volume = std::vector<DicomFile>;
523-
using Volumes = std::vector<Volume>; // aka ImageSet
524+
using Volumes = std::vector<Volume>; // Aka ImageSet. A set of volumes/series that share Study and Patient.
524525
using ImageSets = std::vector<Volumes>;
525526

526-
std::pair<const char *, size_t> getTagBuffer(const gdcm::DataSet &ds, const gdcm::Tag &tag)
527-
{
528-
if (!ds.FindDataElement(tag) || ds.GetDataElement(tag).IsEmpty())
529-
{
530-
return std::make_pair(nullptr, 0);
531-
}
532-
const gdcm::DataElement de = ds.GetDataElement(tag);
533-
const gdcm::ByteValue *bv = de.GetByteValue();
534-
const char *tagValue = bv->GetPointer();
535-
size_t len = bv->GetLength();
536-
return std::make_pair(tagValue, len);
537-
}
538-
539527
bool compareTags(const gdcm::DataSet &tagsA, const gdcm::DataSet &tagsB, const Tags &tagKeys)
540528
{
541529
for (const auto &tagKey : tagKeys)
@@ -649,13 +637,13 @@ rapidjson::Document toJson(const ImageSets &imageSets)
649637
{
650638
rapidjson::Document imageSetsJson(rapidjson::kArrayType);
651639
rapidjson::Document::AllocatorType &allocator = imageSetsJson.GetAllocator();
652-
gdcm::DataSet dataSet;
653640
Tags instanceSkipTags; // filter out patient, study, series tags from instance object
654641
instanceSkipTags.insert(PATIENT_TAGS.begin(), PATIENT_TAGS.end());
655642
instanceSkipTags.insert(STUDY_TAGS.begin(), STUDY_TAGS.end());
656643
instanceSkipTags.insert(SERIES_TAGS.begin(), SERIES_TAGS.end());
657644
for (const Volumes &volumes : imageSets)
658645
{
646+
gdcm::DataSet dataSet;
659647
rapidjson::Value seriesById(rapidjson::kObjectType);
660648
for (const Volume &volume : volumes)
661649
{
@@ -665,7 +653,8 @@ rapidjson::Document toJson(const ImageSets &imageSets)
665653
FileName file = dicomFile.fileName;
666654
dataSet = dicomFile.dataSet;
667655
rapidjson::Value instanceTagsJson(rapidjson::kObjectType);
668-
toJson(dataSet, {}, instanceSkipTags, instanceTagsJson, allocator);
656+
657+
toJson(dataSet, EMPTY_TAGS, instanceSkipTags, instanceTagsJson, allocator);
669658
rapidjson::Value instance(rapidjson::kObjectType);
670659
instance.AddMember("DICOM", instanceTagsJson, allocator);
671660

@@ -686,7 +675,7 @@ rapidjson::Document toJson(const ImageSets &imageSets)
686675

687676
// Series
688677
rapidjson::Value seriesTags(rapidjson::kObjectType);
689-
toJson(dataSet, SERIES_TAGS, {}, seriesTags, allocator);
678+
toJson(dataSet, SERIES_TAGS, EMPTY_TAGS, seriesTags, allocator);
690679
rapidjson::Value series(rapidjson::kObjectType);
691680
series.AddMember("DICOM", seriesTags, allocator);
692681
series.AddMember("Instances", instances, allocator);
@@ -703,14 +692,14 @@ rapidjson::Document toJson(const ImageSets &imageSets)
703692

704693
// Patient
705694
rapidjson::Value patientTags(rapidjson::kObjectType);
706-
toJson(dataSet, PATIENT_TAGS, {}, patientTags, allocator);
695+
toJson(dataSet, PATIENT_TAGS, EMPTY_TAGS, patientTags, allocator);
707696
rapidjson::Value patient(rapidjson::kObjectType);
708697
patient.AddMember("DICOM", patientTags, allocator);
709698
imageSet.AddMember("Patient", patient, allocator);
710699

711700
// Study
712701
rapidjson::Value studyTags(rapidjson::kObjectType);
713-
toJson(dataSet, STUDY_TAGS, {}, studyTags, allocator);
702+
toJson(dataSet, STUDY_TAGS, EMPTY_TAGS, studyTags, allocator);
714703
rapidjson::Value study(rapidjson::kObjectType);
715704
study.AddMember("DICOM", studyTags, allocator);
716705
study.AddMember("Series", seriesById, allocator);
@@ -739,7 +728,6 @@ int main(int argc, char *argv[])
739728
const ImageSets imageSets = groupByImageSet(volumes);
740729

741730
rapidjson::Document imageSetsJson = toJson(imageSets);
742-
743731
rapidjson::StringBuffer stringBuffer;
744732
rapidjson::Writer<rapidjson::StringBuffer> writer(stringBuffer);
745733
imageSetsJson.Accept(writer);

0 commit comments

Comments
 (0)