Skip to content

Commit 8517bb8

Browse files
committed
Decode with ExtraSamples
These samples are normal bits in the encoded (e.g. compressed) stream of each tile or row of the image. However, when considering the photometric interpretation of an image these samples should be discarded. They are only relevant for multiband. For the most part we previously rejected these images due to a mismatch between the sample count and the color as indicated by the interpretation. Only for alpha did we never really verify whether the extra channel is indeed alpha or something unrelated. We don't have a perfect solution, as GeoTIFF motivates us to have a MultiBand pseudo-color output configuration that has no related photometric interpretation and borrows the usual gray tone instead. We can only really distinguish this from a gray-alpha image by looking at the values of the ExtraSamples vector. Then again, we do not yet support GrayAlpha anyway. As a simplification for the discarding process we only support full-byte extra samples and require all samples to have the same bit depth, not only those which we return. This is blessed by TIFF's recommendations for baseline decoding as well as by libtiff.
1 parent eb62755 commit 8517bb8

File tree

10 files changed

+243
-21
lines changed

10 files changed

+243
-21
lines changed

CHANGES.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,15 @@
22

33
- `Directory` now implements `FromIterator<(Tag, Value)>`.
44

5+
Changes:
6+
- The decoder now interprets the `ExtraSamples` tag. The sample count must now
7+
more strictly match the expected value, with alpha channels only allowed for
8+
explicitly denoted unassociated or associated alpha. This affects the
9+
indicated color type when decoding images with additional samples indicated
10+
as having an unspecified relation. Previously, these may have been interpreted as
11+
alpha by the total sample count (e.g. RgbA if 4 samples under a photometric
12+
interpretation of RGB).
13+
514
# Version 0.10.3
615

716
New features:

src/decoder/image.rs

Lines changed: 150 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@ use super::tag_reader::TagReader;
44
use super::ChunkType;
55
use super::{predict_f16, predict_f32, predict_f64, ValueReader};
66
use crate::tags::{
7-
CompressionMethod, PhotometricInterpretation, PlanarConfiguration, Predictor, SampleFormat, Tag,
7+
CompressionMethod, ExtraSamples, PhotometricInterpretation, PlanarConfiguration, Predictor,
8+
SampleFormat, Tag,
89
};
910
use crate::{
1011
ColorType, Directory, TiffError, TiffFormatError, TiffResult, TiffUnsupportedError, UsageError,
@@ -69,6 +70,11 @@ pub(crate) struct Image {
6970
pub height: u32,
7071
pub bits_per_sample: u8,
7172
pub samples: u16,
73+
/// The `ExtraSamples`, defaulting to empty if not given.
74+
pub extra_samples: Vec<ExtraSamples>,
75+
/// Number of samples that belong to the photometric interpretation, samples except
76+
/// `ExtraSamples` (338, 0x0152) tag.
77+
pub photometric_samples: u16,
7278
pub sample_format: SampleFormat,
7379
pub photometric_interpretation: PhotometricInterpretation,
7480
pub compression_method: CompressionMethod,
@@ -134,10 +140,30 @@ impl Image {
134140
.map(Value::into_u16)
135141
.transpose()?
136142
.unwrap_or(1);
143+
137144
if samples == 0 {
138145
return Err(TiffFormatError::SamplesPerPixelIsZero.into());
139146
}
140147

148+
let extra_samples = match tag_reader.find_tag(Tag::ExtraSamples)? {
149+
Some(n) => n.into_u16_vec()?,
150+
None => vec![],
151+
};
152+
153+
let extra_samples = extra_samples
154+
.into_iter()
155+
.map(|x| ExtraSamples::from_u16(x).unwrap_or(ExtraSamples::Unspecified))
156+
.collect::<Vec<_>>();
157+
158+
let photometric_samples = match usize::from(samples).checked_sub(extra_samples.len()) {
159+
None => {
160+
return Err(TiffError::FormatError(
161+
TiffFormatError::InconsistentSizesEncountered,
162+
));
163+
}
164+
Some(n) => n as u16,
165+
};
166+
141167
let sample_format = match tag_reader.find_tag_uint_vec(Tag::SampleFormat)? {
142168
Some(vals) => {
143169
let sample_format: Vec<_> = vals
@@ -297,6 +323,8 @@ impl Image {
297323
height,
298324
bits_per_sample: bits_per_sample[0],
299325
samples,
326+
extra_samples,
327+
photometric_samples,
300328
sample_format,
301329
photometric_interpretation,
302330
compression_method,
@@ -312,24 +340,32 @@ impl Image {
312340
}
313341

314342
pub(crate) fn colortype(&self) -> TiffResult<ColorType> {
343+
let is_alpha_extra_samples = matches!(
344+
self.extra_samples.as_slice(),
345+
[ExtraSamples::AssociatedAlpha] | [ExtraSamples::UnassociatedAlpha]
346+
);
347+
315348
match self.photometric_interpretation {
316-
PhotometricInterpretation::RGB => match self.samples {
317-
3 => Ok(ColorType::RGB(self.bits_per_sample)),
349+
PhotometricInterpretation::RGB => match self.photometric_samples {
350+
3 => Ok(if is_alpha_extra_samples {
351+
ColorType::RGBA(self.bits_per_sample)
352+
} else {
353+
ColorType::RGB(self.bits_per_sample)
354+
}),
318355
4 => Ok(ColorType::RGBA(self.bits_per_sample)),
319-
// FIXME: We should _ignore_ other components. In particular:
320-
// > Beware of extra components. Some TIFF files may have more components per pixel
321-
// than you think. A Baseline TIFF reader must skip over them gracefully,using the
322-
// values of the SamplesPerPixel and BitsPerSample fields.
323-
// > -- TIFF 6.0 Specification, Section 7, Additional Baseline requirements.
324356
_ => Err(TiffError::UnsupportedError(
325357
TiffUnsupportedError::InterpretationWithBits(
326358
self.photometric_interpretation,
327359
vec![self.bits_per_sample; self.samples as usize],
328360
),
329361
)),
330362
},
331-
PhotometricInterpretation::CMYK => match self.samples {
332-
4 => Ok(ColorType::CMYK(self.bits_per_sample)),
363+
PhotometricInterpretation::CMYK => match self.photometric_samples {
364+
4 => Ok(if is_alpha_extra_samples {
365+
ColorType::CMYKA(self.bits_per_sample)
366+
} else {
367+
ColorType::CMYK(self.bits_per_sample)
368+
}),
333369
5 => Ok(ColorType::CMYKA(self.bits_per_sample)),
334370
_ => Err(TiffError::UnsupportedError(
335371
TiffUnsupportedError::InterpretationWithBits(
@@ -338,7 +374,7 @@ impl Image {
338374
),
339375
)),
340376
},
341-
PhotometricInterpretation::YCbCr => match self.samples {
377+
PhotometricInterpretation::YCbCr => match self.photometric_samples {
342378
3 => Ok(ColorType::YCbCr(self.bits_per_sample)),
343379
_ => Err(TiffError::UnsupportedError(
344380
TiffUnsupportedError::InterpretationWithBits(
@@ -351,6 +387,9 @@ impl Image {
351387
// later called when that interpretation is read. That function does not support
352388
// Multiband as a color type and will error. It's unclear how to resolve that exactly.
353389
PhotometricInterpretation::BlackIsZero | PhotometricInterpretation::WhiteIsZero => {
390+
// Note: compatibility with previous implementation requires us to return extra
391+
// samples as `Multiband`. For gray images however the better choice would be
392+
// returning a `Gray` color, i.e. matching on `photometric_samples` instead.
354393
match self.samples {
355394
1 => Ok(ColorType::Gray(self.bits_per_sample)),
356395
_ => Ok(ColorType::Multiband {
@@ -371,12 +410,28 @@ impl Image {
371410
}
372411
}
373412

413+
/// Get the multiband color describing this with its extra samples.
414+
pub(crate) fn color_multiband_with_extras(&self) -> ColorType {
415+
ColorType::Multiband {
416+
bit_depth: self.bits_per_sample,
417+
num_samples: self.samples,
418+
}
419+
}
420+
421+
/// Describe this with an accurate color or a multiband.
422+
pub(crate) fn color_or_fallback(&self) -> ColorType {
423+
self.colortype()
424+
.unwrap_or_else(|_| self.color_multiband_with_extras())
425+
}
426+
374427
pub(crate) fn minimum_row_stride(&self, dims: (u32, u32)) -> Option<NonZeroUsize> {
375428
let (width, height) = dims;
376429

430+
let color = self.color_or_fallback();
431+
377432
let row_stride = u64::from(width)
378-
.saturating_mul(self.samples_per_pixel() as u64)
379-
.saturating_mul(self.bits_per_sample as u64)
433+
.saturating_mul(u64::from(self.samples_per_out_texel(color)))
434+
.saturating_mul(u64::from(self.bits_per_sample))
380435
.div_ceil(8);
381436

382437
// Note: row stride should be smaller than the len if we have an actual buffer. If there
@@ -490,6 +545,13 @@ impl Image {
490545
}
491546
}
492547

548+
pub(crate) fn samples_per_out_texel(&self, color: ColorType) -> u16 {
549+
match self.planar_config {
550+
PlanarConfiguration::Chunky => color.num_samples(),
551+
PlanarConfiguration::Planar => 1,
552+
}
553+
}
554+
493555
/// Number of strips per pixel.
494556
pub(crate) fn strips_per_pixel(&self) -> usize {
495557
match self.planar_config {
@@ -643,15 +705,24 @@ impl Image {
643705
.ok_or(TiffError::FormatError(
644706
TiffFormatError::InconsistentSizesEncountered,
645707
))?;
708+
646709
if *compressed_bytes > limits.intermediate_buffer_size as u64 {
647710
return Err(TiffError::LimitsExceeded);
648711
}
649712

650713
let compression_method = self.compression_method;
651714
let photometric_interpretation = self.photometric_interpretation;
652715
let predictor = self.predictor;
653-
let samples = self.samples_per_pixel();
654716

717+
let samples = self.samples_per_pixel();
718+
let data_samples = self.samples_per_out_texel(color_type);
719+
720+
// We have two dimensions: the 2d rectangle of encoded data and the 2d rectangle this
721+
// takes up in the output. Each has an associated count of bits per pixel. The first
722+
// dimension, i.e. a ''row'', is the number of pixels that are encoded with bit packing
723+
// while the second is the byte-padded array of each so encoded slices.
724+
//
725+
// During decoding we map the relevant bits from one to the other.
655726
let chunk_dims = self.chunk_dimensions()?;
656727
let data_dims = self.chunk_data_dimensions(chunk_index)?;
657728

@@ -661,7 +732,7 @@ impl Image {
661732
let chunk_row_bytes: usize = chunk_row_bits.div_ceil(8).try_into()?;
662733

663734
let data_row_bits = (u64::from(data_dims.0) * u64::from(self.bits_per_sample))
664-
.checked_mul(samples as u64)
735+
.checked_mul(data_samples as u64)
665736
.ok_or(TiffError::LimitsExceeded)?;
666737
let data_row_bytes: usize = data_row_bits.div_ceil(8).try_into()?;
667738

@@ -677,7 +748,11 @@ impl Image {
677748
chunk_dims,
678749
)?;
679750

680-
if output_row_stride == chunk_row_bytes {
751+
let is_all_bits = samples == usize::from(data_samples);
752+
let is_output_chunk_rows = output_row_stride == chunk_row_bytes;
753+
754+
if is_output_chunk_rows && is_all_bits {
755+
// Here we can read directly into the output buffer itself.
681756
let tile = &mut buf[..chunk_row_bytes * data_dims.1 as usize];
682757
reader.read_exact(tile)?;
683758

@@ -690,6 +765,7 @@ impl Image {
690765
predictor,
691766
);
692767
}
768+
693769
if photometric_interpretation == PhotometricInterpretation::WhiteIsZero {
694770
super::invert_colors(tile, color_type, self.sample_format)?;
695771
}
@@ -711,7 +787,8 @@ impl Image {
711787
super::invert_colors(row, color_type, self.sample_format)?;
712788
}
713789
}
714-
} else {
790+
} else if is_all_bits {
791+
// We read row-by-row but each row fits in its output buffer.
715792
for row in buf.chunks_mut(output_row_stride).take(data_dims.1 as usize) {
716793
let row = &mut row[..data_row_bytes];
717794
reader.read_exact(row)?;
@@ -729,6 +806,48 @@ impl Image {
729806
byte_order,
730807
predictor,
731808
);
809+
810+
if photometric_interpretation == PhotometricInterpretation::WhiteIsZero {
811+
super::invert_colors(row, color_type, self.sample_format)?;
812+
}
813+
}
814+
} else {
815+
// The encoded data potentially takes up more space than the output data so we must be
816+
// prepared to discard some of it. That decision is bit-by-bit.
817+
let bits_per_pixel = u32::from(self.bits_per_sample) * u32::from(self.samples);
818+
// Assumes the photometric samples are always at the start. This is slightly problematic.
819+
// To expand support we should instead have different methods of transforming the read
820+
// buffer data, not only the `compact_photometric_bytes` method below and then choose
821+
// from the right one with supplied parameters. Then we can also bit-for-bit copy with
822+
// a selection for better performance.
823+
let photometric_bit_end = u32::from(self.bits_per_sample) * data_samples as u32;
824+
825+
debug_assert!(bits_per_pixel >= photometric_bit_end);
826+
827+
if bits_per_pixel % 8 != 0 || photometric_bit_end % 8 != 0 {
828+
return Err(TiffError::UnsupportedError(
829+
TiffUnsupportedError::InterpretationWithBits(
830+
self.photometric_interpretation,
831+
vec![self.bits_per_sample; self.samples as usize],
832+
),
833+
));
834+
}
835+
836+
let photo_range = photometric_bit_end / 8..bits_per_pixel / 8;
837+
let mut encoded = vec![0u8; chunk_row_bytes];
838+
for row in buf.chunks_mut(output_row_stride).take(data_dims.1 as usize) {
839+
reader.read_exact(&mut encoded)?;
840+
841+
Self::compact_photometric_bytes(&mut encoded, row, &photo_range);
842+
843+
super::fix_endianness_and_predict(
844+
row,
845+
color_type.bit_depth(),
846+
samples,
847+
byte_order,
848+
predictor,
849+
);
850+
732851
if photometric_interpretation == PhotometricInterpretation::WhiteIsZero {
733852
super::invert_colors(row, color_type, self.sample_format)?;
734853
}
@@ -737,4 +856,18 @@ impl Image {
737856

738857
Ok(())
739858
}
859+
860+
/// Turn a contiguous buffer of a whole number of raw sample arrays into a whole number of
861+
/// photometric sample arrays by removing the extra samples in-between.
862+
fn compact_photometric_bytes(
863+
raw: &mut [u8],
864+
row: &mut [u8],
865+
photo_range: &std::ops::Range<u32>,
866+
) {
867+
raw.chunks_exact_mut(photo_range.end as usize)
868+
.zip(row.chunks_exact_mut(photo_range.start as usize))
869+
.for_each(|(src, dst)| {
870+
dst.copy_from_slice(&src[..photo_range.start as usize]);
871+
});
872+
}
740873
}

src/decoder/mod.rs

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -667,6 +667,8 @@ impl<R: Read + Seek> Decoder<R> {
667667
height: 0,
668668
bits_per_sample: 1,
669669
samples: 1,
670+
extra_samples: vec![],
671+
photometric_samples: 1,
670672
sample_format: SampleFormat::Uint,
671673
photometric_interpretation: PhotometricInterpretation::BlackIsZero,
672674
compression_method: CompressionMethod::None,
@@ -981,8 +983,10 @@ impl<R: Read + Seek> Decoder<R> {
981983
chunk_index: u32,
982984
output_width: usize,
983985
) -> TiffResult<()> {
986+
let color = self.image.color_or_fallback();
987+
984988
let output_row_stride = (output_width as u64)
985-
.saturating_mul(self.image.samples_per_pixel() as u64)
989+
.saturating_mul(self.image.samples_per_out_texel(color) as u64)
986990
.saturating_mul(self.image.bits_per_sample as u64)
987991
.div_ceil(8);
988992

@@ -1032,9 +1036,12 @@ impl<R: Read + Seek> Decoder<R> {
10321036
.map_err(|_| TiffError::LimitsExceeded)?
10331037
};
10341038

1039+
let color = self.image.color_or_fallback();
1040+
let samples = self.image.samples_per_out_texel(color);
1041+
10351042
let buffer_size = row_samples
10361043
.checked_mul(height)
1037-
.and_then(|x| x.checked_mul(self.image.samples_per_pixel()))
1044+
.and_then(|x| x.checked_mul(samples.into()))
10381045
.ok_or(TiffError::LimitsExceeded)?;
10391046

10401047
Ok(match self.image().sample_format {
@@ -1232,13 +1239,16 @@ impl<R: Read + Seek> Decoder<R> {
12321239
chunk_dimensions.0.min(width),
12331240
chunk_dimensions.1.min(height),
12341241
);
1242+
12351243
if chunk_dimensions.0 == 0 || chunk_dimensions.1 == 0 {
12361244
return Err(TiffError::FormatError(
12371245
TiffFormatError::InconsistentSizesEncountered,
12381246
));
12391247
}
12401248

1241-
let samples = self.image().samples_per_pixel();
1249+
let color = self.image().colortype()?;
1250+
let samples = self.image().samples_per_out_texel(color);
1251+
12421252
if samples == 0 {
12431253
return Err(TiffError::FormatError(
12441254
TiffFormatError::InconsistentSizesEncountered,

0 commit comments

Comments
 (0)