Skip to content

Commit 0d7b88c

Browse files
authored
Tar: fix handing of null terminated fields. (#117410)
* Tar: fix handing of null terminated fields. The current implementation is trimming spaces and nulls from the end. Instead we need to find the terminating null from the start, and we shouldn't trim spaces at the end. * Fix not advancing over datastream with length 1, which caused the DataOffset_RegularFile_SecondEntry test to fail. * Skip test cases that depend on checksum being checked. * Simplify LimitByRemaining. * Add test. * Refactor SetReachedEnd. * Throw InvalidDataException when the extended header contains invalid records.
1 parent aadccee commit 0d7b88c

File tree

8 files changed

+128
-102
lines changed

8 files changed

+128
-102
lines changed

src/libraries/System.Formats.Tar/src/Resources/Strings.resx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,4 +205,7 @@
205205
<data name="TarStreamSeekabilityUnsupportedCombination" xml:space="preserve">
206206
<value>Cannot write the unseekable data stream of entry '{0}' into an unseekable archive stream.</value>
207207
</data>
208+
<data name="ExtHeaderInvalidRecords" xml:space="preserve">
209+
<value>The extended header contains invalid records.</value>
210+
</data>
208211
</root>

src/libraries/System.Formats.Tar/src/System/Formats/Tar/SubReadStream.cs

Lines changed: 25 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -64,23 +64,26 @@ public override long Position
6464

6565
public override bool CanWrite => false;
6666

67-
internal bool HasReachedEnd
67+
private long Remaining => _endInSuperStream - _positionInSuperStream;
68+
69+
private int LimitByRemaining(int bufferSize) => (int)Math.Min(Remaining, bufferSize);
70+
71+
internal ValueTask AdvanceToEndAsync(CancellationToken cancellationToken)
6872
{
69-
get
70-
{
71-
if (!_hasReachedEnd && _positionInSuperStream > _endInSuperStream)
72-
{
73-
_hasReachedEnd = true;
74-
}
75-
return _hasReachedEnd;
76-
}
77-
set
78-
{
79-
if (value) // Don't allow revert to false
80-
{
81-
_hasReachedEnd = true;
82-
}
83-
}
73+
_hasReachedEnd = true;
74+
75+
long remaining = Remaining;
76+
_positionInSuperStream = _endInSuperStream;
77+
return TarHelpers.AdvanceStreamAsync(_superStream, remaining, cancellationToken);
78+
}
79+
80+
internal void AdvanceToEnd()
81+
{
82+
_hasReachedEnd = true;
83+
84+
long remaining = Remaining;
85+
_positionInSuperStream = _endInSuperStream;
86+
TarHelpers.AdvanceStream(_superStream, remaining);
8487
}
8588

8689
protected void ThrowIfDisposed()
@@ -90,7 +93,7 @@ protected void ThrowIfDisposed()
9093

9194
private void ThrowIfBeyondEndOfStream()
9295
{
93-
if (HasReachedEnd)
96+
if (_hasReachedEnd)
9497
{
9598
throw new EndOfStreamException();
9699
}
@@ -107,21 +110,12 @@ public override int Read(Span<byte> destination)
107110
ThrowIfDisposed();
108111
ThrowIfBeyondEndOfStream();
109112

110-
// parameter validation sent to _superStream.Read
111-
int origCount = destination.Length;
112-
int count = destination.Length;
113-
114-
if (_positionInSuperStream + count > _endInSuperStream)
115-
{
116-
count = (int)(_endInSuperStream - _positionInSuperStream);
117-
}
118-
119-
Debug.Assert(count >= 0);
120-
Debug.Assert(count <= origCount);
113+
destination = destination[..LimitByRemaining(destination.Length)];
121114

122-
int ret = _superStream.Read(destination.Slice(0, count));
115+
int ret = _superStream.Read(destination);
123116

124117
_positionInSuperStream += ret;
118+
125119
return ret;
126120
}
127121

@@ -158,14 +152,12 @@ protected async ValueTask<int> ReadAsyncCore(Memory<byte> buffer, CancellationTo
158152

159153
cancellationToken.ThrowIfCancellationRequested();
160154

161-
if (_positionInSuperStream > _endInSuperStream - buffer.Length)
162-
{
163-
buffer = buffer.Slice(0, (int)(_endInSuperStream - _positionInSuperStream));
164-
}
155+
buffer = buffer[..LimitByRemaining(buffer.Length)];
165156

166157
int ret = await _superStream.ReadAsync(buffer, cancellationToken).ConfigureAwait(false);
167158

168159
_positionInSuperStream += ret;
160+
169161
return ret;
170162
}
171163

src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHeader.Read.cs

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
using System.Collections.Generic;
66
using System.Diagnostics;
77
using System.Diagnostics.CodeAnalysis;
8+
using System.Globalization;
89
using System.IO;
910
using System.Text;
1011
using System.Threading;
@@ -384,7 +385,7 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca
384385

385386
// Continue with the rest of the fields that require no special checks
386387
TarHeader header = new(initialFormat,
387-
name: TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.Name, FieldLengths.Name)),
388+
name: TarHelpers.ParseUtf8String(buffer.Slice(FieldLocations.Name, FieldLengths.Name)),
388389
mode: TarHelpers.ParseNumeric<int>(buffer.Slice(FieldLocations.Mode, FieldLengths.Mode)),
389390
mTime: ParseAsTimestamp(buffer.Slice(FieldLocations.MTime, FieldLengths.MTime)),
390391
typeFlag: (TarEntryType)buffer[FieldLocations.TypeFlag])
@@ -393,7 +394,7 @@ private async Task ProcessDataBlockAsync(Stream archiveStream, bool copyData, Ca
393394
_size = size,
394395
_uid = TarHelpers.ParseNumeric<int>(buffer.Slice(FieldLocations.Uid, FieldLengths.Uid)),
395396
_gid = TarHelpers.ParseNumeric<int>(buffer.Slice(FieldLocations.Gid, FieldLengths.Gid)),
396-
_linkName = TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.LinkName, FieldLengths.LinkName))
397+
_linkName = TarHelpers.ParseUtf8String(buffer.Slice(FieldLocations.LinkName, FieldLengths.LinkName))
397398
};
398399

399400
if (header._format == TarEntryFormat.Unknown)
@@ -517,8 +518,8 @@ private void ReadVersionAttribute(ReadOnlySpan<byte> buffer)
517518
private void ReadPosixAndGnuSharedAttributes(ReadOnlySpan<byte> buffer)
518519
{
519520
// Convert the byte arrays
520-
_uName = TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.UName, FieldLengths.UName));
521-
_gName = TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.GName, FieldLengths.GName));
521+
_uName = TarHelpers.ParseUtf8String(buffer.Slice(FieldLocations.UName, FieldLengths.UName));
522+
_gName = TarHelpers.ParseUtf8String(buffer.Slice(FieldLocations.GName, FieldLengths.GName));
522523

523524
// DevMajor and DevMinor only have values with character devices and block devices.
524525
// For all other typeflags, the values in these fields are irrelevant.
@@ -560,7 +561,7 @@ private static DateTimeOffset ParseAsTimestamp(ReadOnlySpan<byte> buffer)
560561
// Throws if a conversion to an expected data type fails.
561562
private void ReadUstarAttributes(ReadOnlySpan<byte> buffer)
562563
{
563-
_prefix = TarHelpers.GetTrimmedUtf8String(buffer.Slice(FieldLocations.Prefix, FieldLengths.Prefix));
564+
_prefix = TarHelpers.ParseUtf8String(buffer.Slice(FieldLocations.Prefix, FieldLengths.Prefix));
564565

565566
// In ustar, Prefix is used to store the *leading* path segments of
566567
// Name, if the full path did not fit in the Name byte array.
@@ -631,15 +632,18 @@ void ThrowSizeFieldTooLarge() =>
631632
// Returns a dictionary containing the extended attributes collected from the provided byte buffer.
632633
private void ReadExtendedAttributesFromBuffer(ReadOnlySpan<byte> buffer, string name)
633634
{
634-
buffer = TarHelpers.TrimEndingNullsAndSpaces(buffer);
635-
636635
while (TryGetNextExtendedAttribute(ref buffer, out string? key, out string? value))
637636
{
638637
if (!ExtendedAttributes.TryAdd(key, value))
639638
{
640639
throw new InvalidDataException(SR.Format(SR.TarDuplicateExtendedAttribute, name));
641640
}
642641
}
642+
643+
if (buffer.Length > 0)
644+
{
645+
throw new InvalidDataException(SR.Format(SR.ExtHeaderInvalidRecords));
646+
}
643647
}
644648

645649
// Reads the long path found in the data section of a GNU entry of type 'K' or 'L'
@@ -691,7 +695,7 @@ private async ValueTask ReadGnuLongPathDataBlockAsync(Stream archiveStream, Canc
691695
// Collects the GNU long path info from the buffer and sets it in the right field depending on the type flag.
692696
private void ReadGnuLongPathDataFromBuffer(ReadOnlySpan<byte> buffer)
693697
{
694-
string longPath = TarHelpers.GetTrimmedUtf8String(buffer);
698+
string longPath = TarHelpers.ParseUtf8String(buffer);
695699

696700
if (_typeFlag == TarEntryType.LongLink)
697701
{
@@ -725,15 +729,21 @@ private static bool TryGetNextExtendedAttribute(
725729
}
726730
ReadOnlySpan<byte> line = buffer.Slice(0, newlinePos);
727731

728-
// Update buffer to point to the next line for the next call
729-
buffer = buffer.Slice(newlinePos + 1);
730-
731-
// Find the end of the length and remove everything up through it.
732+
// Find the end of the length
732733
int spacePos = line.IndexOf((byte)' ');
733734
if (spacePos < 0)
734735
{
735736
return false;
736737
}
738+
739+
// Check the length matches the line length
740+
ReadOnlySpan<byte> length = buffer.Slice(0, spacePos);
741+
if (!int.TryParse(length, NumberStyles.None, CultureInfo.InvariantCulture, out int lengthValue) || lengthValue != (line.Length + 1))
742+
{
743+
return false;
744+
}
745+
746+
// Remove the length
737747
line = line.Slice(spacePos + 1);
738748

739749
// Find the equal separator.
@@ -749,6 +759,9 @@ private static bool TryGetNextExtendedAttribute(
749759
// Return the parsed key and value.
750760
key = Encoding.UTF8.GetString(keySlice);
751761
value = Encoding.UTF8.GetString(valueSlice);
762+
763+
// Update buffer to point to the next line for the next call
764+
buffer = buffer.Slice(newlinePos + 1);
752765
return true;
753766
}
754767
}

src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarHelpers.cs

Lines changed: 13 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -240,8 +240,10 @@ internal static T ParseNumeric<T>(ReadOnlySpan<byte> buffer) where T : struct, I
240240
/// <summary>Parses a byte span that represents an ASCII string containing a number in octal base.</summary>
241241
internal static T ParseOctal<T>(ReadOnlySpan<byte> buffer) where T : struct, INumber<T>
242242
{
243-
buffer = TrimEndingNullsAndSpaces(buffer);
244-
buffer = TrimLeadingNullsAndSpaces(buffer);
243+
buffer = TrimNullTerminated(buffer);
244+
245+
// We ignore spaces because some archives seem to have them (even though they shouldn't).
246+
buffer = buffer.Trim((byte)' ');
245247

246248
if (buffer.Length == 0)
247249
{
@@ -268,44 +270,23 @@ internal static T ParseOctal<T>(ReadOnlySpan<byte> buffer) where T : struct, INu
268270
private static void ThrowInvalidNumber() =>
269271
throw new InvalidDataException(SR.Format(SR.TarInvalidNumber));
270272

271-
// Returns the string contained in the specified buffer of bytes,
272-
// in the specified encoding, removing the trailing null or space chars.
273-
private static string GetTrimmedString(ReadOnlySpan<byte> buffer, Encoding encoding)
273+
// Returns the null-terminated UTF8 string contained in the specified buffer.
274+
internal static string ParseUtf8String(ReadOnlySpan<byte> buffer)
274275
{
275-
buffer = TrimEndingNullsAndSpaces(buffer);
276-
return buffer.IsEmpty ? string.Empty : encoding.GetString(buffer);
277-
}
278-
279-
internal static ReadOnlySpan<byte> TrimEndingNullsAndSpaces(ReadOnlySpan<byte> buffer)
280-
{
281-
int trimmedLength = buffer.Length;
282-
while (trimmedLength > 0 && buffer[trimmedLength - 1] is 0 or 32)
283-
{
284-
trimmedLength--;
285-
}
286-
287-
return buffer.Slice(0, trimmedLength);
276+
buffer = TrimNullTerminated(buffer);
277+
return Encoding.UTF8.GetString(buffer);
288278
}
289279

290-
private static ReadOnlySpan<byte> TrimLeadingNullsAndSpaces(ReadOnlySpan<byte> buffer)
280+
internal static ReadOnlySpan<byte> TrimNullTerminated(ReadOnlySpan<byte> buffer)
291281
{
292-
int newStart = 0;
293-
while (newStart < buffer.Length && buffer[newStart] is 0 or 32)
282+
int i = buffer.IndexOf((byte)0);
283+
if (i != -1)
294284
{
295-
newStart++;
285+
buffer = buffer[0..i];
296286
}
297-
298-
return buffer.Slice(newStart);
287+
return buffer;
299288
}
300289

301-
// Returns the ASCII string contained in the specified buffer of bytes,
302-
// removing the trailing null or space chars.
303-
internal static string GetTrimmedAsciiString(ReadOnlySpan<byte> buffer) => GetTrimmedString(buffer, Encoding.ASCII);
304-
305-
// Returns the UTF8 string contained in the specified buffer of bytes,
306-
// removing the trailing null or space chars.
307-
internal static string GetTrimmedUtf8String(ReadOnlySpan<byte> buffer) => GetTrimmedString(buffer, Encoding.UTF8);
308-
309290
// After the file contents, there may be zero or more null characters,
310291
// which exist to ensure the data is aligned to the record size. Skip them and
311292
// set the stream position to the first byte of the next entry.

src/libraries/System.Formats.Tar/src/System/Formats/Tar/TarReader.cs

Lines changed: 4 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -221,17 +221,8 @@ internal void AdvanceDataStreamIfNeeded()
221221
return;
222222
}
223223

224-
if (!dataStream.HasReachedEnd)
225-
{
226-
// If the user did not advance the position, we need to make sure the position
227-
// pointer is located at the beginning of the next header.
228-
if (dataStream.Position < (_previouslyReadEntry._header._size - 1))
229-
{
230-
long bytesToSkip = _previouslyReadEntry._header._size - dataStream.Position;
231-
TarHelpers.AdvanceStream(_archiveStream, bytesToSkip);
232-
dataStream.HasReachedEnd = true; // Now the pointer is beyond the limit, so any read attempts should throw
233-
}
234-
}
224+
dataStream.AdvanceToEnd();
225+
235226
TarHelpers.SkipBlockAlignmentPadding(_archiveStream, _previouslyReadEntry._header._size);
236227
}
237228
}
@@ -263,17 +254,8 @@ internal async ValueTask AdvanceDataStreamIfNeededAsync(CancellationToken cancel
263254
return;
264255
}
265256

266-
if (!dataStream.HasReachedEnd)
267-
{
268-
// If the user did not advance the position, we need to make sure the position
269-
// pointer is located at the beginning of the next header.
270-
if (dataStream.Position < (_previouslyReadEntry._header._size - 1))
271-
{
272-
long bytesToSkip = _previouslyReadEntry._header._size - dataStream.Position;
273-
await TarHelpers.AdvanceStreamAsync(_archiveStream, bytesToSkip, cancellationToken).ConfigureAwait(false);
274-
dataStream.HasReachedEnd = true; // Now the pointer is beyond the limit, so any read attempts should throw
275-
}
276-
}
257+
await dataStream.AdvanceToEndAsync(cancellationToken).ConfigureAwait(false);
258+
277259
await TarHelpers.SkipBlockAlignmentPaddingAsync(_archiveStream, _previouslyReadEntry._header._size, cancellationToken).ConfigureAwait(false);
278260
}
279261
}

src/libraries/System.Formats.Tar/tests/TarReader/TarReader.File.Async.Tests.cs

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
using System.IO;
77
using System.IO.Compression;
88
using System.Linq;
9+
using System.Text;
910
using System.Threading.Tasks;
1011
using Xunit;
1112

@@ -261,7 +262,8 @@ public async Task AllowSpacesInOctalFieldsAsync(string folderName, string testCa
261262
[InlineData("gnu-multi-hdrs")] // Multiple consecutive GNU metadata entries
262263
[InlineData("neg-size")] // Garbage chars
263264
[InlineData("invalid-go17")] // Many octal fields are all zero chars
264-
[InlineData("issue11169")] // Checksum with null in the middle
265+
[InlineData("issue11169")] // Extended header uses spaces instead of newlines to separate records
266+
[InlineData("pax-bad-hdr-file")] // Extended header record is not terminated by newline
265267
[InlineData("issue10968")] // Garbage chars
266268
public async Task Throw_ArchivesWithRandomCharsAsync(string testCaseName)
267269
{
@@ -308,6 +310,32 @@ public async Task SparseEntryNotSupportedAsync(string testFolderName, string tes
308310
await Assert.ThrowsAsync<NotSupportedException>(async () => await reader.GetNextEntryAsync());
309311
}
310312

313+
[Fact]
314+
public async Task ReaderIgnoresFieldValueAfterTrailingNullAsync()
315+
{
316+
// Fields in the tar archives are terminated by a trailing null.
317+
// When reading these fields the reader must ignore all bytes past that null.
318+
319+
// Construct an archive that has a filename with some data after the trailing null.
320+
const string FileName = " filename ";
321+
const string FileNameWithDataPastTrailingNull = $"{FileName}\0nonesense";
322+
using MemoryStream ms = new();
323+
using (TarWriter writer = new(ms, leaveOpen: true))
324+
{
325+
var entry = new UstarTarEntry(TarEntryType.RegularFile, FileNameWithDataPastTrailingNull);
326+
writer.WriteEntry(entry);
327+
}
328+
ms.Position = 0;
329+
// Check the writer serialized the complete name passed to the constructor.
330+
bool archiveIsExpected = ms.ToArray().IndexOf(Encoding.UTF8.GetBytes(FileNameWithDataPastTrailingNull)) != -1;
331+
Assert.True(archiveIsExpected);
332+
333+
// Verify the reader doesn't return the data past the trailing null.
334+
using TarReader reader = new(ms);
335+
TarEntry firstEntry = await reader.GetNextEntryAsync();
336+
Assert.Equal(FileName, firstEntry.Name);
337+
}
338+
311339
[Fact]
312340
public async Task DirectoryListRegularFileAndSparseAsync()
313341
{

0 commit comments

Comments
 (0)