Skip to content

Commit ddf7de0

Browse files
authored
Merge pull request #14291 from NixOS/skip-source
Add skip() method to Source interface to allow efficient seeks
2 parents 1fabed1 + 6c9083d commit ddf7de0

File tree

5 files changed

+66
-4
lines changed

5 files changed

+66
-4
lines changed

src/libutil/archive.cc

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,11 @@ static void parseContents(CreateRegularFileSink & sink, Source & source)
132132

133133
sink.preallocateContents(size);
134134

135+
if (sink.skipContents) {
136+
source.skip(size + (size % 8 ? 8 - (size % 8) : 0));
137+
return;
138+
}
139+
135140
uint64_t left = size;
136141
std::array<char, 65536> buf;
137142

@@ -166,7 +171,7 @@ static void parse(FileSystemObjectSink & sink, Source & source, const CanonPath
166171
auto expectTag = [&](std::string_view expected) {
167172
auto tag = getString();
168173
if (tag != expected)
169-
throw badArchive("expected tag '%s', got '%s'", expected, tag);
174+
throw badArchive("expected tag '%s', got '%s'", expected, tag.substr(0, 1024));
170175
};
171176

172177
expectTag("(");

src/libutil/fs-sink.cc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,8 @@ void NullFileSystemObjectSink::createRegularFile(
196196
void isExecutable() override {}
197197
} crf;
198198

199+
crf.skipContents = true;
200+
199201
// Even though `NullFileSystemObjectSink` doesn't do anything, it's important
200202
// that we call the function, to e.g. advance the parser using this
201203
// sink.

src/libutil/include/nix/util/fs-sink.hh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,14 @@ namespace nix {
1414
*/
1515
struct CreateRegularFileSink : Sink
1616
{
17+
/**
18+
* If set to true, the sink will not be called with the contents
19+
* of the file. `preallocateContents()` will still be called to
20+
* convey the file size. Useful for sinks that want to efficiently
21+
* discard the contents of the file.
22+
*/
23+
bool skipContents = false;
24+
1725
virtual void isExecutable() = 0;
1826

1927
/**

src/libutil/include/nix/util/serialise.hh

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,8 @@ struct Source
9797
void drainInto(Sink & sink);
9898

9999
std::string drain();
100+
101+
virtual void skip(size_t len);
100102
};
101103

102104
/**
@@ -177,6 +179,7 @@ struct FdSource : BufferedSource
177179
Descriptor fd;
178180
size_t read = 0;
179181
BackedStringView endOfFileError{"unexpected end-of-file"};
182+
bool isSeekable = true;
180183

181184
FdSource()
182185
: fd(INVALID_DESCRIPTOR)
@@ -200,6 +203,8 @@ struct FdSource : BufferedSource
200203
*/
201204
bool hasData();
202205

206+
void skip(size_t len) override;
207+
203208
protected:
204209
size_t readUnbuffered(char * data, size_t len) override;
205210
private:

src/libutil/serialise.cc

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -94,9 +94,8 @@ void Source::drainInto(Sink & sink)
9494
{
9595
std::array<char, 8192> buf;
9696
while (true) {
97-
size_t n;
9897
try {
99-
n = read(buf.data(), buf.size());
98+
auto n = read(buf.data(), buf.size());
10099
sink({buf.data(), n});
101100
} catch (EndOfFile &) {
102101
break;
@@ -111,6 +110,16 @@ std::string Source::drain()
111110
return std::move(s.s);
112111
}
113112

113+
void Source::skip(size_t len)
114+
{
115+
std::array<char, 8192> buf;
116+
while (len) {
117+
auto n = read(buf.data(), std::min(len, buf.size()));
118+
assert(n <= len);
119+
len -= n;
120+
}
121+
}
122+
114123
size_t BufferedSource::read(char * data, size_t len)
115124
{
116125
if (!buffer)
@@ -120,7 +129,7 @@ size_t BufferedSource::read(char * data, size_t len)
120129
bufPosIn = readUnbuffered(buffer.get(), bufSize);
121130

122131
/* Copy out the data in the buffer. */
123-
size_t n = len > bufPosIn - bufPosOut ? bufPosIn - bufPosOut : len;
132+
auto n = std::min(len, bufPosIn - bufPosOut);
124133
memcpy(data, buffer.get() + bufPosOut, n);
125134
bufPosOut += n;
126135
if (bufPosIn == bufPosOut)
@@ -191,6 +200,39 @@ bool FdSource::hasData()
191200
}
192201
}
193202

203+
void FdSource::skip(size_t len)
204+
{
205+
/* Discard data in the buffer. */
206+
if (len && buffer && bufPosIn - bufPosOut) {
207+
if (len >= bufPosIn - bufPosOut) {
208+
len -= bufPosIn - bufPosOut;
209+
bufPosIn = bufPosOut = 0;
210+
} else {
211+
bufPosOut += len;
212+
len = 0;
213+
}
214+
}
215+
216+
#ifndef _WIN32
217+
/* If we can, seek forward in the file to skip the rest. */
218+
if (isSeekable && len) {
219+
if (lseek(fd, len, SEEK_CUR) == -1) {
220+
if (errno == ESPIPE)
221+
isSeekable = false;
222+
else
223+
throw SysError("seeking forward in file");
224+
} else {
225+
read += len;
226+
return;
227+
}
228+
}
229+
#endif
230+
231+
/* Otherwise, skip by reading. */
232+
if (len)
233+
BufferedSource::skip(len);
234+
}
235+
194236
size_t StringSource::read(char * data, size_t len)
195237
{
196238
if (pos == s.size())

0 commit comments

Comments
 (0)