forked from luci/luci-py
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathrecords.py
More file actions
356 lines (280 loc) · 10.5 KB
/
records.py
File metadata and controls
356 lines (280 loc) · 10.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
#!/usr/bin/env python
# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Lightweight record format.
This format implements log file format from leveldb:
http://leveldb.googlecode.com/svn/trunk/doc/log_format.txt
The main advantages of this format are
1. to detect corruption. Every record has a crc32c checksum.
2. to quickly skip corrupted record to the next valid record.
Full specification of format follows in case leveldb decides to change it.
The log file contents are a sequence of 32KB blocks. The only
exception is that the tail of the file may contain a partial block.
Each block consists of a sequence of records:
block := record* trailer?
record :=
checksum: uint32 // masked crc32c of type and data[]
length: uint16
type: uint8 // One of FULL, FIRST, MIDDLE, LAST
data: uint8[length]
A record never starts within the last six bytes of a block (since it
won't fit). Any leftover bytes here form the trailer, which must
consist entirely of zero bytes and must be skipped by readers.
Aside: if exactly seven bytes are left in the current block, and a new
non-zero length record is added, the writer must emit a FIRST record
(which contains zero bytes of user data) to fill up the trailing seven
bytes of the block and then emit all of the user data in subsequent
blocks.
More types may be added in the future. Some Readers may skip record
types they do not understand, others may report that some data was
skipped.
FULL == 1
FIRST == 2
MIDDLE == 3
LAST == 4
The FULL record contains the contents of an entire user record.
FIRST, MIDDLE, LAST are types used for user records that have been
split into multiple fragments (typically because of block boundaries).
FIRST is the type of the first fragment of a user record, LAST is the
type of the last fragment of a user record, and MIDDLE is the type of
all interior fragments of a user record.
Example: consider a sequence of user records:
A: length 1000
B: length 97270
C: length 8000
A will be stored as a FULL record in the first block.
B will be split into three fragments: first fragment occupies the rest
of the first block, second fragment occupies the entirety of the
second block, and the third fragment occupies a prefix of the third
block. This will leave six bytes free in the third block, which will
be left empty as the trailer.
C will be stored as a FULL record in the fourth block.
"""
# Public API of this module: a writer and a reader for the record format.
__all__ = ['RecordsWriter',
           'RecordsReader']
import logging
import struct
# Note: this will be scrubbed to google.appengine.api.files import crc32c
# when mapreduce is pushed to runtime.
from mapreduce.third_party import crc32c
from mapreduce import errors
# pylint: disable=g-bad-name

# Size of a block: records never span a 32KB block boundary; what does not
# fit is split into FIRST/MIDDLE/LAST fragments.
_BLOCK_SIZE = 32 * 1024

# Physical record header format: little-endian ('<') masked crc32c (uint32,
# 'I'), payload length (uint16, 'H'), record type (uint8, 'B').
_HEADER_FORMAT = '<IHB'

# Header length in bytes (computed from _HEADER_FORMAT; 7 bytes).
_HEADER_LENGTH = struct.calcsize(_HEADER_FORMAT)

# Not a record but padding bytes (block trailer filled with zeros).
_RECORD_TYPE_NONE = 0

# Full record: the entire user record fits in one physical record.
_RECORD_TYPE_FULL = 1

# First data chunk record of a fragmented user record.
_RECORD_TYPE_FIRST = 2

# Middle data chunk record of a fragmented user record.
_RECORD_TYPE_MIDDLE = 3

# Last data chunk record of a fragmented user record.
_RECORD_TYPE_LAST = 4
# CRC Mask. Comes from http://leveldb.googlecode.com/svn/trunk/util/crc32c.h
_CRC_MASK_DELTA = 0xa282ead8
def _mask_crc(crc):
"""Mask crc.
Args:
crc: integer crc.
Returns:
masked integer crc.
"""
return (((crc >> 15) | (crc << 17)) + _CRC_MASK_DELTA) & 0xFFFFFFFFL
def _unmask_crc(masked_crc):
"""Unmask crc.
Args:
masked_crc: masked integer crc.
Retruns:
orignal crc.
"""
rot = (masked_crc - _CRC_MASK_DELTA) & 0xFFFFFFFFL
return ((rot >> 17) | (rot << 15)) & 0xFFFFFFFFL
class RecordsWriter(object):
  """A writer for records format."""

  def __init__(self, writer):
    """Constructor.

    Args:
      writer: a writer conforming to Python io.RawIOBase interface that
        implements 'write'.
    """
    self.__writer = writer
    # Total bytes written so far; used to locate 32KB block boundaries.
    self.__position = 0

  def __write_record(self, record_type, data):
    """Write single physical record (header followed by payload).

    Args:
      record_type: one of the _RECORD_TYPE_* constants.
      data: payload bytes for this physical record.
    """
    length = len(data)

    # The checksum covers the record type byte followed by the payload.
    crc = crc32c.crc_update(crc32c.CRC_INIT, [record_type])
    crc = crc32c.crc_update(crc, data)
    crc = crc32c.crc_finalize(crc)

    self.__writer.write(
        struct.pack(_HEADER_FORMAT, _mask_crc(crc), length, record_type))
    self.__writer.write(data)
    self.__position += _HEADER_LENGTH + length

  def write(self, data):
    """Write single record.

    A record that does not fit in the remainder of the current 32KB block
    is split into FIRST/MIDDLE.../LAST physical records spanning several
    blocks; otherwise it is written as a single FULL record.

    Args:
      data: record data to write as string, byte array or byte sequence.
    """
    block_remaining = _BLOCK_SIZE - self.__position % _BLOCK_SIZE

    if block_remaining < _HEADER_LENGTH:
      # Header won't fit into remainder: zero-fill the trailer (readers
      # must skip it) and start at the next block boundary.
      # b'\x00' is the same str type on Python 2 but keeps this correct
      # for binary writers on Python 3.
      self.__writer.write(b'\x00' * block_remaining)
      self.__position += block_remaining
      block_remaining = _BLOCK_SIZE

    if block_remaining < len(data) + _HEADER_LENGTH:
      # Record does not fit in this block: emit a FIRST fragment filling
      # the rest of the block (possibly with zero payload bytes), then
      # MIDDLE fragments filling whole blocks, then a final LAST fragment.
      first_chunk = data[:block_remaining - _HEADER_LENGTH]
      self.__write_record(_RECORD_TYPE_FIRST, first_chunk)
      data = data[len(first_chunk):]

      while True:
        block_remaining = _BLOCK_SIZE - self.__position % _BLOCK_SIZE
        if block_remaining >= len(data) + _HEADER_LENGTH:
          self.__write_record(_RECORD_TYPE_LAST, data)
          break
        else:
          chunk = data[:block_remaining - _HEADER_LENGTH]
          self.__write_record(_RECORD_TYPE_MIDDLE, chunk)
          data = data[len(chunk):]
    else:
      self.__write_record(_RECORD_TYPE_FULL, data)

  def __enter__(self):
    return self

  def __exit__(self, atype, value, traceback):
    self.close()

  def close(self):
    # Nothing is buffered here; flushing/closing the underlying writer is
    # the caller's responsibility.
    pass

  def _pad_block(self):
    """Pad block with 0.

    Pad current block with 0. Reader will simply treat these as corrupted
    record and skip the block.

    This method is idempotent.
    """
    pad_length = _BLOCK_SIZE - self.__position % _BLOCK_SIZE
    if pad_length and pad_length != _BLOCK_SIZE:
      self.__writer.write(b'\x00' * pad_length)
      self.__position += pad_length
class RecordsReader(object):
  """A reader for records format."""

  def __init__(self, reader):
    """Init.

    Args:
      reader: a reader conforming to Python io.RawIOBase interface that
        implements 'read', 'seek', and 'tell'.
    """
    self.__reader = reader

  def __try_read_record(self):
    """Try reading a single physical record at the current position.

    Returns:
      (data, record_type) tuple.

    Raises:
      EOFError: when end of file was reached.
      InvalidRecordError: when valid record could not be read.
    """
    block_remaining = _BLOCK_SIZE - self.__reader.tell() % _BLOCK_SIZE
    if block_remaining < _HEADER_LENGTH:
      # Only zero-padding trailer bytes can remain in this block.
      return ('', _RECORD_TYPE_NONE)

    header = self.__reader.read(_HEADER_LENGTH)
    if len(header) != _HEADER_LENGTH:
      raise EOFError('Read %s bytes instead of %s' %
                     (len(header), _HEADER_LENGTH))

    (masked_crc, length, record_type) = struct.unpack(_HEADER_FORMAT, header)
    crc = _unmask_crc(masked_crc)

    if length + _HEADER_LENGTH > block_remaining:
      # A record can't be bigger than one block.
      raise errors.InvalidRecordError('Length is too big')

    data = self.__reader.read(length)
    if len(data) != length:
      raise EOFError('Not enough data read. Expected: %s but got %s' %
                     (length, len(data)))

    if record_type == _RECORD_TYPE_NONE:
      return ('', record_type)

    # Verify the checksum over the type byte followed by the payload.
    actual_crc = crc32c.crc_update(crc32c.CRC_INIT, [record_type])
    actual_crc = crc32c.crc_update(actual_crc, data)
    actual_crc = crc32c.crc_finalize(actual_crc)
    if actual_crc != crc:
      raise errors.InvalidRecordError('Data crc does not match')
    return (data, record_type)

  def __sync(self):
    """Skip reader to the block boundary."""
    pad_length = _BLOCK_SIZE - self.__reader.tell() % _BLOCK_SIZE
    if pad_length and pad_length != _BLOCK_SIZE:
      data = self.__reader.read(pad_length)
      if len(data) != pad_length:
        raise EOFError('Read %d bytes instead of %d' %
                       (len(data), pad_length))

  def read(self):
    """Reads record from current position in reader.

    Corrupted or out-of-order fragments are logged as warnings and the
    reader resynchronizes at the next block boundary.

    Returns:
      original bytes stored in a single record.

    Raises:
      EOFError: when end of file was reached.
    """
    data = None
    while True:
      last_offset = self.tell()
      try:
        (chunk, record_type) = self.__try_read_record()
        if record_type == _RECORD_TYPE_NONE:
          self.__sync()
        elif record_type == _RECORD_TYPE_FULL:
          if data is not None:
            logging.warning(
                "Ordering corruption: Got FULL record while already "
                "in a chunk at offset %d", last_offset)
          return chunk
        elif record_type == _RECORD_TYPE_FIRST:
          if data is not None:
            logging.warning(
                "Ordering corruption: Got FIRST record while already "
                "in a chunk at offset %d", last_offset)
          data = chunk
        elif record_type == _RECORD_TYPE_MIDDLE:
          if data is None:
            logging.warning(
                "Ordering corruption: Got MIDDLE record before FIRST "
                "record at offset %d", last_offset)
          else:
            data += chunk
        elif record_type == _RECORD_TYPE_LAST:
          if data is None:
            logging.warning(
                "Ordering corruption: Got LAST record but no chunk is in "
                "progress at offset %d", last_offset)
          else:
            result = data + chunk
            data = None
            return result
        else:
          raise errors.InvalidRecordError(
              "Unsupported record type: %s" % record_type)
      # 'except X as e' replaces the deprecated Python-2-only 'except X, e'
      # form; it is valid on Python 2.6+ and required on Python 3.
      except errors.InvalidRecordError as e:
        logging.warning("Invalid record encountered at %s (%s). Syncing to "
                        "the next block", last_offset, e)
        data = None
        self.__sync()

  def __iter__(self):
    """Iterate over all remaining records until EOF."""
    try:
      while True:
        yield self.read()
    except EOFError:
      pass

  def tell(self):
    """Return file's current position."""
    return self.__reader.tell()

  def seek(self, *args, **kwargs):
    """Set the file's current position.

    Arguments are passed directly to the underlying reader.
    """
    return self.__reader.seek(*args, **kwargs)