Skip to content

Commit 69c7991

Browse files
committed
"add snappy library"
1 parent 6540cda commit 69c7991

File tree

12 files changed

+283
-31
lines changed

12 files changed

+283
-31
lines changed

cmake/external/snappy.cmake

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
#
15+
16+
# Builds the snappy compression library as an external project and exposes it
# as the imported static target `snappy`.
IF(MOBILE_INFERENCE)
    return()
ENDIF()

include (ExternalProject)

# NOTE: snappy is needed when linking with recordio

SET(SNAPPY_SOURCES_DIR ${THIRD_PARTY_PATH}/snappy)
SET(SNAPPY_INSTALL_DIR ${THIRD_PARTY_PATH}/install/snappy)
SET(SNAPPY_INCLUDE_DIR "${SNAPPY_INSTALL_DIR}/include/" CACHE PATH "snappy include directory." FORCE)

# No BUILD_COMMAND / INSTALL_COMMAND is given on purpose: ExternalProject then
# drives the build through the generator's native tool, so the parent build's
# parallelism is inherited instead of a hard-coded `make -j8`, and non-Makefile
# generators keep working.
# SNAPPY_BUILD_TESTS is passed next to BUILD_TESTING so that snappy's own unit
# tests are skipped (NOTE(review): option name taken from snappy's
# CMakeLists.txt -- confirm for the pinned tag).
ExternalProject_Add(
    extern_snappy
    GIT_REPOSITORY "https://github.com/google/snappy"
    GIT_TAG "1.1.7"
    PREFIX ${SNAPPY_SOURCES_DIR}
    UPDATE_COMMAND ""
    CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
               -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
               -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
               -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
               -DCMAKE_INSTALL_PREFIX=${SNAPPY_INSTALL_DIR}
               -DCMAKE_INSTALL_LIBDIR=${SNAPPY_INSTALL_DIR}/lib
               -DCMAKE_POSITION_INDEPENDENT_CODE=ON
               -DBUILD_TESTING=OFF
               -DSNAPPY_BUILD_TESTS=OFF
               -DCMAKE_BUILD_TYPE=${THIRD_PARTY_BUILD_TYPE}
               ${EXTERNAL_OPTIONAL_ARGS}
    CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${SNAPPY_INSTALL_DIR}
                     -DCMAKE_INSTALL_LIBDIR:PATH=${SNAPPY_INSTALL_DIR}/lib
                     -DCMAKE_POSITION_INDEPENDENT_CODE:BOOL=ON
                     -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
)

# Imported target pointing at the static archive installed by extern_snappy.
add_library(snappy STATIC IMPORTED GLOBAL)
set_property(TARGET snappy PROPERTY IMPORTED_LOCATION
             "${SNAPPY_INSTALL_DIR}/lib/libsnappy.a")

include_directories(${SNAPPY_INCLUDE_DIR})
# Anything linking `snappy` must wait for the external build to finish.
add_dependencies(snappy extern_snappy)

paddle/fluid/recordio/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,4 @@
11
cc_library(header SRCS header.cc)
22
cc_test(header_test SRCS header_test.cc DEPS header)
3+
cc_library(io SRCS io.cc DEPS stringpiece)
4+
cc_test(io_test SRCS io_test.cc DEPS io)

paddle/fluid/recordio/chunk.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,9 +32,11 @@ class Chunk {
3232
bool Dump(std::ostream& os, Compressor ct);
3333
void Parse(std::istream& iss, int64_t offset);
3434
const std::string Record(int i) { return records_[i]; }
35+
size_t NumBytes() { return num_bytes_; }
3536

3637
private:
3738
std::vector<std::string> records_;
39+
// sum of record lengths in bytes.
3840
size_t num_bytes_;
3941
};
4042

paddle/fluid/recordio/filesys.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#pragma once
16+
17+
#include <fcntl.h>
18+
#include <stdio.h>
19+
#include <unistd.h>
20+
21+
// Empty placeholder type: no members are declared yet.
class DefaultFileSys {};

paddle/fluid/recordio/header_test.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
#include "gtest/gtest.h"
2020

21-
using namespace recordio;
21+
using namespace paddle::recordio;
2222

2323
TEST(Recordio, ChunkHead) {
2424
Header hdr(0, 1, Compressor::kGzip, 3);
@@ -32,5 +32,5 @@ TEST(Recordio, ChunkHead) {
3232
std::ostringstream oss2;
3333
hdr2.Write(oss2);
3434
EXPECT_STREQ(oss2.str().c_str(), oss.str().c_str());
35-
EXPECT_EQ(hdr == hdr2);
35+
EXPECT_TRUE(hdr == hdr2);
3636
}

paddle/fluid/recordio/io.cc

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "paddle/fluid/recordio/io.h"
16+
#include "paddle/fluid/string/piece.h"
17+
18+
namespace paddle {
19+
namespace recordio {
20+
// Opens `filename` with the fopen-style `mode` and returns a heap-allocated
// FileStream wrapping the handle; the caller releases it with `delete`.
//
// Only paths that begin with "/" are handled here. Anything else, or a file
// that fopen cannot open, fails a PADDLE_ENFORCE instead of handing back a
// stream around a null FILE* that would crash on first use.
Stream* Stream::Open(const char* filename, const char* mode) {
  PADDLE_ENFORCE(filename != nullptr && mode != nullptr,
                 "Stream::Open requires a filename and a mode.");
  // Create IOStream for different filesystems
  // HDFS: hdfs://tmp/file.txt
  // Default: /tmp/file.txt
  PADDLE_ENFORCE(
      string::HasPrefix(string::Piece(filename), string::Piece("/")),
      "Stream::Open only supports absolute local paths, got %s.", filename);
  FILE* fp = fopen(filename, mode);
  PADDLE_ENFORCE(fp != nullptr, "Cannot open file %s with mode %s.", filename,
                 mode);
  return new FileStream(fp);
}
30+
31+
size_t FileStream::Read(void* ptr, size_t size) {
32+
return fread(ptr, 1, size, fp_);
33+
}
34+
35+
void FileStream::Write(const void* ptr, size_t size) {
36+
size_t real = fwrite(ptr, 1, size, fp_);
37+
PADDLE_ENFORCE(real == size, "FileStream write incomplete.");
38+
}
39+
40+
size_t FileStream::Tell() { return ftell(fp_); }
41+
void FileStream::Seek(size_t p) { fseek(fp_, static_cast<long>(p), SEEK_SET); }
42+
43+
// Returns true when feof reports the end-of-file indicator for the file.
bool FileStream::Eof() {
  const int at_end = feof(fp_);
  return at_end != 0;
}
44+
45+
void FileStream::Close() {
46+
if (fp_ != nullptr) {
47+
fclose(fp_);
48+
fp_ = nullptr;
49+
}
50+
}
51+
52+
} // namespace recordio
53+
} // namespace paddle

paddle/fluid/recordio/io.h

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#pragma once
16+
17+
#include <stdio.h>
18+
#include <string>
19+
#include "paddle/fluid/platform/enforce.h"
20+
21+
namespace paddle {
22+
namespace recordio {
23+
24+
// Abstract byte-stream interface for reading and writing.
// Every I/O operation is pure virtual: the interface has no out-of-line
// definitions of its own, and concrete streams must implement all of them.
class Stream {
 public:
  virtual ~Stream() {}
  // Reads up to `size` bytes into `ptr`; returns the number of bytes read.
  virtual size_t Read(void* ptr, size_t size) = 0;
  // Writes `size` bytes starting at `ptr`.
  virtual void Write(const void* ptr, size_t size) = 0;
  // Returns the current position in the stream.
  virtual size_t Tell() = 0;
  // Moves the current position to `p`.
  virtual void Seek(size_t p) = 0;
  // Create Stream Instance
  static Stream* Open(const char* filename, const char* mode);
};

// FileStream: a Stream backed by a C FILE* handle.
// The handle passed to the constructor is closed by Close(), which the
// destructor calls; copying is disabled so that two objects can never close
// the same handle.
class FileStream : public Stream {
 public:
  explicit FileStream(FILE* fp) : fp_(fp) {}
  ~FileStream() override { this->Close(); }
  FileStream(const FileStream&) = delete;
  FileStream& operator=(const FileStream&) = delete;

  size_t Read(void* ptr, size_t size) override;
  void Write(const void* ptr, size_t size) override;
  size_t Tell() override;
  void Seek(size_t p) override;
  bool Eof();
  void Close();

 private:
  FILE* fp_;
};
51+
52+
} // namespace recordio
53+
} // namespace paddle

paddle/fluid/recordio/io_test.cc

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
#include "paddle/fluid/recordio/io.h"
16+
17+
#include "gtest/gtest.h"
18+
19+
using namespace paddle::recordio;
20+
21+
// Round-trips a short NUL-terminated string through a file: writes it with
// one stream, reads it back with another and compares the contents.
TEST(FileStream, IO) {
  {
    // Write
    // "w" creates/truncates the file; "rw" is not a valid fopen mode.
    Stream* fs = Stream::Open("/tmp/record_0", "w");
    fs->Write("hello", 6);  // 6 bytes: includes the terminating '\0'
    delete fs;
  }
  {
    // Read
    Stream* fs = Stream::Open("/tmp/record_0", "r+");
    char buf[10] = {0};
    EXPECT_EQ(fs->Read(buf, 6), static_cast<size_t>(6));
    EXPECT_STREQ(buf, "hello");
    delete fs;
  }
}

paddle/fluid/recordio/scanner.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,9 @@
2121
#include <utility>
2222
#include <vector>
2323

24-
// Scanner
24+
class RangeScanner;
2525

26+
// Scanner is a scanner for multiple recordio files.
2627
class Scanner {
2728
public:
2829
Scanner(const char* paths);

paddle/fluid/recordio/writer.cc

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,29 +17,36 @@
1717
namespace paddle {
1818
namespace recordio {
1919

20-
Writer::Writer(std::ostream& os)
21-
: stream_(os.rdbuf()), max_chunk_size_(0), compressor_(0) {}
20+
// Builds a writer on `fo` with a maximum chunk size of 0 and compressor
// value 0. Delegates to the three-argument constructor so that chunk_ is
// allocated and the compressor is converted the same way in both
// constructors.
Writer::Writer(Stream* fo) : Writer(fo, 0, 0) {}
2221

23-
Writer::Writer(std::ostream& os, int maxChunkSize, int compressor)
24-
: stream_(os.rdbuf()),
22+
// Builds a writer on `fo`.
//   maxChunkSize: stored in max_chunk_size_; Write() dumps the pending chunk
//                 once a new record would push it past this many bytes.
//   compressor:   integer value converted to the Compressor enum.
// A fresh, empty Chunk is allocated to collect records.
Writer::Writer(Stream* fo, int maxChunkSize, int compressor)
    : stream_(fo),
      max_chunk_size_(maxChunkSize),
      compressor_(static_cast<Compressor>(compressor)) {
  chunk_.reset(new Chunk());
}
3128

32-
size_t Writer::Write(const std::string& buf) { return Write(std::string(buf)); }
33-
34-
size_t Writer::Write(const char* buf, size_t length) {
35-
return Write(std::string(buf, length));
29+
size_t Writer::Write(const std::string& record) {
30+
if (stream_ == nullptr) {
31+
LOG(WARNING) << "Cannot write since writer had been closed.";
32+
return 0;
33+
}
34+
if ((record.size() + chunk_->NumBytes()) > max_chunk_size_) {
35+
chunk_->Dump(stream_, compressor_);
36+
}
37+
chunk_->Add(record);
38+
return record.size();
3639
}
3740

38-
size_t Writer::Write(std::string&& buf) {}
41+
// size_t Writer::Write(const char* buf, size_t length) {
42+
// return Write(std::string(buf, length));
43+
// }
44+
45+
// size_t Writer::Write(std::string&& buf) {}
3946

4047
// Dumps the pending chunk to the stream and marks the writer as closed by
// clearing stream_. Calling Close() again is a no-op, so a chunk is never
// dumped into a null stream.
void Writer::Close() {
  if (stream_ == nullptr) {
    return;  // already closed
  }
  chunk_->Dump(stream_, compressor_);
  stream_ = nullptr;
}
4451

4552
} // namespace recordio

0 commit comments

Comments
 (0)