Skip to content

Commit 89fa6c2

Browse files
authored
For HTML files with known tokens, generate a TOC. (#1)
1 parent 88ac25c commit 89fa6c2

File tree

14 files changed: +1538 additions, -104 deletions (lines changed)

.gitmodules

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,3 +4,6 @@
44
[submodule "third_party/tinyxml2"]
55
path = third_party/tinyxml2
66
url = https://github.com/leethomason/tinyxml2.git
7+
[submodule "third_party/gumbo/gumbo"]
8+
path = third_party/gumbo/gumbo
9+
url = https://github.com/google/gumbo-parser.git

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -12,6 +12,7 @@ set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build deps as a static library." FORCE)
1212
add_subdirectory("third_party/googletest")
1313
add_subdirectory("third_party/tinyxml2")
1414
add_subdirectory("third_party/sqlite3")
15+
add_subdirectory("third_party/gumbo")
1516

1617
add_subdirectory("source")
1718
add_subdirectory("tests")

source/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -14,12 +14,15 @@ add_library(doxygen2docset_lib
1414
"token.h"
1515
"token_parser.cc"
1616
"token_parser.h"
17+
"html_parser.h"
18+
"html_parser.cc"
1719
)
1820

1921
target_link_libraries(doxygen2docset_lib
2022
PUBLIC
2123
tinyxml2
2224
sqlite3
25+
gumbo
2326
)
2427

2528
target_include_directories(doxygen2docset_lib

source/builder.cc

Lines changed: 42 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44

55
#include "docset_index.h"
66
#include "file.h"
7+
#include "html_parser.h"
78
#include "logger.h"
89
#include "plist_parser.h"
910
#include "token_parser.h"
@@ -46,7 +47,9 @@ bool BuildDocset(const std::string& docs, const std::string& location) {
4647
return false;
4748
}
4849

49-
if (!index.AddTokens(token_parser.ReadTokens())) {
50+
auto tokens = token_parser.ReadTokens();
51+
52+
if (!index.AddTokens(tokens)) {
5053
D2D_ERROR << "Could not add tokens to docset index.";
5154
return false;
5255
}
@@ -60,8 +63,44 @@ bool BuildDocset(const std::string& docs, const std::string& location) {
6063
"Tokens.xml",
6164
"Makefile",
6265
};
63-
auto predicate = [&filtered](const std::string& file_name) -> bool {
64-
return filtered.count(file_name) == 0;
66+
67+
auto tokens_by_file = Token::GetTokensByFile(tokens);
68+
69+
auto predicate = [&filtered, &tokens_by_file](
70+
const std::string& from_file_name, //
71+
const struct stat& from_stat, //
72+
const AutoFD& from_fd, //
73+
const std::string& to_file_name) -> bool {
74+
// Check if this file needs to be filtered away.
75+
if (filtered.count(from_file_name) != 0) {
76+
return true;
77+
}
78+
79+
// Check if this is a file in which a TOC needs to be generated.
80+
{
81+
const auto found = tokens_by_file.find(from_file_name);
82+
if (found != tokens_by_file.end()) {
83+
HTMLParser parser(OpenFileReadOnly(from_fd, from_stat.st_size));
84+
auto html_with_toc = parser.BuildHTMLWithTOC(found->second);
85+
if (html_with_toc.IsValid()) {
86+
if (!CopyData(html_with_toc.Get(), //
87+
html_with_toc.GetSize(), //
88+
to_file_name)) {
89+
D2D_ERROR << "Could not copy HTML with TOC to " << to_file_name
90+
<< ". Will try moving file without TOC.";
91+
92+
} else {
93+
return true;
94+
}
95+
} else {
96+
D2D_ERROR << "Could not build TOC in file: " << from_file_name
97+
<< ". Skipping.";
98+
}
99+
}
100+
}
101+
102+
// Copy file as-is.
103+
return CopyFile(from_stat, from_fd, to_file_name);
65104
};
66105

67106
if (!CopyFiles(docs, documents_directory, predicate)) {

source/file.cc

Lines changed: 58 additions & 98 deletions
Original file line number | Diff line number | Diff line change
@@ -1,97 +1,12 @@
11
#include "file.h"
22

3-
#include <dirent.h>
4-
#include <fcntl.h>
53
#include <string.h>
6-
#include <sys/mman.h>
7-
#include <sys/stat.h>
8-
#include <unistd.h>
94

105
#include <algorithm>
116
#include <sstream>
127

13-
#include "logger.h"
14-
158
namespace d2d {
169

17-
class AutoFD {
18-
public:
19-
AutoFD(int fd) : fd_(fd) {}
20-
21-
AutoFD(AutoFD&& fd) = delete;
22-
23-
AutoFD(const AutoFD& fd) = delete;
24-
25-
AutoFD& operator=(const AutoFD&) = delete;
26-
27-
AutoFD& operator=(AutoFD&& other) {
28-
Reset(other.fd_);
29-
other.fd_ = -1;
30-
return *this;
31-
};
32-
33-
int Get() const { return fd_; };
34-
35-
bool IsValid() const { return fd_ > 0; };
36-
37-
void Reset(int fd = -1) {
38-
if (fd_ != fd && fd_ > 0) {
39-
int result = D2D_TEMP_FAILURE_RETRY(::close(fd_));
40-
if (result == -1) {
41-
D2D_ERROR << "Could not close a file descriptor.";
42-
}
43-
}
44-
fd_ = fd;
45-
}
46-
47-
~AutoFD() { Reset(); }
48-
49-
private:
50-
int fd_ = -1;
51-
};
52-
53-
class AutoDir {
54-
public:
55-
AutoDir(DIR* dir) : dir_(dir) {}
56-
57-
DIR* Get() const { return dir_; }
58-
59-
bool IsValid() const { return dir_ != nullptr; }
60-
61-
~AutoDir() {
62-
if (dir_ != nullptr) {
63-
::closedir(dir_);
64-
}
65-
}
66-
67-
private:
68-
DIR* dir_ = nullptr;
69-
D2D_DISALLOW_COPY_AND_ASSIGN(AutoDir);
70-
};
71-
72-
class AutoMapping {
73-
public:
74-
AutoMapping(void* mapping, size_t size) : mapping_(mapping), size_(size) {}
75-
76-
~AutoMapping() {
77-
if (mapping_ != MAP_FAILED) {
78-
if (::munmap(mapping_, size_) != 0) {
79-
D2D_ERROR << "Error unmapping file.";
80-
}
81-
}
82-
}
83-
84-
void* Get() const { return mapping_; }
85-
86-
bool IsValid() const { return mapping_ != MAP_FAILED; }
87-
88-
private:
89-
void* mapping_ = MAP_FAILED;
90-
size_t size_ = 0;
91-
92-
D2D_DISALLOW_COPY_AND_ASSIGN(AutoMapping);
93-
};
94-
9510
bool MakeDirectories(const std::vector<std::string>& directories) {
9611
AutoFD current_level(AT_FDCWD);
9712

@@ -121,7 +36,8 @@ bool MakeDirectories(const std::vector<std::string>& directories) {
12136
return true;
12237
}
12338

124-
bool CopyData(const void* data, size_t length, const std::string& to_path) {
39+
bool CopyData(const void* from_data, size_t from_length,
40+
const std::string& to_path) {
12541
AutoFD to_file(
12642
D2D_TEMP_FAILURE_RETRY(::open(to_path.c_str(), O_CREAT | O_TRUNC | O_RDWR,
12743
S_IRUSR | S_IWUSR | S_IXUSR)));
@@ -131,31 +47,31 @@ bool CopyData(const void* data, size_t length, const std::string& to_path) {
13147
return false;
13248
}
13349

134-
if (::ftruncate(to_file.Get(), length) != 0) {
50+
if (::ftruncate(to_file.Get(), from_length) != 0) {
13551
D2D_ERROR << "Could not truncate file " << to_path;
13652
return false;
13753
}
138-
AutoMapping to_mapping(::mmap(nullptr, length, PROT_WRITE,
54+
AutoMapping to_mapping(::mmap(nullptr, from_length, PROT_WRITE,
13955
MAP_FILE | MAP_SHARED, to_file.Get(), 0),
140-
length);
56+
from_length);
14157

14258
if (!to_mapping.IsValid()) {
14359
D2D_ERROR << "Could not setup mapping to perform file copy.";
14460
return false;
14561
}
14662

147-
::memcpy(to_mapping.Get(), data, length);
63+
::memcpy(to_mapping.Get(), from_data, from_length);
14864

149-
if (::msync(to_mapping.Get(), length, MS_SYNC) != 0) {
65+
if (::msync(to_mapping.Get(), from_length, MS_SYNC) != 0) {
15066
D2D_ERROR << "Could not sync file contents.";
15167
return false;
15268
}
15369

15470
return true;
15571
}
15672

157-
static bool CopyFile(const struct stat& from_stat, const AutoFD& from,
158-
const std::string& to_path) {
73+
bool CopyFile(const struct stat& from_stat, const AutoFD& from,
74+
const std::string& to_path) {
15975
AutoMapping from_mapping(::mmap(nullptr, from_stat.st_size, PROT_READ,
16076
MAP_FILE | MAP_PRIVATE, from.Get(), 0),
16177
from_stat.st_size);
@@ -212,10 +128,6 @@ bool CopyFiles(const std::string& from_path,
212128
continue;
213129
}
214130

215-
if (!predicate(file_name)) {
216-
continue;
217-
}
218-
219131
AutoFD from_fd(D2D_TEMP_FAILURE_RETRY(
220132
::openat(::dirfd(from.Get()), file_name.c_str(), O_RDONLY)));
221133
if (!from_fd.IsValid()) {
@@ -238,7 +150,11 @@ bool CopyFiles(const std::string& from_path,
238150
return false;
239151
}
240152
} else {
241-
if (!CopyFile(from_stat, from_fd, JoinPaths(to_path, file_name))) {
153+
if (!predicate(file_name, //
154+
from_stat, //
155+
from_fd, //
156+
JoinPaths(to_path, file_name) //
157+
)) {
242158
D2D_ERROR << "Could not copy file " << file_name;
243159
return false;
244160
}
@@ -284,4 +200,48 @@ std::string JoinPaths(const std::vector<std::string>& paths,
284200
return JoinPaths(merged);
285201
}
286202

203+
std::unique_ptr<AutoMapping> OpenFileReadOnly(const AutoFD& fd, size_t size) {
204+
if (!fd.IsValid()) {
205+
D2D_ERROR << "File descriptor was invalid";
206+
return nullptr;
207+
}
208+
209+
auto mapping =
210+
std::make_unique<AutoMapping>(::mmap(nullptr, //
211+
size, //
212+
PROT_READ, //
213+
MAP_FILE | MAP_PRIVATE, //
214+
fd.Get(), //
215+
0),
216+
size);
217+
if (!mapping || !mapping->IsValid()) {
218+
D2D_ERROR << "Could not create file mapping.";
219+
return nullptr;
220+
}
221+
222+
return mapping;
223+
}
224+
225+
std::unique_ptr<AutoMapping> OpenFileReadOnly(const std::string& path) {
226+
if (path.size() == 0) {
227+
D2D_ERROR << "Path was empty when attempting to open file.";
228+
return nullptr;
229+
}
230+
231+
AutoFD fd(D2D_TEMP_FAILURE_RETRY(::open(path.c_str(), O_RDONLY)));
232+
233+
if (!fd.IsValid()) {
234+
D2D_ERROR << "Could not open file: " << path;
235+
return nullptr;
236+
}
237+
238+
struct stat stat_buf = {0};
239+
if (::fstat(fd.Get(), &stat_buf) != 0) {
240+
D2D_ERROR << "Could not stat file.";
241+
return nullptr;
242+
}
243+
244+
return OpenFileReadOnly(fd, stat_buf.st_size);
245+
}
246+
287247
} // namespace d2d

0 commit comments

Comments
 (0)