Skip to content

Commit d213b45

Browse files
authored
Merge pull request #107 from rw2/http-ls
Http ls
2 parents ad01cd3 + 49e61cc commit d213b45

16 files changed

+430
-91
lines changed

CMakeLists.txt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,11 +112,12 @@ target_link_libraries(XrdOssS3 XrdS3Obj)
112112
######################
113113
## libXrdOssHTTP ##
114114
######################
115-
add_library(XrdHTTPServerObj OBJECT src/CurlUtil.cc src/HTTPFile.cc src/HTTPFileSystem.cc src/HTTPCommands.cc src/TokenFile.cc src/stl_string_utils.cc src/shortfile.cc src/logging.cc)
115+
add_library(XrdHTTPServerObj OBJECT src/CurlUtil.cc src/HTTPFile.cc src/HTTPDirectory.cc src/HTTPFileSystem.cc src/HTTPCommands.cc src/TokenFile.cc src/stl_string_utils.cc src/shortfile.cc src/logging.cc)
116116
set_target_properties(XrdHTTPServerObj PROPERTIES POSITION_INDEPENDENT_CODE ON)
117117
target_include_directories(XrdHTTPServerObj PRIVATE ${XRootD_INCLUDE_DIRS})
118-
target_link_libraries(XrdHTTPServerObj ${XRootD_UTILS_LIBRARIES} ${XRootD_SERVER_LIBRARIES} CURL::libcurl OpenSSL::Crypto Threads::Threads std::filesystem)
118+
target_link_libraries(XrdHTTPServerObj ${XRootD_UTILS_LIBRARIES} ${XRootD_SERVER_LIBRARIES} CURL::libcurl tinyxml2::tinyxml2 OpenSSL::Crypto Threads::Threads std::filesystem)
119119

120+
# Compatibility library, doesn't match upstream's naming convention
120121
add_library(XrdHTTPServer MODULE "$<TARGET_OBJECTS:XrdHTTPServerObj>")
121122
target_link_libraries(XrdHTTPServer XrdHTTPServerObj)
122123
# New library name, matches upstream's naming convention

src/GlobusDirectory.hh

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,22 +19,25 @@
1919
#pragma once
2020

2121
#include "GlobusFileSystem.hh"
22-
#include "HTTPDirectory.hh"
22+
23+
#include <XrdOss/XrdOss.hh>
2324

2425
#include <string>
2526
#include <vector>
2627

28+
class XrdSysError;
29+
2730
// Structure to hold Globus object information
2831
struct GlobusObjectInfo {
2932
size_t m_size;
3033
std::string m_key;
3134
std::string m_last_modified;
3235
};
3336

34-
class GlobusDirectory : public HTTPDirectory {
37+
class GlobusDirectory : public XrdOssDF {
3538
public:
3639
GlobusDirectory(XrdSysError &log, const GlobusFileSystem &fs)
37-
: HTTPDirectory(log), m_fs(fs) {}
40+
: m_log(log), m_fs(fs) {}
3841

3942
virtual ~GlobusDirectory() {}
4043

@@ -56,6 +59,7 @@ class GlobusDirectory : public HTTPDirectory {
5659
std::vector<GlobusObjectInfo> m_directories;
5760
std::string m_prefix;
5861
std::string m_object;
62+
XrdSysError &m_log;
5963
const GlobusFileSystem &m_fs;
6064
struct stat *m_stat_buf{nullptr};
6165
};

src/HTTPCommands.cc

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -873,6 +873,18 @@ bool HTTPDownload::SendRequest(off_t offset, size_t size) {
873873

874874
// ---------------------------------------------------------------------------
875875

876+
// Nothing to release here; defined out of line to match the declaration in
// HTTPCommands.hh.
HTTPList::~HTTPList() = default;
877+
878+
bool HTTPList::SendRequest() {
879+
expectedResponseCode = {200};
880+
881+
httpVerb = "GET";
882+
std::string noPayloadAllowed;
883+
return SendHTTPRequest(noPayloadAllowed);
884+
}
885+
886+
// ---------------------------------------------------------------------------
887+
876888
HTTPHead::~HTTPHead() {}
877889

878890
bool HTTPHead::SendRequest() {

src/HTTPCommands.hh

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -351,6 +351,22 @@ class HTTPDownload final : public HTTPRequest {
351351
std::string object;
352352
};
353353

354+
class HTTPList final : public HTTPRequest {
355+
public:
356+
HTTPList(const std::string &h, const std::string &o, XrdSysError &log,
357+
const TokenFile *token)
358+
: HTTPRequest(h, log, token), object(o) {
359+
hostUrl = hostUrl + "/" + object;
360+
}
361+
362+
virtual ~HTTPList();
363+
364+
virtual bool SendRequest();
365+
366+
protected:
367+
std::string object;
368+
};
369+
354370
class HTTPHead final : public HTTPRequest {
355371
public:
356372
HTTPHead(const std::string &h, const std::string &o, XrdSysError &log,

src/HTTPDirectory.cc

Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
/***************************************************************
2+
*
3+
* Copyright (C) 2024, Pelican Project, Morgridge Institute for Research
4+
*
5+
* Licensed under the Apache License, Version 2.0 (the "License"); you
6+
* may not use this file except in compliance with the License. You may
7+
* obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*
17+
***************************************************************/
18+
19+
#include "HTTPDirectory.hh"
20+
#include "HTTPCommands.hh"
21+
#include "HTTPFile.hh"
22+
#include "HTTPFileSystem.hh"
23+
#include "logging.hh"
24+
#include "stl_string_utils.hh"
25+
26+
#include <XrdOuc/XrdOucEnv.hh>
27+
#include <XrdOuc/XrdOucStream.hh>
28+
#include <XrdSec/XrdSecEntity.hh>
29+
#include <XrdSec/XrdSecEntityAttr.hh>
30+
#include <XrdSfs/XrdSfsInterface.hh>
31+
#include <XrdSys/XrdSysError.hh>
32+
#include <XrdVersion.hh>
33+
34+
#include <curl/curl.h>
35+
#include <filesystem>
36+
#include <iostream>
37+
#include <map>
38+
#include <memory>
39+
#include <mutex>
40+
#include <regex>
41+
#include <sstream>
42+
#include <string>
43+
#include <tinyxml2.h>
44+
45+
using namespace XrdHTTPServer;
46+
47+
// Bind the directory handle to its logger and owning file system.
// mystat is explicitly nulled: it is only ever assigned by StatRet(), and
// would otherwise hold an indeterminate pointer until that call happens.
HTTPDirectory::HTTPDirectory(XrdSysError &log, HTTPFileSystem &oss)
	: mystat(nullptr), m_log(log), m_oss(oss) {}
49+
50+
void HTTPDirectory::parseHTMLToListing(const std::string &htmlContent) {
51+
m_remoteList.clear();
52+
53+
tinyxml2::XMLDocument doc;
54+
tinyxml2::XMLError error = doc.Parse(htmlContent.c_str());
55+
if (error != tinyxml2::XML_SUCCESS) {
56+
m_log.Log(LogMask::Warning, "HTTPDirectory",
57+
"Failed to parse HTML in directory response");
58+
return;
59+
}
60+
61+
// Root of the HTML document
62+
auto root = doc.FirstChild();
63+
if (!root) {
64+
m_log.Log(LogMask::Warning, "HTTPDirectory",
65+
"No root found in HTML in directory response");
66+
return;
67+
}
68+
69+
// Traverse the rows in the table
70+
for (auto row = root->FirstChildElement("tr"); row != nullptr;
71+
row = row->NextSiblingElement("tr")) {
72+
Entry entry;
73+
int columnIndex = 0;
74+
75+
// Traverse each cell in the row
76+
for (auto cell = row->FirstChildElement("td"); cell != nullptr;
77+
cell = cell->NextSiblingElement("td")) {
78+
const char *cellText = cell->GetText() ? cell->GetText() : "";
79+
80+
switch (columnIndex) {
81+
case 0: // Mode
82+
entry.mode = cellText;
83+
break;
84+
case 1: // Flags
85+
entry.flags = cellText;
86+
break;
87+
case 2: // Size
88+
entry.size = cellText;
89+
break;
90+
case 3: // Modified
91+
entry.modified = cellText;
92+
break;
93+
case 4: // Name
94+
if (auto aTag = cell->FirstChildElement("a")) {
95+
const char *nameText =
96+
aTag->GetText() ? aTag->GetText() : "";
97+
entry.name = nameText;
98+
}
99+
break;
100+
default:
101+
break;
102+
}
103+
columnIndex++;
104+
}
105+
106+
// Skip adding invalid/empty rows
107+
if (entry.name.empty()) {
108+
continue;
109+
}
110+
111+
struct stat workingFile;
112+
workingFile.st_size = std::stoul(entry.size, nullptr, 10);
113+
// workingFile.st_mtime = std::stoul(entry.modified, nullptr, 10);
114+
if (entry.mode.substr(0, 1) == "d")
115+
workingFile.st_mode = 0600 | S_IFDIR;
116+
else
117+
workingFile.st_mode = 0600 | S_IFREG;
118+
119+
workingFile.st_nlink = 1;
120+
workingFile.st_uid = 1;
121+
workingFile.st_gid = 1;
122+
workingFile.st_atime = 0;
123+
workingFile.st_ctime = 0;
124+
workingFile.st_dev = 0;
125+
workingFile.st_ino = 0;
126+
m_remoteList.push_back({entry.name, workingFile});
127+
}
128+
}
129+
130+
namespace {

// Lower-case a single ASCII letter; every other byte is returned unchanged.
inline char asciiLower(char c) {
	return (c >= 'A' && c <= 'Z') ? static_cast<char>(c - 'A' + 'a') : c;
}

// Find `lowerNeedle` (which must already be lower-case) in `text` starting at
// `from`, comparing ASCII letters case-insensitively.
// Returns std::string::npos when there is no match.
std::string::size_type findAsciiNoCase(const std::string &text,
										const std::string &lowerNeedle,
										std::string::size_type from) {
	const auto needleLen = lowerNeedle.size();
	if (needleLen > text.size()) {
		return std::string::npos;
	}
	const auto lastStart = text.size() - needleLen;
	for (auto pos = from; pos <= lastStart; ++pos) {
		std::string::size_type matched = 0;
		while (matched < needleLen &&
			   asciiLower(text[pos + matched]) == lowerNeedle[matched]) {
			++matched;
		}
		if (matched == needleLen) {
			return pos;
		}
	}
	return std::string::npos;
}

} // namespace

// Return the first `<table ...> ... </table>` element found in htmlContent
// (tag names matched case-insensitively), or an empty string if there is none.
//
// The span runs from the first "<table" through the first "</table>" that
// follows the opening tag's '>', i.e. the same span the former pattern
// `<table[^>]*>[\s\S]*?</table>` selected. A linear scan is used instead of
// std::regex so that a large listing cannot exhaust the stack inside the regex
// engine, and so that no pattern is recompiled on every call.
std::string HTTPDirectory::extractHTMLTable(const std::string &htmlContent) {
	const std::string openTag = "<table";
	const std::string closeTag = "</table>";

	const auto tableStart = findAsciiNoCase(htmlContent, openTag, 0);
	if (tableStart == std::string::npos) {
		return "";
	}

	// End of the opening tag: the first '>' after "<table".
	const auto openTagEnd =
		htmlContent.find('>', tableStart + openTag.size());
	if (openTagEnd == std::string::npos) {
		return "";
	}

	const auto closeStart =
		findAsciiNoCase(htmlContent, closeTag, openTagEnd + 1);
	if (closeStart == std::string::npos) {
		return ""; // Return an empty string if no table is found
	}

	return htmlContent.substr(tableStart,
							  closeStart + closeTag.size() - tableStart);
}
141+
142+
int HTTPDirectory::Readdir(char *buff, int blen) {
143+
if (m_remoteList.size() > 0) {
144+
std::string name = m_remoteList.begin()->first;
145+
struct stat currentRecord = m_remoteList.begin()->second;
146+
mystat->st_size = currentRecord.st_size;
147+
mystat->st_mode = currentRecord.st_mode;
148+
mystat->st_nlink = currentRecord.st_nlink;
149+
mystat->st_uid = currentRecord.st_uid;
150+
mystat->st_gid = currentRecord.st_gid;
151+
mystat->st_atime = currentRecord.st_atime;
152+
mystat->st_ctime = currentRecord.st_ctime;
153+
mystat->st_dev = currentRecord.st_dev;
154+
mystat->st_ino = currentRecord.st_ino;
155+
memcpy(buff, name.c_str(), name.size() + 1);
156+
m_remoteList.erase(m_remoteList.begin());
157+
return name.size();
158+
} else {
159+
buff[0] = '\0';
160+
return 0;
161+
}
162+
}
163+
164+
int HTTPDirectory::Opendir(const char *path, XrdOucEnv &env) {
165+
m_log.Log(LogMask::Debug, "HTTPDirectory::Opendir", "Opendir called");
166+
auto configured_hostname = m_oss.getHTTPHostName();
167+
auto configured_hostUrl = m_oss.getHTTPHostUrl();
168+
const auto &configured_url_base = m_oss.getHTTPUrlBase();
169+
if (!configured_url_base.empty()) {
170+
configured_hostUrl = configured_url_base;
171+
configured_hostname = m_oss.getStoragePrefix();
172+
}
173+
174+
//
175+
// Check the path for validity.
176+
//
177+
std::string object;
178+
int rv = parse_path(m_oss.getHTTPHostName(), path, object);
179+
180+
if (rv != 0) {
181+
return rv;
182+
}
183+
184+
if (m_remoteList.empty()) {
185+
m_log.Log(LogMask::Debug, "HTTPFile::Opendir", "Opendir called");
186+
HTTPList list(configured_hostUrl, object, m_log, m_oss.getToken());
187+
m_log.Log(LogMask::Debug, "HTTPDirectory::Opendir",
188+
"About to perform download from HTTPDirectory::Opendir(): "
189+
"hostname / object:",
190+
configured_hostname.c_str(), object.c_str());
191+
if (!list.SendRequest()) {
192+
std::stringstream ss;
193+
ss << "Failed to send GetObject command: " << list.getResponseCode()
194+
<< "'" << list.getResultString() << "'";
195+
m_log.Log(LogMask::Warning, "HTTPDirectory::Opendir",
196+
ss.str().c_str());
197+
return 0;
198+
}
199+
200+
parseHTMLToListing(extractHTMLTable(list.getResultString()));
201+
}
202+
203+
return 0;
204+
}

src/HTTPDirectory.hh

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,27 +18,46 @@
1818

1919
#pragma once
2020

21-
#include "XrdOss/XrdOss.hh"
22-
#include "XrdOuc/XrdOucEnv.hh"
21+
#include "HTTPFileSystem.hh"
22+
#include "logging.hh"
2323

24-
class XrdSysError;
24+
#include <XrdOss/XrdOss.hh>
25+
#include <XrdOuc/XrdOucEnv.hh>
26+
#include <XrdSfs/XrdSfsInterface.hh>
27+
28+
#include <map>
29+
#include <vector>
2530

2631
class HTTPDirectory : public XrdOssDF {
2732
public:
28-
HTTPDirectory(XrdSysError &log) : m_log(log) {}
29-
33+
HTTPDirectory(XrdSysError &log, HTTPFileSystem &oss);
3034
virtual ~HTTPDirectory() {}
3135

32-
virtual int Opendir(const char *path, XrdOucEnv &env) override {
33-
return -ENOSYS;
34-
}
36+
virtual int Opendir(const char *path, XrdOucEnv &env) override;
3537

36-
virtual int Readdir(char *buff, int blen) override { return -ENOSYS; }
38+
virtual int Readdir(char *buff, int blen) override;
3739

38-
virtual int StatRet(struct stat *statStruct) override { return -ENOSYS; }
40+
virtual int StatRet(struct stat *statStruct) override {
41+
mystat = statStruct;
42+
return SFS_OK;
43+
}
3944

4045
virtual int Close(long long *retsz = 0) override { return -ENOSYS; }
4146

42-
protected:
47+
private:
48+
struct Entry {
49+
std::string mode;
50+
std::string flags;
51+
std::string size;
52+
std::string modified;
53+
std::string name;
54+
};
55+
56+
void parseHTMLToListing(const std::string &htmlContent);
57+
std::string extractHTMLTable(const std::string &htmlContent);
58+
59+
struct stat *mystat;
4360
XrdSysError &m_log;
61+
HTTPFileSystem &m_oss;
62+
std::vector<std::pair<std::string, struct stat>> m_remoteList;
4463
};

0 commit comments

Comments
 (0)