Skip to content

Commit c504609

Browse files
authored
Merge pull request #47013 from makortel/reduceInputFileCatalogMemory
Remove redundant data members from InputFileCatalog to reduce memory use
2 parents 7915102 + 5744eaa commit c504609

File tree

9 files changed

+259
-108
lines changed

9 files changed

+259
-108
lines changed

FWCore/Catalog/interface/InputFileCatalog.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
namespace edm {
2020
class FileCatalogItem {
2121
public:
22-
FileCatalogItem(std::vector<std::string> const& pfns, std::string const& lfn) : pfns_(pfns), lfn_(lfn) {}
22+
FileCatalogItem(std::vector<std::string> pfns, std::string lfn) : pfns_(std::move(pfns)), lfn_(std::move(lfn)) {}
2323

2424
std::string const& fileName(unsigned iCatalog) const { return pfns_[iCatalog]; }
2525
std::string const& logicalFileName() const { return lfn_; }
@@ -33,7 +33,7 @@ namespace edm {
3333

3434
class InputFileCatalog {
3535
public:
36-
InputFileCatalog(std::vector<std::string> const& fileNames,
36+
InputFileCatalog(std::vector<std::string> fileNames,
3737
std::string const& override,
3838
bool useLFNasPFNifLFNnotFound = false,
3939
//switching between two catalog types
@@ -42,19 +42,19 @@ namespace edm {
4242

4343
~InputFileCatalog();
4444
std::vector<FileCatalogItem> const& fileCatalogItems() const { return fileCatalogItems_; }
45-
std::vector<std::string> const& logicalFileNames() const { return logicalFileNames_; }
4645
std::vector<std::string> fileNames(unsigned iCatalog) const;
4746
bool empty() const { return fileCatalogItems_.empty(); }
4847
static bool isPhysical(std::string const& name) { return (name.empty() || name.find(':') != std::string::npos); }
4948

5049
private:
51-
void init(std::string const& override, bool useLFNasPFNifLFNnotFound, edm::CatalogType catType);
50+
void init(std::vector<std::string> logicalFileNames,
51+
std::string const& override,
52+
bool useLFNasPFNifLFNnotFound,
53+
edm::CatalogType catType);
5254
void findFile(std::string const& lfn,
5355
std::vector<std::string>& pfns,
5456
bool useLFNasPFNifLFNnotFound,
5557
edm::CatalogType catType);
56-
std::vector<std::string> logicalFileNames_;
57-
std::vector<std::string> fileNames_;
5858
std::vector<FileCatalogItem> fileCatalogItems_;
5959
edm::propagate_const<std::unique_ptr<FileLocator>> overrideFileLocator_;
6060

FWCore/Catalog/src/InputFileCatalog.cc

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,12 +17,12 @@
1717

1818
namespace edm {
1919

20-
InputFileCatalog::InputFileCatalog(std::vector<std::string> const& fileNames,
20+
InputFileCatalog::InputFileCatalog(std::vector<std::string> fileNames,
2121
std::string const& override,
2222
bool useLFNasPFNifLFNnotFound,
2323
edm::CatalogType catType)
24-
: logicalFileNames_(fileNames), fileNames_(fileNames), fileCatalogItems_(), overrideFileLocator_() {
25-
init(override, useLFNasPFNifLFNnotFound, catType);
24+
: fileCatalogItems_(), overrideFileLocator_() {
25+
init(std::move(fileNames), override, useLFNasPFNifLFNnotFound, catType);
2626
}
2727

2828
InputFileCatalog::~InputFileCatalog() {}
@@ -36,7 +36,8 @@ namespace edm {
3636
return tmp;
3737
}
3838

39-
void InputFileCatalog::init(std::string const& inputOverride,
39+
void InputFileCatalog::init(std::vector<std::string> logicalFileNames,
40+
std::string const& inputOverride,
4041
bool useLFNasPFNifLFNnotFound,
4142
edm::CatalogType catType) {
4243
typedef std::vector<std::string>::iterator iter;
@@ -123,32 +124,31 @@ namespace edm {
123124
throw ex;
124125
}
125126

126-
for (iter it = fileNames_.begin(), lt = logicalFileNames_.begin(), itEnd = fileNames_.end(); it != itEnd;
127-
++it, ++lt) {
128-
boost::trim(*it);
127+
for (auto& lfn : logicalFileNames) {
128+
boost::trim(lfn);
129129
std::vector<std::string> pfns;
130-
if (it->empty()) {
130+
if (lfn.empty()) {
131131
cms::Exception ex("FileCatalog");
132132
ex << "An empty string specified in the fileNames parameter for input source";
133133
ex.addContext("Calling edm::InputFileCatalog::init()");
134134
throw ex;
135135
}
136-
if (isPhysical(*it)) {
137-
if (it->back() == ':') {
136+
if (isPhysical(lfn)) {
137+
if (lfn.back() == ':') {
138138
cms::Exception ex("FileCatalog");
139139
ex << "An empty physical file name specified in the fileNames parameter for input source";
140140
ex.addContext("Calling edm::InputFileCatalog::init()");
141141
throw ex;
142142
}
143-
pfns.push_back(*it);
143+
pfns.push_back(lfn);
144144
// Clear the LFN.
145-
lt->clear();
145+
lfn.clear();
146146
} else {
147-
boost::trim(*lt);
148-
findFile(*lt, pfns, useLFNasPFNifLFNnotFound, catType);
147+
findFile(lfn, pfns, useLFNasPFNifLFNnotFound, catType);
149148
}
149+
lfn.shrink_to_fit(); // try to release memory
150150

151-
fileCatalogItems_.push_back(FileCatalogItem(pfns, *lt));
151+
fileCatalogItems_.emplace_back(std::move(pfns), std::move(lfn));
152152
}
153153
}
154154

FWCore/Catalog/test/BuildFile.xml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
<use name="FWCore/Catalog"/>
22
<use name="FWCore/ServiceRegistry"/>
33
<use name="catch2"/>
4-
<bin name="edmCatalogFileLocator_t" file="FileLocator_t.cpp">
4+
<bin name="TestFWCoreCatalog" file="FileLocator_t.cpp,InputFileCatalog_t.cpp">
55
</bin>

FWCore/Catalog/test/FileLocator_t.cpp

Lines changed: 11 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -2,96 +2,24 @@
22
#include "FWCore/Catalog/interface/SiteLocalConfig.h"
33
#include "FWCore/Utilities/interface/Exception.h"
44
#include "FWCore/ServiceRegistry/interface/ServiceRegistry.h"
5-
#include <filesystem>
65

6+
#include "TestSiteLocalConfig.h"
7+
8+
#include <filesystem>
79
#include <string>
810

911
#define CATCH_CONFIG_MAIN
1012
#include "catch.hpp"
1113

12-
namespace {
13-
class TestSiteLocalConfig : public edm::SiteLocalConfig {
14-
public:
15-
//constructor using trivial data catalogs
16-
TestSiteLocalConfig(std::vector<std::string> catalogs) : m_trivialCatalogs(std::move(catalogs)) {}
17-
//constructor using Rucio data catalogs
18-
TestSiteLocalConfig(std::vector<edm::CatalogAttributes> catalogs) : m_catalogs(std::move(catalogs)) {}
19-
std::vector<std::string> const& trivialDataCatalogs() const final { return m_trivialCatalogs; }
20-
std::vector<edm::CatalogAttributes> const& dataCatalogs() const final { return m_catalogs; }
21-
std::filesystem::path const storageDescriptionPath(const edm::CatalogAttributes& aDataCatalog) const final {
22-
return std::filesystem::path();
23-
}
24-
25-
std::string const lookupCalibConnect(std::string const& input) const final { return std::string(); }
26-
std::string const rfioType(void) const final { return std::string(); }
27-
28-
std::string const* sourceCacheTempDir() const final { return nullptr; }
29-
double const* sourceCacheMinFree() const final { return nullptr; }
30-
std::string const* sourceCacheHint() const final { return nullptr; }
31-
std::string const* sourceCloneCacheHint() const final { return nullptr; }
32-
std::string const* sourceReadHint() const final { return nullptr; }
33-
unsigned int const* sourceTTreeCacheSize() const final { return nullptr; }
34-
unsigned int const* sourceTimeout() const final { return nullptr; }
35-
bool enablePrefetching() const final { return false; }
36-
unsigned int debugLevel() const final { return 0; }
37-
std::vector<std::string> const* sourceNativeProtocols() const final { return nullptr; }
38-
struct addrinfo const* statisticsDestination() const final { return nullptr; }
39-
std::set<std::string> const* statisticsInfo() const final { return nullptr; }
40-
std::string const& siteName(void) const final { return m_emptyString; }
41-
std::string const& subSiteName(void) const final { return m_emptyString; }
42-
bool useLocalConnectString() const final { return false; }
43-
std::string const& localConnectPrefix() const final { return m_emptyString; }
44-
std::string const& localConnectSuffix() const final { return m_emptyString; }
45-
46-
private:
47-
std::vector<std::string> m_trivialCatalogs;
48-
std::vector<edm::CatalogAttributes> m_catalogs;
49-
std::filesystem::path m_storageDescription_path;
50-
std::string m_emptyString;
51-
};
52-
} // namespace
53-
54-
TEST_CASE("FileLocator with Rucio data catalog", "[filelocatorRucioDataCatalog]") {
55-
//catalog for testing "prefix"
56-
edm::CatalogAttributes aCatalog;
57-
aCatalog.site = "T1_US_FNAL";
58-
aCatalog.subSite = "T1_US_FNAL";
59-
aCatalog.storageSite = "T1_US_FNAL";
60-
aCatalog.volume = "American_Federation";
61-
aCatalog.protocol = "XRootD";
62-
std::vector<edm::CatalogAttributes> tmp{aCatalog};
63-
//catalog for testing "rules"
64-
aCatalog.site = "T1_US_FNAL";
65-
aCatalog.subSite = "T1_US_FNAL";
66-
aCatalog.storageSite = "T1_US_FNAL";
67-
aCatalog.volume = "FNAL_dCache_EOS";
68-
aCatalog.protocol = "XRootD";
69-
tmp.push_back(aCatalog);
70-
//catalog for testing chained "rules"
71-
aCatalog.site = "T1_US_FNAL";
72-
aCatalog.subSite = "T1_US_FNAL";
73-
aCatalog.storageSite = "T1_US_FNAL";
74-
aCatalog.volume = "FNAL_dCache_EOS";
75-
aCatalog.protocol = "root";
76-
tmp.push_back(aCatalog);
77-
78-
//create the services
79-
edm::ServiceToken tempToken(
80-
edm::ServiceRegistry::createContaining(std::unique_ptr<edm::SiteLocalConfig>(new TestSiteLocalConfig(tmp))));
81-
82-
std::string CMSSW_BASE(std::getenv("CMSSW_BASE"));
83-
std::string CMSSW_RELEASE_BASE(std::getenv("CMSSW_RELEASE_BASE"));
84-
std::string file_name("/src/FWCore/Catalog/test/storage.json");
85-
std::string full_file_name = std::filesystem::exists((CMSSW_BASE + file_name).c_str())
86-
? CMSSW_BASE + file_name
87-
: CMSSW_RELEASE_BASE + file_name;
14+
TEST_CASE("FileLocator with Rucio data catalog", "[FWCore/Catalog]") {
15+
edm::ServiceToken tempToken = edmtest::catalog::makeTestSiteLocalConfigToken();
8816

8917
SECTION("prefix") {
9018
edm::ServiceRegistry::Operate operate(tempToken);
9119
//empty catalog
9220
edm::CatalogAttributes tmp_cat;
9321
//use the first catalog provided by site local config
94-
edm::FileLocator fl(tmp_cat, 0, full_file_name);
22+
edm::FileLocator fl(tmp_cat, 0);
9523
CHECK("root://cmsxrootd.fnal.gov/store/group/bha/bho" ==
9624
fl.pfn("/store/group/bha/bho", edm::CatalogType::RucioCatalog));
9725
}
@@ -100,7 +28,7 @@ TEST_CASE("FileLocator with Rucio data catalog", "[filelocatorRucioDataCatalog]"
10028
//empty catalog
10129
edm::CatalogAttributes tmp_cat;
10230
//use the second catalog provided by site local config
103-
edm::FileLocator fl(tmp_cat, 1, full_file_name);
31+
edm::FileLocator fl(tmp_cat, 1);
10432
const std::array<const char*, 7> lfn = {{"/bha/bho",
10533
"bha",
10634
"file:bha",
@@ -119,7 +47,7 @@ TEST_CASE("FileLocator with Rucio data catalog", "[filelocatorRucioDataCatalog]"
11947
//empty catalog
12048
edm::CatalogAttributes tmp_cat;
12149
//use the third catalog provided by site local config above
122-
edm::FileLocator fl(tmp_cat, 2, full_file_name);
50+
edm::FileLocator fl(tmp_cat, 2);
12351
const std::array<const char*, 7> lfn = {{"/bha/bho",
12452
"bha",
12553
"file:bha",
@@ -139,7 +67,7 @@ TEST_CASE("FileLocator with Rucio data catalog", "[filelocatorRucioDataCatalog]"
13967
}
14068
}
14169

142-
TEST_CASE("FileLocator", "[filelocator]") {
70+
TEST_CASE("FileLocator with TrivialFileCatalog", "[FWCore/Catalog]") {
14371
std::string CMSSW_BASE(std::getenv("CMSSW_BASE"));
14472
std::string CMSSW_RELEASE_BASE(std::getenv("CMSSW_RELEASE_BASE"));
14573
std::string file_name("/src/FWCore/Catalog/test/simple_catalog.xml");
@@ -149,8 +77,8 @@ TEST_CASE("FileLocator", "[filelocator]") {
14977

15078
//create the services
15179
std::vector<std::string> tmp{std::string("trivialcatalog_file:") + full_file_name + "?protocol=xrd"};
152-
edm::ServiceToken tempToken(
153-
edm::ServiceRegistry::createContaining(std::unique_ptr<edm::SiteLocalConfig>(new TestSiteLocalConfig(tmp))));
80+
edm::ServiceToken tempToken(edm::ServiceRegistry::createContaining(
81+
std::unique_ptr<edm::SiteLocalConfig>(std::make_unique<edmtest::catalog::TestSiteLocalConfig>(tmp))));
15482

15583
//make the services available
15684
SECTION("standard") {
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
#include "FWCore/Catalog/interface/InputFileCatalog.h"
2+
#include "FWCore/ServiceRegistry/interface/ServiceRegistry.h"
3+
4+
#include "TestSiteLocalConfig.h"
5+
6+
#include "catch.hpp"
7+
8+
TEST_CASE("InputFileCatalog with Rucio data catalog", "[FWCore/Catalog]") {
9+
edm::ServiceToken tempToken = edmtest::catalog::makeTestSiteLocalConfigToken();
10+
11+
SECTION("Empty") {
12+
edm::ServiceRegistry::Operate operate(tempToken);
13+
edm::InputFileCatalog catalog({}, "");
14+
REQUIRE(catalog.empty());
15+
}
16+
17+
SECTION("isPhysical") {
18+
REQUIRE(edm::InputFileCatalog::isPhysical(""));
19+
REQUIRE(not edm::InputFileCatalog::isPhysical("/foo/bar"));
20+
REQUIRE(edm::InputFileCatalog::isPhysical("file:/foo/bar"));
21+
}
22+
23+
SECTION("Default behavior") {
24+
edm::ServiceRegistry::Operate operate(tempToken);
25+
26+
edm::InputFileCatalog catalog(std::vector<std::string>{"/store/foo/bar", " file:/foo/bar", "root://foobar "}, "");
27+
28+
SECTION("fileNames") {
29+
SECTION("Catalog 0") {
30+
auto const names = catalog.fileNames(0);
31+
REQUIRE(names.size() == 3);
32+
CHECK(names[0] == "root://cmsxrootd.fnal.gov/store/foo/bar");
33+
CHECK(names[1] == "file:/foo/bar");
34+
CHECK(names[2] == "root://foobar");
35+
}
36+
// The fileNames() works only for catalog 0
37+
// Catalog 1 or 2 leads to a crash because the input file list
38+
// is a mixture of LFNs and PFNs
39+
// This isn't really good behavior...
40+
}
41+
42+
SECTION("fileCatalogItems") {
43+
auto const& items = catalog.fileCatalogItems();
44+
REQUIRE(items.size() == 3);
45+
CHECK(items[0].logicalFileName() == "/store/foo/bar");
46+
CHECK(items[1].logicalFileName() == "");
47+
CHECK(items[2].logicalFileName() == "");
48+
49+
REQUIRE(items[0].fileNames().size() == 3);
50+
REQUIRE(items[1].fileNames().size() == 1);
51+
REQUIRE(items[2].fileNames().size() == 1);
52+
53+
SECTION("Catalog 0") {
54+
CHECK(items[0].fileName(0) == "root://cmsxrootd.fnal.gov/store/foo/bar");
55+
CHECK(items[1].fileName(0) == "file:/foo/bar");
56+
CHECK(items[2].fileName(0) == "root://foobar");
57+
}
58+
SECTION("Catalog 1") {
59+
CHECK(items[0].fileName(1) == "root://cmsdcadisk.fnal.gov//dcache/uscmsdisk/store/foo/bar");
60+
}
61+
SECTION("Catalog 2") { CHECK(items[0].fileName(2) == "root://host.domain//pnfs/cms/store/foo/bar"); }
62+
}
63+
}
64+
65+
SECTION("Override catalog") {
66+
edm::ServiceRegistry::Operate operate(tempToken);
67+
68+
edm::InputFileCatalog catalog(std::vector<std::string>{"/store/foo/bar", " file:/foo/bar", "root://foobar "},
69+
"T1_US_FNAL,,T1_US_FNAL,FNAL_dCache_EOS,XRootD");
70+
71+
SECTION("fileNames") {
72+
auto const names = catalog.fileNames(0);
73+
REQUIRE(names.size() == 3);
74+
CHECK(names[0] == "root://cmsdcadisk.fnal.gov//dcache/uscmsdisk/store/foo/bar");
75+
CHECK(names[1] == "file:/foo/bar");
76+
CHECK(names[2] == "root://foobar");
77+
}
78+
79+
SECTION("fileCatalogItems") {
80+
auto const& items = catalog.fileCatalogItems();
81+
REQUIRE(items.size() == 3);
82+
83+
CHECK(items[0].logicalFileName() == "/store/foo/bar");
84+
CHECK(items[1].logicalFileName() == "");
85+
CHECK(items[2].logicalFileName() == "");
86+
87+
REQUIRE(items[0].fileNames().size() == 1);
88+
REQUIRE(items[1].fileNames().size() == 1);
89+
REQUIRE(items[2].fileNames().size() == 1);
90+
91+
CHECK(items[0].fileName(0) == "root://cmsdcadisk.fnal.gov//dcache/uscmsdisk/store/foo/bar");
92+
CHECK(items[1].fileName(0) == "file:/foo/bar");
93+
CHECK(items[2].fileName(0) == "root://foobar");
94+
}
95+
}
96+
97+
SECTION("useLFNasPFNifLFNnotFound") {
98+
edm::ServiceRegistry::Operate operate(tempToken);
99+
100+
edm::InputFileCatalog catalog(
101+
std::vector<std::string>{"/store/foo/bar", "/tmp/foo/bar", "root://foobar "}, "", true);
102+
103+
SECTION("fileNames") {
104+
SECTION("Catalog 0") {
105+
auto const names = catalog.fileNames(0);
106+
REQUIRE(names.size() == 3);
107+
CHECK(names[0] == "root://cmsxrootd.fnal.gov/store/foo/bar");
108+
CHECK(names[1] == "/tmp/foo/bar");
109+
CHECK(names[2] == "root://foobar");
110+
}
111+
}
112+
113+
SECTION("fileCatalogItems") {
114+
auto const& items = catalog.fileCatalogItems();
115+
REQUIRE(items.size() == 3);
116+
CHECK(items[0].logicalFileName() == "/store/foo/bar");
117+
CHECK(items[1].logicalFileName() == "/tmp/foo/bar");
118+
CHECK(items[2].logicalFileName() == "");
119+
120+
REQUIRE(items[0].fileNames().size() == 3);
121+
REQUIRE(items[1].fileNames().size() == 3);
122+
REQUIRE(items[2].fileNames().size() == 1);
123+
124+
SECTION("Catalog 0") {
125+
CHECK(items[0].fileName(0) == "root://cmsxrootd.fnal.gov/store/foo/bar");
126+
CHECK(items[1].fileName(0) == "/tmp/foo/bar");
127+
CHECK(items[2].fileName(0) == "root://foobar");
128+
}
129+
SECTION("Catalog 1") {
130+
CHECK(items[0].fileName(1) == "root://cmsdcadisk.fnal.gov//dcache/uscmsdisk/store/foo/bar");
131+
CHECK(items[1].fileName(1) == "/tmp/foo/bar");
132+
}
133+
SECTION("Catalog 2") {
134+
CHECK(items[0].fileName(2) == "root://host.domain//pnfs/cms/store/foo/bar");
135+
CHECK(items[1].fileName(2) == "/tmp/foo/bar");
136+
}
137+
}
138+
}
139+
}

0 commit comments

Comments
 (0)