Skip to content

Commit eb6319b

Browse files
authored
Merge pull request #48073 from makortel/storageTrace
Add storage tracer
2 parents 9d6d370 + 2477b0f commit eb6319b

26 files changed

+1233
-66
lines changed

FWCore/Services/src/SiteLocalConfigService.cc

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -576,10 +576,12 @@ namespace edm {
576576
"Specify the file containing the site local config. Empty string will load from default directory.");
577577
desc.addOptionalUntracked<std::string>("overrideSourceCacheTempDir");
578578
desc.addOptionalUntracked<double>("overrideSourceCacheMinFree");
579-
desc.addOptionalUntracked<std::string>("overrideSourceCacheHintDir");
579+
desc.addOptionalUntracked<std::string>("overrideSourceCacheHintDir")
580+
->setComment("Set cache hint. See AdaptorConfig plugin for valid values.");
580581
desc.addOptionalUntracked<std::string>("overrideSourceCloneCacheHintDir")
581582
->setComment("Provide an alternate cache hint for fast cloning.");
582-
desc.addOptionalUntracked<std::string>("overrideSourceReadHint");
583+
desc.addOptionalUntracked<std::string>("overrideSourceReadHint")
584+
->setComment("Set read hint. See AdaptorConfig plugin for valid values.");
583585
desc.addOptionalUntracked<std::vector<std::string> >("overrideSourceNativeProtocols");
584586
desc.addOptionalUntracked<unsigned int>("overrideSourceTTreeCacheSize");
585587
desc.addOptionalUntracked<unsigned int>("overrideSourceTimeout");

IOPool/TFileAdaptor/src/TFileAdaptor.cc

Lines changed: 67 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,14 @@
55
#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h"
66
#include "FWCore/ParameterSet/interface/ParameterSet.h"
77
#include "FWCore/ParameterSet/interface/ParameterSetDescription.h"
8+
#include "FWCore/ParameterSet/interface/PluginDescription.h"
89
#include "FWCore/Reflection/interface/SetClassParsing.h"
910
#include "FWCore/ServiceRegistry/interface/Service.h"
1011
#include "FWCore/Utilities/interface/EDMException.h"
1112
#include "Utilities/StorageFactory/interface/StorageAccount.h"
1213
#include "Utilities/StorageFactory/interface/StorageFactory.h"
14+
#include "Utilities/StorageFactory/interface/StorageProxyMaker.h"
15+
#include "Utilities/StorageFactory/interface/StorageProxyMakerFactory.h"
1316

1417
#include <TROOT.h>
1518
#include <TFile.h>
@@ -68,32 +71,26 @@ bool TFileAdaptor::native(char const* proto) const {
6871
}
6972

7073
TFileAdaptor::TFileAdaptor(edm::ParameterSet const& pset, edm::ActivityRegistry& ar)
71-
: enabled_(true),
72-
doStats_(true),
74+
: enabled_(pset.getUntrackedParameter<bool>("enable")),
75+
doStats_(pset.getUntrackedParameter<bool>("stats")),
7376
enablePrefetching_(false),
74-
cacheHint_("auto-detect"),
75-
readHint_("auto-detect"),
76-
tempDir_(),
77-
minFree_(0),
77+
// values set in the site local config or in SiteLocalConfigService override
78+
// any values set here for this service.
79+
// These parameters here are needed only for backward compatibility
80+
// for WMDM tools until we switch to only using the site local config for this info.
81+
cacheHint_(pset.getUntrackedParameter<std::string>("cacheHint")),
82+
readHint_(pset.getUntrackedParameter<std::string>("readHint")),
83+
tempDir_(pset.getUntrackedParameter<std::string>("tempDir")),
84+
minFree_(pset.getUntrackedParameter<double>("tempMinFree")),
85+
native_(pset.getUntrackedParameter<std::vector<std::string>>("native")),
86+
// end of section of values overridden by SiteLocalConfigService
7887
timeout_(0U),
79-
debugLevel_(0U),
80-
native_() {
81-
if (!(enabled_ = pset.getUntrackedParameter<bool>("enable", enabled_)))
88+
debugLevel_(0U) {
89+
if (not enabled_)
8290
return;
8391

8492
using namespace edm::storage;
8593
StorageFactory* f = StorageFactory::getToModify();
86-
doStats_ = pset.getUntrackedParameter<bool>("stats", doStats_);
87-
88-
// values set in the site local config or in SiteLocalConfigService override
89-
// any values set here for this service.
90-
// These parameters here are needed only for backward compatibility
91-
// for WMDM tools until we switch to only using the site local config for this info.
92-
cacheHint_ = pset.getUntrackedParameter<std::string>("cacheHint", cacheHint_);
93-
readHint_ = pset.getUntrackedParameter<std::string>("readHint", readHint_);
94-
tempDir_ = pset.getUntrackedParameter<std::string>("tempDir", f->tempPath());
95-
minFree_ = pset.getUntrackedParameter<double>("tempMinFree", f->tempMinFree());
96-
native_ = pset.getUntrackedParameter<std::vector<std::string> >("native", native_);
9794

9895
ar.watchPostEndJob(this, &TFileAdaptor::termination);
9996

@@ -161,6 +158,15 @@ TFileAdaptor::TFileAdaptor(edm::ParameterSet const& pset, edm::ActivityRegistry&
161158
// tell where to save files.
162159
f->setTempDir(tempDir_, minFree_);
163160

161+
// forward generic storage proxy makers
162+
{
163+
std::vector<std::unique_ptr<StorageProxyMaker>> makers;
164+
for (auto const& pset : pset.getUntrackedParameter<std::vector<edm::ParameterSet>>("storageProxies")) {
165+
makers.push_back(StorageProxyMakerFactory::get()->create(pset.getUntrackedParameter<std::string>("type"), pset));
166+
}
167+
f->setStorageProxyMakers(std::move(makers));
168+
}
169+
164170
// set our own root plugins
165171
TPluginManager* mgr = gROOT->GetPluginManager();
166172

@@ -203,15 +209,49 @@ TFileAdaptor::TFileAdaptor(edm::ParameterSet const& pset, edm::ActivityRegistry&
203209
}
204210

205211
void TFileAdaptor::fillDescriptions(edm::ConfigurationDescriptions& descriptions) {
212+
using namespace edm::storage;
206213
edm::ParameterSetDescription desc;
207-
desc.addOptionalUntracked<bool>("enable");
208-
desc.addOptionalUntracked<bool>("stats");
209-
desc.addOptionalUntracked<std::string>("cacheHint");
210-
desc.addOptionalUntracked<std::string>("readHint");
211-
desc.addOptionalUntracked<std::string>("tempDir");
212-
desc.addOptionalUntracked<double>("tempMinFree");
213-
desc.addOptionalUntracked<std::vector<std::string> >("native");
214+
desc.addUntracked<bool>("enable", true)->setComment("Enable or disable TFileAdaptor behavior");
215+
desc.addUntracked<bool>("stats", true);
216+
desc.addUntracked<std::string>("cacheHint", "auto-detect")
217+
->setComment(
218+
"Hint for read caching. Possible values: 'application-only', 'storage-only', 'lazy-download', 'auto-detect'. "
219+
"The value from the SiteLocalConfigService overrides the value set here. In addition, if the "
220+
"SiteLocalConfigService has prefetching enabled, the default hint is 'application-only'.");
221+
desc.addUntracked<std::string>("readHint", "auto-detect")
222+
->setComment(
223+
"Hint for reading itself. Possible values: 'direct-unbuffered', 'read-ahead-buffered', 'auto-detect'. The "
224+
"value from SiteLocalConfigService overrides the value set here.");
225+
desc.addUntracked<std::string>("tempDir", StorageFactory::defaultTempDir())
226+
->setComment(
227+
"Colon-separated list of directories that storage implementations downloading the full file could place the "
228+
"file. The value from SiteLocalConfigService overrides the value set here.");
229+
desc.addUntracked<double>("tempMinFree", StorageFactory::defaultMinTempFree())
230+
->setComment(
231+
"Minimum amount of space in GB required for a temporary data directory specified in tempDir. The value from "
232+
"SiteLocalConfigService overrides the value set here.");
233+
desc.addUntracked<std::vector<std::string>>("native", {})
234+
->setComment(
235+
"Set of protocols for which to use a native ROOT storage implementation instead of CMSSW's StorageFactory. "
236+
"Valid "
237+
"values are 'file', 'http', 'ftp', 'dcache', 'dcap', 'gsidcap', 'root', or 'all' to prefer ROOT for all "
238+
"protocols. The value from SiteLocalConfigService overrides the value set here.");
239+
240+
edm::ParameterSetDescription proxyMakerDesc;
241+
proxyMakerDesc.addNode(edm::PluginDescription<edm::storage::StorageProxyMakerFactory>("type", false));
242+
std::vector<edm::ParameterSet> proxyMakerDefaults;
243+
desc.addVPSetUntracked("storageProxies", proxyMakerDesc, proxyMakerDefaults)
244+
->setComment(
245+
"Ordered list of Storage proxies the real Storage object is wrapped into. The real Storage is wrapped into "
246+
"the first element of the list, then that proxy is wrapped into the second element of the list and so on. "
247+
"Only after this wrapping are the LocalCacheFile (lazy-download) and statistics accounting ('stats' "
248+
"parameter) proxies applied.");
249+
214250
descriptions.add("AdaptorConfig", desc);
251+
descriptions.setComment(
252+
"AdaptorConfig Service is used to configure the TFileAdaptor. If enabled, the TFileAdaptor registers "
253+
"TStorageFactoryFile as a handler for various protocols. The StorageFactory facility provides custom storage "
254+
"access implementations for these protocols, as well as statistics accounting.");
215255
}
216256

217257
// Write current Storage statistics on an ostream

IOPool/TFileAdaptor/src/TFileAdaptor.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,9 +44,9 @@ class TFileAdaptor {
4444
std::string readHint_;
4545
std::string tempDir_;
4646
double minFree_;
47+
std::vector<std::string> native_;
4748
unsigned int timeout_;
4849
unsigned int debugLevel_;
49-
std::vector<std::string> native_;
5050
};
5151

5252
namespace edm {

Utilities/DCacheAdaptor/plugins/DCacheStorageMaker.cc

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,7 @@ namespace edm::storage {
4141
else
4242
mode |= IOFlags::OpenUnbuffered;
4343

44-
auto file = std::make_unique<DCacheFile>(normalise(proto, path), mode);
45-
return f->wrapNonLocalFile(std::move(file), proto, std::string(), mode);
44+
return std::make_unique<DCacheFile>(normalise(proto, path), mode);
4645
}
4746

4847
void stagein(const std::string &proto, const std::string &path, const AuxSettings &aux) const override {
@@ -77,6 +76,8 @@ namespace edm::storage {
7776
return true;
7877
}
7978

79+
UseLocalFile usesLocalFile() const override { return UseLocalFile::kNo; }
80+
8081
private:
8182
void setTimeout(unsigned int timeout) const {
8283
if (timeout != 0)

Utilities/DavixAdaptor/plugins/DavixStorageMaker.cc

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,8 @@ namespace edm::storage {
1515
const std::string &path,
1616
int mode,
1717
AuxSettings const &aux) const override {
18-
const StorageFactory *f = StorageFactory::get();
1918
std::string newurl((proto == "web" ? "http" : proto) + ":" + path);
20-
auto file = std::make_unique<DavixFile>(newurl, mode);
21-
return f->wrapNonLocalFile(std::move(file), proto, std::string(), mode);
19+
return std::make_unique<DavixFile>(newurl, mode);
2220
}
2321

2422
bool check(const std::string &proto,
@@ -43,6 +41,8 @@ namespace edm::storage {
4341
}
4442
return true;
4543
}
44+
45+
UseLocalFile usesLocalFile() const override { return UseLocalFile::kNo; }
4646
};
4747
} // namespace edm::storage
4848

Utilities/StorageFactory/BuildFile.xml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
<use name="FWCore/ParameterSet"/>
12
<use name="FWCore/PluginManager"/>
23
<use name="FWCore/MessageLogger"/>
34
<use name="FWCore/Utilities"/>

Utilities/StorageFactory/README.md

Lines changed: 55 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ Factory interface for constructing `edm::storage::Storage` instances. Also provi
1111
`StorageFactory` provides two implementations of `edm::storage::Storage` classes which can be used to wrap around any other `Storage` object.
1212

1313
### `edm::storage::LocalCacheFile`
14-
Does memory mapped caching of the wrapped `Storage` object. This is only applied if `CACHE_HINT_LAZY_DOWNLOAD` is set for `cacheHint` or the protocol handling code explicit passes `IOFlags::OpenWrap` to `StorageFactory::wrapNonLocalFile`. The wrapping does not happen if the Storage is open for writing nor if the Storage is associated with a file on the local file system.
14+
Does memory-mapped caching of the wrapped `Storage` object. This is only applied if `CACHE_HINT_LAZY_DOWNLOAD` is set for `cacheHint` or the protocol handling code explicitly passes `IOFlags::OpenWrap` to `StorageFactory::wrapNonLocalFile`. The wrapping does not happen if the Storage is open for writing or if the Storage is associated with a file on the local file system. Note that files using the `file:` protocol _can_ end up using `LocalCacheFile` if the path is determined to be on a non-local file system.
1515

1616
### `edm::storage::StorageAccountProxy`
1717
This wraps the `Storage` object and provides per protocol accounting information (e.g. number of bytes read) to `edm::storage::StorageAccount`. This is only used if `StorageFactory::accounting()` returns `true`.
@@ -27,16 +27,66 @@ A singleton used to aggregate statistics about all storage calls for each protoc
2727
### `edm::storage::StorageAccount::StorageClassToken`
2828
Each protocol is associated to a token for quick lookup.
2929

30+
31+
## Generic storage proxies
32+
33+
This facility resembles the `edm::storage::LocalCacheFile` and `edm::storage::StorageAccountProxy` in the way that `edm::storage::Storage` objects constructed by the concrete `edm::storage::StorageMaker` are wrapped into other `edm::storage::Storage` objects.
34+
35+
The proxies are configured via `TFileAdaptor`'s `storageProxies` `VPSet` configuration parameter. The proxies are wrapped in the order they are specified in the `VPSet`, i.e. the first element wraps the concrete `edm::storage::Storage`, second element wraps the first element etc. The `edm::storage::StorageAccountProxy` and `edm::storage::LocalCacheFile` wrap the last storage proxy according to their usual behavior.
36+
37+
Each concrete proxy comes with two classes, the proxy class itself (inheriting from the `edm::storage::StorageProxyBase`) and a maker class (inheriting from the `edm::storage::StorageProxyMaker`). This "factory of factories" pattern is used because a maker is created once per job (in `TFileAdaptor`), and the maker object is used to create a proxy object for each file.
38+
39+
### Concrete proxy classes
40+
41+
The convention is to use the proxy class name as the plugin name for the maker, as the proxy is really what the user would care for. The headings of the subsections correspond to the plugin names.
42+
43+
#### `StorageTracerProxy`
44+
45+
The `edm::storage::StorageTracerProxy` (and the corresponding `edm::storage::StorageTracerProxyMaker`) produces a text file with a trace of all IO operations at the `StorageFactory` level. The behavior of each concrete `Storage` object (such as further splitting of read requests in `XrdAdaptor`) is not captured in these tracers. The structure of the trace file is described in a preamble in the trace file.
46+
47+
The plugin has a configuration parameter for a pattern for the trace files. The pattern must contain at least one `%I`. The maker has an atomic counter for the files, and all occurrences of `%I` are replaced with the value of that counter for the given file.
48+
49+
There is an `edmStorageTracer.py` script for doing some analyses of the traces.
50+
51+
The `StorageTracerProxy` also provides a way to correlate the trace entries with the rest of the framework via [MessageLogger](../../FWCore/MessageService/Readme.md) messages. These messages are issued with the DEBUG severity and `IOTrace` category. There are additional, higher-level messages as part of the `PoolSource`. To see these messages, compile the `Utilities/Storage` and `IOPool/Input` packages with `USER_CXXFLAGS="-DEDM_ML_DEBUG"`, and customize the MessageLogger configuration along the lines of:
52+
```py
53+
process.MessageLogger.cerr.threshold = "DEBUG"
54+
process.MessageLogger.debugModules = ["*"]
55+
process.MessageLogger.IOTrace = dict()
56+
```
57+
58+
#### `StorageAddLatencyProxy`
59+
60+
The `edm::storage::StorageAddLatencyProxy` (and the corresponding `edm::storage::StorageAddLatencyProxyMaker`) can be used to add artificial latency to the IO operations. The plugin has configuration parameters for latencies of singular reads, vector reads, singular writes, and vector writes.
61+
62+
If used together with `StorageTracerProxy`, for example to simulate the behavior of high-latency storage systems using local files, the `storageProxies` `VPSet` should have `StorageAddLatencyProxy` first, followed by `StorageTracerProxy`.
63+
64+
### Other components
65+
66+
#### `edm::storage::StorageProxyBase`
67+
68+
Inherits from `edm::storage::Storage` and is the base class for the proxy classes.
69+
70+
#### `edm::storage::StorageProxyMaker`
71+
72+
Base class for the proxy makers.
73+
74+
3075
## Related classes in other packages
3176

3277
### TStorageFactoryFile
3378
Inherits from `TFile` but uses `edm::storage::Storage` instances when doing the actual read/write operations. The class explicitly uses `"tstoragefile"` when communicating with `edm::storage::StorageAccount`.
3479

35-
### TFileAdaptor
36-
TFileAdaptor is a cmsRun Service. It explicitly registers the use of `TStorageFactoryFile` with ROOT's `TFile::Open` system. The parameters passed to `TFileAdaptor` are relayed to `edm::storage::StorageFactory` to setup the defaults for the job.
80+
### `TFileAdaptor`
3781

38-
### CondorStatusService
82+
`TFileAdaptor` is a cmsRun Service (with a plugin name of `AdaptorConfig`, see [IOPool/TFileAdaptor/README.md](../../IOPool/TFileAdaptor/README.md)). It explicitly registers the use of `TStorageFactoryFile` with ROOT's `TFile::Open` system. The parameters passed to `TFileAdaptor` are relayed to `edm::storage::StorageFactory` to set up the defaults for the job.
83+
84+
### `CondorStatusService`
3985
Sends condor _Chirp_ messages periodically from cmsRun. These include the most recent aggregated `edm::storage::StorageAccount` information for all protocols being used except for the `"tstoragefile"` protocol.
4086

41-
### StatisticsSenderService
87+
### `StatisticsSenderService`
4288
A cmsRun Service which sends out UDP packets about the state of the system. The information is sent when a primary file closes and includes the recent aggregated `edm::storage::StorageAccount` information for all protocols being used except for the `"tstoragefile"` protocol.
89+
90+
### `XrdAdaptor`
91+
92+
An `edm::storage::Storage` implementation for xrootd (see [Utilities/XrdAdaptor/README.md](../../Utilities/XrdAdaptor/README.md)).

Utilities/StorageFactory/interface/StorageFactory.h

Lines changed: 21 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,16 @@
55
#include "Utilities/StorageFactory/interface/LocalFileSystem.h"
66
#include "Utilities/StorageFactory/interface/IOTypes.h"
77
#include "Utilities/StorageFactory/interface/IOFlags.h"
8-
#include <string>
8+
99
#include <memory>
10+
#include <string>
11+
#include <tuple>
12+
1013
#include "oneapi/tbb/concurrent_unordered_map.h"
1114

1215
namespace edm::storage {
1316
class Storage;
17+
class StorageProxyMaker;
1418
class StorageFactory {
1519
public:
1620
enum CacheHint { CACHE_HINT_APPLICATION, CACHE_HINT_STORAGE, CACHE_HINT_LAZY_DOWNLOAD, CACHE_HINT_AUTO_DETECT };
@@ -20,6 +24,10 @@ namespace edm::storage {
2024
static const StorageFactory *get(void);
2125
static StorageFactory *getToModify(void);
2226

27+
// in GB
28+
static double defaultMinTempFree() { return 4.; }
29+
static std::string defaultTempDir();
30+
2331
~StorageFactory(void);
2432

2533
// implicit copy constructor
@@ -45,22 +53,27 @@ namespace edm::storage {
4553
std::string tempPath(void) const;
4654
double tempMinFree(void) const;
4755

56+
void setStorageProxyMakers(std::vector<std::unique_ptr<StorageProxyMaker>> makers);
57+
4858
void stagein(const std::string &url) const;
49-
std::unique_ptr<Storage> open(const std::string &url, int mode = IOFlags::OpenRead) const;
59+
std::unique_ptr<Storage> open(const std::string &url, const int mode = IOFlags::OpenRead) const;
5060
bool check(const std::string &url, IOOffset *size = nullptr) const;
5161

52-
std::unique_ptr<Storage> wrapNonLocalFile(std::unique_ptr<Storage> s,
53-
const std::string &proto,
54-
const std::string &path,
55-
int mode) const;
56-
5762
private:
5863
typedef oneapi::tbb::concurrent_unordered_map<std::string, std::shared_ptr<StorageMaker>> MakerTable;
5964

6065
StorageFactory(void);
6166
StorageMaker *getMaker(const std::string &proto) const;
6267
StorageMaker *getMaker(const std::string &url, std::string &protocol, std::string &rest) const;
6368

69+
// Returns
70+
// - Storage 's' possibly wrapped in LocalCacheFile
71+
// - bool telling if LocalCacheFile is used
72+
std::tuple<std::unique_ptr<Storage>, bool> wrapNonLocalFile(std::unique_ptr<Storage> s,
73+
const std::string &proto,
74+
const std::string &path,
75+
const int mode) const;
76+
6477
mutable MakerTable m_makers;
6578
CacheHint m_cacheHint;
6679
ReadHint m_readHint;
@@ -72,6 +85,7 @@ namespace edm::storage {
7285
unsigned int m_timeout;
7386
unsigned int m_debugLevel;
7487
LocalFileSystem m_lfs;
88+
std::vector<std::unique_ptr<StorageProxyMaker>> m_storageProxyMakers_;
7589
static StorageFactory s_instance;
7690
};
7791
} // namespace edm::storage

0 commit comments

Comments
 (0)