Skip to content

Commit 9083bba

Browse files
committed
[roc-cleanup] Update tool to clean stale hugepage / uio_pci_dma resources
1 parent fb869c1 commit 9083bba

File tree

4 files changed

+81
-69
lines changed

4 files changed

+81
-69
lines changed

CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,7 +214,7 @@ if(PDA_FOUND)
214214
list(APPEND EXE_NAMES
215215
roc-bar-stress
216216
roc-config
217-
roc-channel-cleanup
217+
roc-cleanup
218218
roc-example
219219
roc-flash
220220
roc-flash-read

README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -300,9 +300,10 @@ It may use files in these directories for DMA buffers:
300300
The program will report the exact file used.
301301
They can be inspected manually if needed, e.g. with hexdump: `hexdump -e '"%07_ax" " | " 4/8 "%08x " "\n"' [filename]`
302302
303-
### roc-channel-cleanup
304-
In the event of a serious crash, such as a segfault, it may be necessary to clean up and reset a channel.
305-
See section "Channel ownership lock" for more details.
303+
### roc-cleanup
304+
In the event of a serious crash, such as a segfault, it may be necessary to clean up stale hugepage resources and reload the `uio_pci_dma` kernel module.
305+
This tool serves this purpose and is intended to be run as root. Be aware that running it will make every
306+
running instance of readout.exe or roc-bench-dma fail.
306307
307308
### roc-config
308309
Configures the CRU. Uses the [Card Configurator](#card-configurator).

src/CommandLineUtilities/ProgramCleanup.cxx

Lines changed: 46 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,12 @@
99
// or submit itself to any jurisdiction.
1010

1111
/// \file ProgramCleanup.cxx
12-
/// \brief Utility that cleans up channel state
12+
/// \brief Utility that cleans up stale readout/readoutcard resources
1313
///
1414
/// \author Pascal Boeschoten ([email protected])
15+
/// \author Kostas Alexopoulos ([email protected])
1516

17+
#include <boost/algorithm/string/predicate.hpp>
1618
#include <iostream>
1719
#include "ReadoutCard/ChannelFactory.h"
1820
#include "CommandLineUtilities/Common.h"
@@ -21,48 +23,59 @@
2123

2224
using namespace AliceO2::roc::CommandLineUtilities;
2325
using namespace AliceO2::roc;
24-
using std::cout;
25-
using std::endl;
26+
namespace algo = boost::algorithm;
2627
namespace po = boost::program_options;
2728

28-
namespace {
29-
30-
class ProgramCleanup: public Program
29+
class ProgramCleanup : public Program
3130
{
32-
public:
31+
public:
32+
virtual Description getDescription()
33+
{
34+
return { "Cleanup", "Cleans up ReadoutCard state", "roc-cleanup" };
35+
}
3336

34-
virtual Description getDescription()
35-
{
36-
return {"Cleanup", "Cleans up ReadoutCard state", "roc-cleanup --id=12345 --channel=0"};
37-
}
37+
virtual void addOptions(po::options_description&)
38+
{
39+
}
3840

39-
virtual void addOptions(po::options_description& options)
40-
{
41-
Options::addOptionCardId(options);
42-
Options::addOptionChannel(options);
43-
//options.add_options()("force",po::bool_switch(&mForceCleanup),
44-
// "Force cleanup of shared state files if normal cleanup fails");
41+
virtual void run(const po::variables_map&)
42+
{
43+
std::cout << "\033[1;31m"
44+
<< "!!! WARNING !!!"
45+
<< "\033[0m" << std::endl;
46+
std::cout << std::endl;
47+
std::cout << "Execution of this tool will:" << std::endl;
48+
std::cout << "1. Clean all hugepage resources under /var/lib/hugetlbfs/global/pagesize-{2MB, 1GB}/ which match readout* and roc-bench-dma*" << std::endl;
49+
std::cout << "2. Remove and reinsert the uio_pci_dma kernel module" << std::endl;
50+
std::cout << std::endl;
51+
std::cout << "In case instances of readout.exe or roc-bench-dma are running, they will fail." << std::endl;
52+
std::cout << std::endl;
53+
std::cout << "This tool is intended to be run with elevated privileges." << std::endl;
54+
std::cout << "Are you sure you want to continue? (yes/no)" << std::endl;
55+
std::string response;
56+
std::cin >> response;
57+
if (!algo::starts_with(response, "y")) {
58+
std::cout << "Terminated" << std::endl;
59+
return;
4560
}
4661

47-
virtual void run(const boost::program_options::variables_map& map)
48-
{
49-
auto cardId = Options::getOptionCardId(map);
50-
auto channelNumber = Options::getOptionChannel(map);
62+
std::cout << "Removing readout 2MB hugepage mappings" << std::endl;
63+
system("rm /var/lib/hugetlbfs/global/pagesize-2MB/readout*");
64+
std::cout << "Removing readout 1GB hugepage mappings" << std::endl;
65+
system("rm /var/lib/hugetlbfs/global/pagesize-1GB/readout*");
66+
std::cout << "Removing roc-bench-dma 2MB hugepage mappings" << std::endl;
67+
system("rm /var/lib/hugetlbfs/global/pagesize-2MB/roc-bench-dma*");
68+
std::cout << "Removing roc-bench-dma 1GB hugepage mappings" << std::endl;
69+
system("rm /var/lib/hugetlbfs/global/pagesize-1GB/roc-bench-dma*");
5170

52-
// This non-forced cleanup asks the DmaChannel to clean up itself.
53-
// It will not succeed if the channel was not initialized properly before the running of this program.
54-
cout << "### Attempting cleanup...\n";
55-
auto params = AliceO2::roc::Parameters::makeParameters(cardId, channelNumber);
56-
//params.setForcedUnlockEnabled(mForceCleanup);
57-
params.setBufferParameters(buffer_parameters::Null());
58-
auto channel = ChannelFactory().getDmaChannel(params);
59-
cout << "### Done!\n";
60-
}
71+
std::cout << "Removing uio_pci_dma" << std::endl;
72+
system("modprobe -r uio_pci_dma");
73+
std::cout << "Reinserting uio_pci_dma" << std::endl;
74+
system("modprobe uio_pci_dma");
75+
}
6176

62-
private:
63-
//bool mForceCleanup;
77+
private:
6478
};
65-
} // Anonymous namespace
6679

6780
int main(int argc, char** argv)
6881
{

src/MemoryMappedFile.cxx

Lines changed: 30 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -21,19 +21,20 @@
2121
#include "ExceptionInternal.h"
2222
#include "Utilities/SmartPointer.h"
2323

24-
namespace AliceO2 {
25-
namespace roc {
24+
namespace AliceO2
25+
{
26+
namespace roc
27+
{
2628

2729
namespace b = boost;
2830
namespace bip = boost::interprocess;
2931
namespace bfs = boost::filesystem;
3032

31-
struct MemoryMappedFileInternal
32-
{
33-
std::string fileName;
34-
boost::interprocess::file_mapping fileMapping;
35-
boost::interprocess::mapped_region mappedRegion;
36-
bool deleteFileOnDestruction;
33+
struct MemoryMappedFileInternal {
34+
std::string fileName;
35+
boost::interprocess::file_mapping fileMapping;
36+
boost::interprocess::mapped_region mappedRegion;
37+
bool deleteFileOnDestruction;
3738
};
3839

3940
MemoryMappedFile::MemoryMappedFile()
@@ -42,26 +43,26 @@ MemoryMappedFile::MemoryMappedFile()
4243
}
4344

4445
MemoryMappedFile::MemoryMappedFile(const std::string& fileName, size_t fileSize, bool deleteFileOnDestruction,
45-
bool lockMap)
46-
: MemoryMappedFile()
46+
bool lockMap)
47+
: MemoryMappedFile()
4748
{
4849
mInternal->fileName = fileName;
4950
mInternal->deleteFileOnDestruction = deleteFileOnDestruction;
5051

5152
if (lockMap) {
5253
// Try to acquire the lock on the file
53-
try{
54+
try {
5455
Utilities::resetSmartPtr(mInterprocessLock, "Alice_O2_RoC_MMF_" + fileName + "_lock");
5556
} catch (const boost::exception& e) {
5657
BOOST_THROW_EXCEPTION(LockException()
57-
<< ErrorInfo::Message("Couldn't lock Memory Mapped File; " + boost::diagnostic_information(e)));
58+
<< ErrorInfo::Message("Couldn't lock Memory Mapped File; " + boost::diagnostic_information(e)));
5859
}
5960

6061
try {
6162
mMapAcquired = map(fileName, fileSize);
6263
} catch (const boost::exception& e) {
6364
BOOST_THROW_EXCEPTION(MemoryMapException()
64-
<< ErrorInfo::Message(boost::diagnostic_information(e)));
65+
<< ErrorInfo::Message(boost::diagnostic_information(e)));
6566
}
6667
}
6768
}
@@ -96,11 +97,11 @@ bool MemoryMappedFile::map(const std::string& fileName, size_t fileSize)
9697
auto dir = bfs::path(fileName.c_str()).parent_path();
9798
if (!(bfs::is_directory(dir) && bfs::exists(dir))) {
9899
BOOST_THROW_EXCEPTION(MemoryMapException()
99-
<< ErrorInfo::Message("Failed to open memory map file, parent directory does not exist"));
100+
<< ErrorInfo::Message("Failed to open memory map file, parent directory does not exist"));
100101
}
101102
}
102103

103-
// We don't care if the file exists.
104+
// We don't care if the file exists.
104105
// Locks are in place that make sure we don't get here unless we're allowed to
105106
// Check the file exists
106107
/*{
@@ -113,38 +114,35 @@ bool MemoryMappedFile::map(const std::string& fileName, size_t fileSize)
113114
// Similar operation to calling "touch" command, making sure the file exists
114115
try {
115116
std::ofstream ofs(fileName.c_str(), std::ios::app);
116-
}
117-
catch (const std::exception& e) {
117+
} catch (const std::exception& e) {
118118
BOOST_THROW_EXCEPTION(MemoryMapException()
119-
<< ErrorInfo::Message(std::string("Failed to open memory map file: ") + e.what()));
119+
<< ErrorInfo::Message(std::string("Failed to open memory map file: ") + e.what()));
120120
}
121121

122122
// Resize and map file to memory
123123
try {
124124
bfs::resize_file(fileName.c_str(), fileSize);
125-
}
126-
catch (const std::exception& e) {
125+
} catch (const std::exception& e) {
127126
BOOST_THROW_EXCEPTION(MemoryMapException()
128-
<< ErrorInfo::Message(std::string("Failed to resize memory map file: ") + e.what())
129-
<< ErrorInfo::PossibleCauses({
130-
"Size not a multiple of page size",
131-
"Not enough memory available",
132-
"Not enough memory available (check 'hugeadm --pool-list')",
133-
"Insufficient permissions"}));
127+
<< ErrorInfo::Message(std::string("Failed to resize memory map file: ") + e.what())
128+
<< ErrorInfo::PossibleCauses({ "Size not a multiple of page size",
129+
"Not enough memory available",
130+
"Not enough hugepages allocated (check 'hugeadm --pool-list')",
131+
"Insufficient permissions",
132+
"Stale hugepage / uio_pci_dma resources (run 'roc-cleanup')" }));
134133
}
135134

136135
try {
137136
mInternal->fileMapping = bip::file_mapping(fileName.c_str(), bip::read_write);
138137
mInternal->mappedRegion = bip::mapped_region(mInternal->fileMapping, bip::read_write, 0, fileSize);
139138
} catch (const std::exception& e) {
140139
BOOST_THROW_EXCEPTION(MemoryMapException()
141-
<< ErrorInfo::Message(std::string("Failed to memory map file: ") + e.what())
142-
<< ErrorInfo::PossibleCauses({
143-
"Not enough memory available",
144-
"Not enough hugepages allocated (check 'hugeadm --pool-list')"}));
140+
<< ErrorInfo::Message(std::string("Failed to memory map file: ") + e.what())
141+
<< ErrorInfo::PossibleCauses({ "Not enough memory available",
142+
"Not enough hugepages allocated (check 'hugeadm --pool-list')",
143+
"Stale hugepage / uio_pci_dma resources (run 'roc-cleanup')" }));
145144
}
146-
}
147-
catch (MemoryMapException& e) {
145+
} catch (MemoryMapException& e) {
148146
e << ErrorInfo::FileName(fileName) << ErrorInfo::FileSize(fileSize);
149147
throw;
150148
}

0 commit comments

Comments
 (0)