Skip to content

Commit c1a12ad

Browse files
Merge pull request ClickHouse#79836 from Dolso/sync-system-error_log-and-system-errors
Sync system.error_log and system.errors
2 parents 7d335b9 + b3740e5 commit c1a12ad

File tree

15 files changed

+304
-115
lines changed

15 files changed

+304
-115
lines changed

docs/en/operations/server-configuration-parameters/_server_settings_outside_source.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,7 @@ To manually turn on error history collection [`system.error_log`](../../operatio
244244
<reserved_size_rows>8192</reserved_size_rows>
245245
<buffer_size_rows_flush_threshold>524288</buffer_size_rows_flush_threshold>
246246
<flush_on_crash>false</flush_on_crash>
247+
<symbolize>true</symbolize>
247248
</error_log>
248249
</clickhouse>
249250
```

docs/en/operations/system-tables/error_log.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,16 @@ Columns:
1818
- `event_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — Event time.
1919
- `code` ([Int32](../../sql-reference/data-types/int-uint.md)) — Code number of the error.
2020
- `error` ([LowCardinality(String)](../../sql-reference/data-types/string.md)) - Name of the error.
21+
- `last_error_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The time when the last error happened.
22+
- `last_error_message` ([String](../../sql-reference/data-types/string.md)) — Message for the last error.
2123
- `value` ([UInt64](../../sql-reference/data-types/int-uint.md)) — The number of times this error happened.
2224
- `remote` ([UInt8](../../sql-reference/data-types/int-uint.md)) — Remote exception (i.e. received during one of the distributed queries).
25+
- `last_error_query_id` ([String](../../sql-reference/data-types/string.md)) — Id of a query that caused the last error (if available).
26+
- `last_error_trace` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — A stack trace that represents a list of physical addresses where the called methods are stored.
27+
- `last_error_symbols` ([Array(LowCardinality(String))](../../sql-reference/data-types/array.md)), If the symbolization is enabled, contains demangled symbol names, corresponding to the `last_error_trace`.
28+
- `last_error_lines` ([Array(LowCardinality(String))](../../sql-reference/data-types/array.md)), If the symbolization is enabled, contains strings with file names with line numbers, corresponding to the `last_error_trace`.
29+
30+
The symbolization can be enabled or disabled in the `symbolize` under `error_log` in the server's configuration file.
2331

2432
**Example**
2533

docs/en/operations/system-tables/errors.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ Columns:
2121
- `last_error_message` ([String](../../sql-reference/data-types/string.md)) — message for the last error.
2222
- `last_error_trace` ([Array(UInt64)](../../sql-reference/data-types/array.md)) — A [stack trace](https://en.wikipedia.org/wiki/Stack_trace) that represents a list of physical addresses where the called methods are stored.
2323
- `remote` ([UInt8](../../sql-reference/data-types/int-uint.md)) — remote exception (i.e. received during one of the distributed queries).
24+
- `last_error_query_id` ([String](../../sql-reference/data-types/string.md)) — Id of a query that caused the last error (if available).
25+
- `last_error_symbols` ([Array(LowCardinality(String))](../../sql-reference/data-types/array.md)) — Contains demangled symbol names, corresponding to the `last_error_trace`.
26+
- `last_error_lines` ([Array(LowCardinality(String))](../../sql-reference/data-types/array.md)) — Contains strings with file names with line numbers, corresponding to the `last_error_trace`.
2427

2528
:::note
2629
Counters for some errors may increase during successful query execution. It's not recommended to use this table for server monitoring purposes unless you are sure that corresponding error can not be a false positive.

src/Common/AddressToLineCache.cpp

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
#if defined(__ELF__) && !defined(OS_FREEBSD)
2+
#include <Common/AddressToLineCache.h>
3+
#include <Common/SymbolIndex.h>
4+
#include <IO/WriteHelpers.h>
5+
6+
#include <filesystem>
7+
8+
namespace DB
9+
{
10+
11+
12+
void AddressToLineCache::setResult(StringRef & result, const Dwarf::LocationInfo & location, const std::vector<Dwarf::SymbolizedFrame> &)
13+
{
14+
const char * arena_begin = nullptr;
15+
WriteBufferFromArena out(arena, arena_begin);
16+
17+
writeString(location.file.toString(), out);
18+
writeChar(':', out);
19+
writeIntText(location.line, out);
20+
21+
out.finalize();
22+
result = out.complete();
23+
}
24+
25+
StringRef AddressToLineCache::impl(uintptr_t addr)
26+
{
27+
const SymbolIndex & symbol_index = SymbolIndex::instance();
28+
29+
if (const auto * object = symbol_index.findObject(reinterpret_cast<const void *>(addr)))
30+
{
31+
auto dwarf_it = dwarfs.try_emplace(object->name, object->elf).first;
32+
if (!std::filesystem::exists(object->name))
33+
return {};
34+
35+
Dwarf::LocationInfo location;
36+
std::vector<Dwarf::SymbolizedFrame> frames; // NOTE: not used in FAST mode.
37+
StringRef result;
38+
if (dwarf_it->second.findAddress(addr - uintptr_t(object->address_begin), location, Dwarf::LocationInfoMode::FAST, frames))
39+
{
40+
setResult(result, location, frames);
41+
return result;
42+
}
43+
return {object->name};
44+
}
45+
return {};
46+
}
47+
48+
StringRef AddressToLineCache::implCached(uintptr_t addr)
49+
{
50+
typename Map::LookupResult it;
51+
bool inserted;
52+
map.emplace(addr, it, inserted);
53+
if (inserted)
54+
it->getMapped() = impl(addr);
55+
return it->getMapped();
56+
}
57+
58+
StringRef AddressToLineCache::get(uintptr_t addr)
59+
{
60+
static AddressToLineCache cache;
61+
return cache.implCached(addr);
62+
}
63+
64+
}
65+
#endif

src/Common/AddressToLineCache.h

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#pragma once
2+
#if defined(__ELF__) && !defined(OS_FREEBSD)
3+
4+
#include <IO/WriteBufferFromArena.h>
5+
#include <Common/Dwarf.h>
6+
#include <Common/HashTable/HashMap.h>
7+
8+
9+
namespace DB
10+
{
11+
12+
class AddressToLineCache
13+
{
14+
public:
15+
static StringRef get(uintptr_t addr);
16+
17+
protected:
18+
void setResult(StringRef & result, const Dwarf::LocationInfo & location, const std::vector<Dwarf::SymbolizedFrame> &);
19+
StringRef impl(uintptr_t addr);
20+
StringRef implCached(uintptr_t addr);
21+
22+
private:
23+
using Map = HashMap<uintptr_t, StringRef>;
24+
Arena arena;
25+
Map map;
26+
std::unordered_map<std::string, Dwarf> dwarfs;
27+
};
28+
29+
}
30+
#endif

src/Common/SymbolsHelper.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
#if defined(__ELF__) && !defined(OS_FREEBSD)
2+
#include <Core/Field.h>
3+
#include <base/demangle.h>
4+
#include <Common/SymbolIndex.h>
5+
#include <Common/AddressToLineCache.h>
6+
7+
8+
namespace DB
9+
{
10+
11+
std::pair<Array, Array> generateArraysSymbolsLines(const std::vector<UInt64> & trace)
12+
{
13+
Array symbols;
14+
Array lines;
15+
size_t num_frames = trace.size();
16+
symbols.reserve(num_frames);
17+
lines.reserve(num_frames);
18+
19+
const SymbolIndex & symbol_index = SymbolIndex::instance();
20+
21+
for (size_t frame = 0; frame < num_frames; ++frame)
22+
{
23+
if (const auto * symbol = symbol_index.findSymbol(reinterpret_cast<const void *>(trace[frame])))
24+
{
25+
std::string_view mangled_symbol(symbol->name);
26+
27+
auto demangled = tryDemangle(symbol->name);
28+
if (demangled)
29+
symbols.emplace_back(std::string_view(demangled.get()));
30+
else
31+
symbols.emplace_back(std::string_view(symbol->name));
32+
33+
lines.emplace_back(AddressToLineCache::get(trace[frame]).toView());
34+
}
35+
else
36+
{
37+
symbols.emplace_back(String());
38+
lines.emplace_back(String());
39+
}
40+
}
41+
42+
return {symbols, lines};
43+
}
44+
45+
}
46+
#endif

src/Common/SymbolsHelper.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#pragma once
2+
#if defined(__ELF__) && !defined(OS_FREEBSD)
3+
#include <base/demangle.h>
4+
#include <Core/Field.h>
5+
#include <Common/AddressToLineCache.h>
6+
7+
#include <vector>
8+
9+
namespace DB
10+
{
11+
12+
std::pair<Array, Array> generateArraysSymbolsLines(const std::vector<UInt64> & trace);
13+
14+
}
15+
#endif

src/Interpreters/ErrorLog.cpp

Lines changed: 75 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,15 +10,16 @@
1010
#include <Interpreters/ErrorLog.h>
1111
#include <Parsers/ExpressionElementParsers.h>
1212
#include <Parsers/parseQuery.h>
13+
#include <Common/SymbolsHelper.h>
1314

14-
#include <vector>
1515

1616
namespace DB
1717
{
1818

1919
ColumnsDescription ErrorLogElement::getColumnsDescription()
2020
{
2121
ParserCodec codec_parser;
22+
DataTypePtr symbolized_type = std::make_shared<DataTypeArray>(std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>()));
2223
return ColumnsDescription {
2324
{
2425
"hostname",
@@ -50,6 +51,18 @@ ColumnsDescription ErrorLogElement::getColumnsDescription()
5051
parseQuery(codec_parser, "(ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS),
5152
"Error name."
5253
},
54+
{
55+
"last_error_time",
56+
std::make_shared<DataTypeDateTime>(),
57+
parseQuery(codec_parser, "(ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS),
58+
"The time when the last error happened."
59+
},
60+
{
61+
"last_error_message",
62+
std::make_shared<DataTypeString>(),
63+
parseQuery(codec_parser, "(ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS),
64+
"Message for the last error."
65+
},
5366
{
5467
"value",
5568
std::make_shared<DataTypeUInt64>(),
@@ -61,6 +74,30 @@ ColumnsDescription ErrorLogElement::getColumnsDescription()
6174
std::make_shared<DataTypeUInt8>(),
6275
parseQuery(codec_parser, "(ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS),
6376
"Remote exception (i.e. received during one of the distributed queries)."
77+
},
78+
{
79+
"last_error_query_id",
80+
std::make_shared<DataTypeString>(),
81+
parseQuery(codec_parser, "(ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS),
82+
"Id of a query that caused the last error (if available)."
83+
},
84+
{
85+
"last_error_trace",
86+
std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt64>()),
87+
parseQuery(codec_parser, "(ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS),
88+
"A stack trace that represents a list of physical addresses where the called methods are stored."
89+
},
90+
{
91+
"symbols",
92+
symbolized_type,
93+
parseQuery(codec_parser, "(ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS),
94+
"If the symbolization is enabled, contains demangled symbol names, corresponding to the `trace`."
95+
},
96+
{
97+
"lines",
98+
symbolized_type,
99+
parseQuery(codec_parser, "(ZSTD(1))", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS),
100+
"If the symbolization is enabled, contains strings with file names with line numbers, corresponding to the `trace`."
64101
}
65102
};
66103
}
@@ -74,8 +111,33 @@ void ErrorLogElement::appendToBlock(MutableColumns & columns) const
74111
columns[column_idx++]->insert(event_time);
75112
columns[column_idx++]->insert(code);
76113
columns[column_idx++]->insert(ErrorCodes::getName(code));
114+
columns[column_idx++]->insert(error_time_ms / 1000);
115+
columns[column_idx++]->insert(error_message);
77116
columns[column_idx++]->insert(value);
78117
columns[column_idx++]->insert(remote);
118+
columns[column_idx++]->insert(query_id);
119+
120+
std::vector<UInt64> error_trace_array;
121+
error_trace_array.reserve(error_trace.size());
122+
for (auto * ptr : error_trace)
123+
error_trace_array.emplace_back(reinterpret_cast<UInt64>(ptr));
124+
125+
columns[column_idx++]->insert(Array(error_trace_array.begin(), error_trace_array.end()));
126+
127+
#if defined(__ELF__) && !defined(OS_FREEBSD)
128+
if (symbolize)
129+
{
130+
auto [symbols, lines] = generateArraysSymbolsLines(error_trace_array);
131+
132+
columns[column_idx++]->insert(symbols);
133+
columns[column_idx++]->insert(lines);
134+
}
135+
else
136+
#endif
137+
{
138+
columns[column_idx++]->insertDefault();
139+
columns[column_idx++]->insertDefault();
140+
}
79141
}
80142

81143
struct ValuePair
@@ -99,8 +161,13 @@ void ErrorLog::stepFunction(TimePoint current_time)
99161
ErrorLogElement local_elem {
100162
.event_time=event_time,
101163
.code=code,
164+
.error_time_ms=error.local.error_time_ms,
165+
.error_message=error.local.message,
102166
.value=error.local.count - previous_values.at(code).local,
103-
.remote=false
167+
.remote=false,
168+
.query_id=error.local.query_id,
169+
.error_trace=error.local.trace,
170+
.symbolize=symbolize
104171
};
105172
this->add(std::move(local_elem));
106173
previous_values[code].local = error.local.count;
@@ -110,8 +177,13 @@ void ErrorLog::stepFunction(TimePoint current_time)
110177
ErrorLogElement remote_elem {
111178
.event_time=event_time,
112179
.code=code,
180+
.error_time_ms=error.remote.error_time_ms,
181+
.error_message=error.remote.message,
113182
.value=error.remote.count - previous_values.at(code).remote,
114-
.remote=true
183+
.remote=true,
184+
.query_id=error.remote.query_id,
185+
.error_trace=error.remote.trace,
186+
.symbolize=symbolize
115187
};
116188
add(std::move(remote_elem));
117189
previous_values[code].remote = error.remote.count;

src/Interpreters/ErrorLog.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,11 @@
44
#include <Common/ErrorCodes.h>
55
#include <Core/NamesAndTypes.h>
66
#include <Core/NamesAndAliases.h>
7+
#include <DataTypes/DataTypeArray.h>
78
#include <Storages/ColumnsDescription.h>
89

10+
#include <vector>
11+
912

1013
namespace DB
1114
{
@@ -17,8 +20,13 @@ struct ErrorLogElement
1720
{
1821
time_t event_time{};
1922
ErrorCodes::ErrorCode code{};
23+
UInt64 error_time_ms = 0;
24+
std::string error_message{};
2025
ErrorCodes::Value value{};
2126
bool remote{};
27+
std::string query_id;
28+
std::vector<void *> error_trace{};
29+
bool symbolize = false;
2230

2331
static std::string name() { return "ErrorLog"; }
2432
static ColumnsDescription getColumnsDescription();
@@ -31,8 +39,20 @@ class ErrorLog : public PeriodicLog<ErrorLogElement>
3139
{
3240
using PeriodicLog<ErrorLogElement>::PeriodicLog;
3341

42+
public:
43+
ErrorLog(ContextPtr context_,
44+
const SystemLogSettings & settings_,
45+
std::shared_ptr<SystemLogQueue<ErrorLogElement>> queue_ = nullptr)
46+
: PeriodicLog<ErrorLogElement>(context_, settings_, queue_),
47+
symbolize(settings_.symbolize_traces)
48+
{
49+
}
50+
3451
protected:
3552
void stepFunction(TimePoint current_time) override;
53+
54+
private:
55+
bool symbolize;
3656
};
3757

3858
}

src/Interpreters/SystemLog.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414
#include <Interpreters/Context.h>
1515
#include <Interpreters/CrashLog.h>
1616
#include <Interpreters/DatabaseCatalog.h>
17-
#include <Interpreters/ErrorLog.h>
1817
#include <Interpreters/FilesystemCacheLog.h>
1918
#include <Interpreters/FilesystemReadPrefetchesLog.h>
2019
#include <Interpreters/InterpreterCreateQuery.h>
@@ -34,6 +33,7 @@
3433
#include <Interpreters/SessionLog.h>
3534
#include <Interpreters/TextLog.h>
3635
#include <Interpreters/TraceLog.h>
36+
#include <Interpreters/ErrorLog.h>
3737
#include <Interpreters/TransactionsInfoLog.h>
3838
#include <Interpreters/ZooKeeperLog.h>
3939
#include <IO/WriteHelpers.h>
@@ -279,7 +279,7 @@ std::shared_ptr<TSystemLog> createSystemLog(
279279
log_settings.queue_settings.notify_flush_on_crash = config.getBool(config_prefix + ".flush_on_crash",
280280
TSystemLog::shouldNotifyFlushOnCrash());
281281

282-
if constexpr (std::is_same_v<TSystemLog, TraceLog>)
282+
if constexpr (std::is_same_v<TSystemLog, TraceLog> || std::is_same_v<TSystemLog, ErrorLog>)
283283
log_settings.symbolize_traces = config.getBool(config_prefix + ".symbolize", false);
284284

285285
log_settings.queue_settings.turn_off_logger = TSystemLog::shouldTurnOffLogger();

0 commit comments

Comments
 (0)