Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 11 additions & 11 deletions docs/content/stable/explore/observability/yb-tablet-metadata.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ The following table describes the columns of the `yb_tablet_metadata` view.
| end_hash_code | int | Ending hash code (exclusive) for the tablet. (NULL for range-sharded tables.) |
| leader | text | IP address and port of the leader node for the tablet. |
| replicas | text[] | A list of the IP address and port of each replica (including the leader) associated with the tablet. |
| attributes | jsonb | Per-replica metrics as a JSONB object. Contains a `"replicas"` key mapping each replica address to its `active_sst_sizes` and `wal_sizes` (in bytes). |

## Examples

Expand All @@ -53,16 +54,16 @@ SELECT * FROM yb_tablet_metadata WHERE db_name = 'yugabyte' AND relname = 'test_
```

```output
+----------------------------------+-------+----------+-------------+-----------------+---------------+------------------+--------------------------------------------------------+
| tablet_id | oid | db_name | relname | start_hash_code | end_hash_code | leader | replicas |
|----------------------------------+-------+----------+-------------+-----------------+---------------+------------------+--------------------------------------------------------|
| 3987b6a16bf94fbd92262744197350d7 | 16384 | yugabyte | test_table | 0 | 10922 | 127.0.0.2:5433 | ['127.0.0.1:5433', '127.0.0.2:5433', '127.0.0.3:5433'] |
| dd50b59c7dcb493680093ffa5b195634 | 16384 | yugabyte | test_table | 10922 | 21845 | 127.0.0.1:5433 | ['127.0.0.1:5433', '127.0.0.2:5433', '127.0.0.3:5433'] |
| bed5b3c3eee747e99622a4e21acf437a | 16384 | yugabyte | test_table | 21845 | 32768 | 127.0.0.3:5433 | ['127.0.0.1:5433', '127.0.0.2:5433', '127.0.0.3:5433'] |
| da4bad5faa9f448f890cce57c775cd94 | 16384 | yugabyte | test_table | 32768 | 43690 | 127.0.0.2:5433 | ['127.0.0.1:5433', '127.0.0.2:5433', '127.0.0.3:5433'] |
| 52176c704c614846bbd80f481678519e | 16384 | yugabyte | test_table | 43690 | 54613 | 127.0.0.1:5433 | ['127.0.0.1:5433', '127.0.0.2:5433', '127.0.0.3:5433'] |
| ea252119fe774ba9bdc585504fae9398 | 16384 | yugabyte | test_table | 54613 | 65536 | 127.0.0.3:5433 | ['127.0.0.1:5433', '127.0.0.2:5433', '127.0.0.3:5433'] |
+----------------------------------+-------+----------+-------------+-----------------+---------------+------------------+--------------------------------------------------------+
+----------------------------------+-------+----------+-------------+-----------------+---------------+------------------+--------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| tablet_id | oid | db_name | relname | start_hash_code | end_hash_code | leader | replicas | attributes |
|----------------------------------+-------+----------+-------------+-----------------+---------------+------------------+--------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| 3987b6a16bf94fbd92262744197350d7 | 16384 | yugabyte | test_table | 0 | 10922 | 127.0.0.2:5433 | ['127.0.0.1:5433', '127.0.0.2:5433', '127.0.0.3:5433'] | {"replicas": {"127.0.0.1:5433": {"active_sst_sizes": 0, "wal_sizes": 0}, "127.0.0.2:5433": {"active_sst_sizes": 0, "wal_sizes": 0}, "127.0.0.3:5433": {"active_sst_sizes": 0, "wal_sizes": 0}}} |
| dd50b59c7dcb493680093ffa5b195634 | 16384 | yugabyte | test_table | 10922 | 21845 | 127.0.0.1:5433 | ['127.0.0.1:5433', '127.0.0.2:5433', '127.0.0.3:5433'] | {"replicas": {"127.0.0.1:5433": {"active_sst_sizes": 0, "wal_sizes": 0}, "127.0.0.2:5433": {"active_sst_sizes": 0, "wal_sizes": 0}, "127.0.0.3:5433": {"active_sst_sizes": 0, "wal_sizes": 0}}} |
| bed5b3c3eee747e99622a4e21acf437a | 16384 | yugabyte | test_table | 21845 | 32768 | 127.0.0.3:5433 | ['127.0.0.1:5433', '127.0.0.2:5433', '127.0.0.3:5433'] | {"replicas": {"127.0.0.1:5433": {"active_sst_sizes": 0, "wal_sizes": 0}, "127.0.0.2:5433": {"active_sst_sizes": 0, "wal_sizes": 0}, "127.0.0.3:5433": {"active_sst_sizes": 0, "wal_sizes": 0}}} |
| da4bad5faa9f448f890cce57c775cd94 | 16384 | yugabyte | test_table | 32768 | 43690 | 127.0.0.2:5433 | ['127.0.0.1:5433', '127.0.0.2:5433', '127.0.0.3:5433'] | {"replicas": {"127.0.0.1:5433": {"active_sst_sizes": 0, "wal_sizes": 0}, "127.0.0.2:5433": {"active_sst_sizes": 0, "wal_sizes": 0}, "127.0.0.3:5433": {"active_sst_sizes": 0, "wal_sizes": 0}}} |
| 52176c704c614846bbd80f481678519e | 16384 | yugabyte | test_table | 43690 | 54613 | 127.0.0.1:5433 | ['127.0.0.1:5433', '127.0.0.2:5433', '127.0.0.3:5433'] | {"replicas": {"127.0.0.1:5433": {"active_sst_sizes": 0, "wal_sizes": 0}, "127.0.0.2:5433": {"active_sst_sizes": 0, "wal_sizes": 0}, "127.0.0.3:5433": {"active_sst_sizes": 0, "wal_sizes": 0}}} |
| ea252119fe774ba9bdc585504fae9398 | 16384 | yugabyte | test_table | 54613 | 65536 | 127.0.0.3:5433 | ['127.0.0.1:5433', '127.0.0.2:5433', '127.0.0.3:5433'] | {"replicas": {"127.0.0.1:5433": {"active_sst_sizes": 0, "wal_sizes": 0}, "127.0.0.2:5433": {"active_sst_sizes": 0, "wal_sizes": 0}, "127.0.0.3:5433": {"active_sst_sizes": 0, "wal_sizes": 0}}} |
+----------------------------------+-------+----------+-------------+-----------------+---------------+------------------+--------------------------------------------------------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
```

### Join with yb_servers
Expand Down Expand Up @@ -268,4 +269,3 @@ SELECT
| 99ed7472ccde4af787cb0bcfd4fd90bd | yugabyte | test_table | 0 | 32768 | RocksDB_NewIterator | TServer | DiskIO | 1 |
+----------------------------------+----------+------------------+-----------------+---------------+------------------------------------+----------------------+-----------------+-------+
```

3 changes: 2 additions & 1 deletion src/postgres/src/backend/catalog/yb_system_views.sql
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@ CREATE VIEW yb_tablet_metadata AS
t.start_hash_code,
t.end_hash_code,
t.leader,
t.replicas
t.replicas,
t.attributes
FROM
yb_get_tablet_metadata() t
LEFT JOIN
Expand Down
99 changes: 86 additions & 13 deletions src/postgres/src/backend/utils/misc/pg_yb_utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@
#endif
#include "storage/procarray.h"
#include "tcop/utility.h"
#include "utils/array.h"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
#include "utils/array.h"

#include "utils/builtins.h"
#include "utils/datum.h"
#include "utils/fmgroids.h"
Expand Down Expand Up @@ -8482,10 +8483,13 @@ string_list_compare(const ListCell *a, const ListCell *b)
* - end_hash_code: int32
* - leader: text
* - replicas: text[]
* - attributes: jsonb (nested structure with per-replica data)
*
* The start_hash_code and end_hash_code are the hash codes of the start and end
* keys of the tablet for hash sharded tables. Leader is provided as a separate
* column for simpler querying and self-explanatory access.
* column for simpler querying and self-explanatory access. The attributes column
* contains a JSONB object with a "replicas" key mapping each replica address to
* its metrics (active_sst_sizes, wal_sizes).
*/
Datum
yb_get_tablet_metadata(PG_FUNCTION_ARGS)
Expand All @@ -8495,7 +8499,7 @@ yb_get_tablet_metadata(PG_FUNCTION_ARGS)
Tuplestorestate *tupstore;
MemoryContext per_query_ctx;
MemoryContext oldcontext;
static int ncols = 9;
static int ncols = 10;

/* check to see if caller supports us returning a tuplestore */
if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
Expand Down Expand Up @@ -8564,31 +8568,100 @@ yb_get_tablet_metadata(PG_FUNCTION_ARGS)
nulls[6] = true;
}

/* Convert replicas array to PostgreSQL text array */
/* Convert replicas to PostgreSQL text array and build JSONB attributes */
if (tablet->replicas_count > 0)
{
size_t nreplicas = tablet->replicas_count;

Assert(tablet->replicas != NULL);

/* The last replica is the leader. */
values[7] = CStringGetTextDatum(tablet->replicas[tablet->replicas_count - 1]);
values[7] = CStringGetTextDatum(tablet->replicas[nreplicas - 1]);

/* Convert char ** to List * */
List *replicas_list = NIL;
{
List *replicas_list = NIL;

for (size_t idx = 0; idx < tablet->replicas_count; idx++)
replicas_list = lappend(replicas_list, (char *) tablet->replicas[idx]);
for (size_t idx = 0; idx < nreplicas; idx++)
replicas_list = lappend(replicas_list,
(char *) tablet->replicas[idx]);

/*
* Sort the list lexicographically for consistency, so that all rows
* with same replicas have same entries.
*/
list_sort(replicas_list, string_list_compare);
values[8] = PointerGetDatum(strlist_to_textarray(replicas_list));
/*
* Sort the list lexicographically for consistency, so that
* all rows with same replicas have same entries.
*/
list_sort(replicas_list, string_list_compare);
values[8] = PointerGetDatum(strlist_to_textarray(replicas_list));
}

/* Build JSONB attributes with per-replica size metrics. */
{
JsonbParseState *jb_state = NULL;
JsonbValue jb_result;
JsonbValue jb_key;
JsonbValue jb_val;

/* {"replicas": {"addr": {"active_sst_sizes": N, "wal_sizes": N}, ...}} */
pushJsonbValue(&jb_state, WJB_BEGIN_OBJECT, NULL);

jb_key.type = jbvString;
jb_key.val.string.val = "replicas";
jb_key.val.string.len = 8;
pushJsonbValue(&jb_state, WJB_KEY, &jb_key);

pushJsonbValue(&jb_state, WJB_BEGIN_OBJECT, NULL);

for (size_t idx = 0; idx < nreplicas; idx++)
{
int64 sst_size;
int64 wal_size;

sst_size = tablet->replica_sst_sizes
? tablet->replica_sst_sizes[idx] : 0;
wal_size = tablet->replica_wal_sizes
? tablet->replica_wal_sizes[idx] : 0;

/* Replica address as key */
jb_key.type = jbvString;
jb_key.val.string.val = (char *) tablet->replicas[idx];
jb_key.val.string.len = strlen(tablet->replicas[idx]);
pushJsonbValue(&jb_state, WJB_KEY, &jb_key);

/* Per-replica object */
pushJsonbValue(&jb_state, WJB_BEGIN_OBJECT, NULL);

jb_key.type = jbvString;
jb_key.val.string.val = "active_sst_sizes";
jb_key.val.string.len = 16;
pushJsonbValue(&jb_state, WJB_KEY, &jb_key);
jb_val.type = jbvNumeric;
jb_val.val.numeric = DatumGetNumeric(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Error (S&RX) bad_opening_paren
There should be no linebreak after (

DirectFunctionCall1(int8_numeric, Int64GetDatum(sst_size)));
pushJsonbValue(&jb_state, WJB_VALUE, &jb_val);

jb_key.type = jbvString;
jb_key.val.string.val = "wal_sizes";
jb_key.val.string.len = 9;
pushJsonbValue(&jb_state, WJB_KEY, &jb_key);
jb_val.type = jbvNumeric;
jb_val.val.numeric = DatumGetNumeric(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Error (S&RX) bad_opening_paren
There should be no linebreak after (

DirectFunctionCall1(int8_numeric, Int64GetDatum(wal_size)));
pushJsonbValue(&jb_state, WJB_VALUE, &jb_val);

pushJsonbValue(&jb_state, WJB_END_OBJECT, NULL);
}

pushJsonbValue(&jb_state, WJB_END_OBJECT, NULL); /* end replicas */
jb_result = *pushJsonbValue(&jb_state, WJB_END_OBJECT, NULL); /* end outer */

values[9] = JsonbPGetDatum(JsonbValueToJsonb(&jb_result));
}
}
else
{
nulls[7] = true;
nulls[8] = true;
nulls[9] = true;
}

tuplestore_putvalues(tupstore, tupdesc, values, nulls);
Expand Down
6 changes: 3 additions & 3 deletions src/postgres/src/include/catalog/pg_proc.dat
Original file line number Diff line number Diff line change
Expand Up @@ -12234,9 +12234,9 @@
proname => 'yb_get_tablet_metadata', prorows => '100',
proretset => 't', provolatile => 'v', proparallel => 'r',
prorettype => 'record', proargtypes => '',
proallargtypes => '{text,text,text,text,text,int4,int4,text,_text}',
proargnames => '{tablet_id,object_uuid,namespace,object_name,type,start_hash_code,end_hash_code,leader,replicas}',
proargmodes => '{o,o,o,o,o,o,o,o,o}',
proallargtypes => '{text,text,text,text,text,int4,int4,text,_text,jsonb}',
proargnames => '{tablet_id,object_uuid,namespace,object_name,type,start_hash_code,end_hash_code,leader,replicas,attributes}',
proargmodes => '{o,o,o,o,o,o,o,o,o,o}',
prosrc => 'yb_get_tablet_metadata'},

{ oid => '8101', descr => 'Get the UUID of the local tserver',
Expand Down
4 changes: 2 additions & 2 deletions src/postgres/src/include/catalog/pg_yb_migration.dat
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
[

# For better version control conflict detection, list latest migration filename
# here: V100__30133__ysql_conn_mgr_coarse_grained_logical_client_version.sql
{ major => '100', minor => '0', name => '<baseline>', time_applied => '_null_' }
# here: V101__29665__yb_tablet_metadata_sizes.sql
{ major => '101', minor => '0', name => '<baseline>', time_applied => '_null_' }

]
48 changes: 48 additions & 0 deletions src/postgres/src/test/regress/expected/yb.orig.tablet_metadata.out
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,54 @@ ORDER BY start_hash_code NULLS FIRST;
test_table_1 | yugabyte | 32768 | 65536
(3 rows)

-- Test that attributes column is present with correct type and structure
SELECT
relname,
pg_typeof(attributes) AS attr_type,
jsonb_typeof(attributes->'replicas') AS replicas_type,
(SELECT count(*) FROM jsonb_object_keys(attributes->'replicas'))::int
= array_length(replicas, 1) AS key_count_match
FROM yb_tablet_metadata
WHERE relname = 'test_table_1'
ORDER BY start_hash_code NULLS FIRST
LIMIT 1;
relname | attr_type | replicas_type | key_count_match
--------------+-----------+---------------+-----------------
test_table_1 | jsonb | object | t
(1 row)

-- Test that all size values are non-negative for each replica
SELECT replica, (attrs->>'active_sst_sizes')::bigint AS active_sst_sizes,
(attrs->>'wal_sizes')::bigint AS wal_sizes
FROM yb_tablet_metadata tm,
unnest(tm.replicas) AS replica,
LATERAL (SELECT tm.attributes->'replicas'->replica AS attrs) a
WHERE tm.relname = 'test_table_1'
AND ((attrs->>'active_sst_sizes')::bigint < 0
OR (attrs->>'wal_sizes')::bigint < 0);
replica | active_sst_sizes | wal_sizes
---------+------------------+-----------
(0 rows)

-- Test that every replica in the replicas array has an entry in attributes
SELECT replica
FROM yb_tablet_metadata tm, unnest(tm.replicas) AS replica
WHERE tm.relname = 'test_table_1'
AND NOT (tm.attributes->'replicas' ? replica);
replica
---------
(0 rows)

-- Test that every replica address corresponds to a live tserver
SELECT unnest(replicas) AS orphan_replica
FROM yb_tablet_metadata
WHERE relname = 'test_table_1'
EXCEPT
SELECT host || ':' || port FROM yb_servers();
orphan_replica
----------------
(0 rows)

-- Test that we are able to join with yb_servers()
SELECT
ytm.relname,
Expand Down
35 changes: 35 additions & 0 deletions src/postgres/src/test/regress/sql/yb.orig.tablet_metadata.sql
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,41 @@ SELECT
FROM yb_tablet_metadata WHERE relname IN ('test_table_1', 'test_table_2')
ORDER BY start_hash_code NULLS FIRST;

-- Test that attributes column is present with correct type and structure
SELECT
relname,
pg_typeof(attributes) AS attr_type,
jsonb_typeof(attributes->'replicas') AS replicas_type,
(SELECT count(*) FROM jsonb_object_keys(attributes->'replicas'))::int
= array_length(replicas, 1) AS key_count_match
FROM yb_tablet_metadata
WHERE relname = 'test_table_1'
ORDER BY start_hash_code NULLS FIRST
LIMIT 1;

-- Test that all size values are non-negative for each replica
SELECT replica, (attrs->>'active_sst_sizes')::bigint AS active_sst_sizes,
(attrs->>'wal_sizes')::bigint AS wal_sizes
FROM yb_tablet_metadata tm,
unnest(tm.replicas) AS replica,
LATERAL (SELECT tm.attributes->'replicas'->replica AS attrs) a
WHERE tm.relname = 'test_table_1'
AND ((attrs->>'active_sst_sizes')::bigint < 0
OR (attrs->>'wal_sizes')::bigint < 0);

-- Test that every replica in the replicas array has an entry in attributes
SELECT replica
FROM yb_tablet_metadata tm, unnest(tm.replicas) AS replica
WHERE tm.relname = 'test_table_1'
AND NOT (tm.attributes->'replicas' ? replica);

-- Test that every replica address corresponds to a live tserver
SELECT unnest(replicas) AS orphan_replica
FROM yb_tablet_metadata
WHERE relname = 'test_table_1'
EXCEPT
SELECT host || ':' || port FROM yb_servers();

-- Test that we are able to join with yb_servers()
SELECT
ytm.relname,
Expand Down
8 changes: 8 additions & 0 deletions src/yb/master/catalog_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14367,6 +14367,8 @@ void PopulateTabletMetadata(
}

std::string leader_address;
uint64_t leader_sst_files_size = 0;
uint64_t leader_wal_files_size = 0;
auto replica_locations = tablet->GetReplicaLocations();
for (const auto& [ts_uuid, replica] : *replica_locations) {
auto ts_desc_result = ts_manager->LookupTSByUUID(ts_uuid);
Expand All @@ -14392,15 +14394,21 @@ void PopulateTabletMetadata(

if (replica.role == PeerRole::LEADER) {
leader_address = server_address;
leader_sst_files_size = replica.drive_info.sst_files_size;
leader_wal_files_size = replica.drive_info.wal_files_size;
} else {
tablet_metadata->add_replicas(server_address);
tablet_metadata->add_replica_sst_sizes(replica.drive_info.sst_files_size);
tablet_metadata->add_replica_wal_sizes(replica.drive_info.wal_files_size);
}
}
}

// Add leader as the last replica
if (!leader_address.empty()) {
tablet_metadata->add_replicas(leader_address);
tablet_metadata->add_replica_sst_sizes(leader_sst_files_size);
tablet_metadata->add_replica_wal_sizes(leader_wal_files_size);
}

auto tablet_lock = tablet->LockForRead();
Expand Down
3 changes: 3 additions & 0 deletions src/yb/tablet/tablet.proto
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,9 @@ message TabletStatusPB {
// Leader is the last replica in the list.
repeated string replicas = 24;
optional bool is_hash_partitioned = 25;
// Per-replica SST and WAL sizes in bytes, parallel to the replicas array.
repeated uint64 replica_sst_sizes = 26;
repeated uint64 replica_wal_sizes = 27;
}

// Used to present the maintenance manager's internal state.
Expand Down
2 changes: 2 additions & 0 deletions src/yb/yql/pggate/ybc_pg_typedefs.h
Original file line number Diff line number Diff line change
Expand Up @@ -875,6 +875,8 @@ typedef struct {
const char** replicas;
size_t replicas_count;
bool is_hash_partitioned;
const uint64_t* replica_sst_sizes;
const uint64_t* replica_wal_sizes;
} YbcPgGlobalTabletsDescriptor;

typedef struct {
Expand Down
Loading