Skip to content

Commit bab22af

Browse files
authored
feat: add subgraph health endpoint (#449)
1 parent 775f2f6 commit bab22af

File tree

6 files changed

+329
-3
lines changed

6 files changed

+329
-3
lines changed

README.md

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,35 @@ curl -X POST \
113113
}
114114
```
115115

116+
## Subgraph health check
117+
```bash
118+
curl http://localhost:7600/subgraph/health/QmVhiE4nax9i86UBnBmQCYDzvjWuwHShYh7aspGPQhU5Sj
119+
```
120+
```json
121+
{
122+
"health": "healthy"
123+
}
124+
```
125+
## Unfound subgraph
126+
```bash
127+
curl http://localhost:7600/subgraph/health/QmacQnSgia4iDPWHpeY6aWxesRFdb8o5DKZUx96zZqEWrB
128+
```
129+
```json
130+
{
131+
"error": "Deployment not found"
132+
}
133+
```
134+
## Failed Subgraph
135+
```bash
136+
curl http://localhost:7600/subgraph/health/QmVGSJyvjEjkk5U9EdxyyB78NCXK3EAoFhrzm6LV7SxxAm
137+
```
138+
```json
139+
{
140+
"fatalError": "transaction 21e77ed08fbc9df7be81101e9b03c2616494cee7cac2f6ad4f1ee387cf799e0c: error while executing at wasm backtrace:\t 0: 0x5972 - <unknown>!mappings/core/handleSwap: Mapping aborted at mappings/core.ts, line 73, column 16, with message: unexpected null in handler `handleSwap` at block #36654250 (5ab4d80c8e2cd628d5bf03abab4c302fd21d25d734e66afddff7a706b804fe13)",
141+
"health": "failed"
142+
}
143+
```
144+
116145
# Network queries
117146
## Checks for auth and configuration to serve-network-subgraph
118147

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
// Copyright 2023-, Edge & Node, GraphOps, and Semiotic Labs.
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
use axum::{
5+
extract::Path,
6+
response::{IntoResponse, Response as AxumResponse},
7+
Extension, Json,
8+
};
9+
use graphql_client::GraphQLQuery;
10+
use indexer_config::GraphNodeConfig;
11+
use reqwest::StatusCode;
12+
use serde_json::json;
13+
use thiserror::Error;
14+
15+
#[derive(GraphQLQuery)]
16+
#[graphql(
17+
schema_path = "../graphql/indexing_status.schema.graphql",
18+
query_path = "../graphql/subgraph_health.query.graphql",
19+
response_derives = "Debug",
20+
variables_derives = "Clone"
21+
)]
22+
pub struct HealthQuery;
23+
24+
#[derive(Debug, Error)]
25+
pub enum CheckHealthError {
26+
#[error("Failed to send request")]
27+
RequestFailed,
28+
#[error("Failed to decode response")]
29+
BadResponse,
30+
#[error("Deployment not found")]
31+
DeploymentNotFound,
32+
#[error("Invalid health status found")]
33+
InvalidHealthStatus,
34+
}
35+
36+
impl IntoResponse for CheckHealthError {
37+
fn into_response(self) -> AxumResponse {
38+
let status = match &self {
39+
CheckHealthError::DeploymentNotFound => StatusCode::NOT_FOUND,
40+
CheckHealthError::InvalidHealthStatus | CheckHealthError::BadResponse => {
41+
StatusCode::INTERNAL_SERVER_ERROR
42+
}
43+
CheckHealthError::RequestFailed => StatusCode::BAD_GATEWAY,
44+
};
45+
let body = serde_json::json!({
46+
"error": self.to_string(),
47+
});
48+
(status, Json(body)).into_response()
49+
}
50+
}
51+
52+
pub async fn health(
53+
Path(deployment_id): Path<String>,
54+
Extension(graph_node): Extension<GraphNodeConfig>,
55+
) -> Result<impl IntoResponse, CheckHealthError> {
56+
let req_body = HealthQuery::build_query(health_query::Variables {
57+
ids: vec![deployment_id],
58+
});
59+
60+
let client = reqwest::Client::new();
61+
let response = client
62+
.post(graph_node.status_url)
63+
.json(&req_body)
64+
.send()
65+
.await
66+
.map_err(|_| CheckHealthError::RequestFailed)?;
67+
68+
let graphql_response: graphql_client::Response<health_query::ResponseData> = response
69+
.json()
70+
.await
71+
.map_err(|_| CheckHealthError::BadResponse)?;
72+
73+
let data = match (graphql_response.data, graphql_response.errors) {
74+
(Some(data), None) => data,
75+
_ => return Err(CheckHealthError::BadResponse),
76+
};
77+
78+
let Some(status) = data.indexing_statuses.first() else {
79+
return Err(CheckHealthError::DeploymentNotFound);
80+
};
81+
let health_response = match status.health {
82+
health_query::Health::healthy => json!({ "health": status.health }),
83+
health_query::Health::unhealthy => {
84+
let errors: Vec<&String> = status
85+
.non_fatal_errors
86+
.iter()
87+
.map(|msg| &msg.message)
88+
.collect();
89+
json!({ "health": status.health, "nonFatalErrors": errors })
90+
}
91+
health_query::Health::failed => {
92+
json!({ "health": status.health, "fatalError": status.fatal_error.as_ref().map_or("null", |msg| &msg.message) })
93+
}
94+
health_query::Health::Other(_) => return Err(CheckHealthError::InvalidHealthStatus),
95+
};
96+
Ok(Json(health_response))
97+
}

common/src/indexer_service/http/indexer_service.rs

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,10 @@ use tower_http::{cors, cors::CorsLayer, normalize_path::NormalizePath, trace::Tr
3333
use tracing::error;
3434
use tracing::{info, info_span};
3535

36+
use super::request_handler::request_handler;
3637
use crate::escrow_accounts::EscrowAccounts;
3738
use crate::escrow_accounts::EscrowAccountsError;
39+
use crate::indexer_service::http::health::health;
3840
use crate::{
3941
address::public_key,
4042
indexer_service::http::static_subgraph::static_subgraph_request_handler,
@@ -44,8 +46,6 @@ use crate::{
4446
},
4547
tap::IndexerTapContext,
4648
};
47-
48-
use super::request_handler::request_handler;
4949
use indexer_config::Config;
5050

5151
pub trait IndexerServiceResponse {
@@ -386,7 +386,7 @@ impl IndexerService {
386386
.route("/", get("Service is up and running"))
387387
.route("/version", get(Json(options.release)))
388388
.route("/info", get(operator_address))
389-
.layer(misc_rate_limiter);
389+
.layer(misc_rate_limiter.clone());
390390

391391
// Rate limits by allowing bursts of 50 requests and requiring 20ms of
392392
// time between consecutive requests after that, effectively rate
@@ -401,6 +401,12 @@ impl IndexerService {
401401
),
402402
};
403403

404+
// Check subgraph Health
405+
misc_routes = misc_routes
406+
.route("/subgraph/health/:deployment_id", get(health))
407+
.route_layer(Extension(options.config.graph_node.clone()))
408+
.layer(misc_rate_limiter);
409+
404410
if options.config.service.serve_network_subgraph {
405411
info!("Serving network subgraph at /network");
406412

common/src/indexer_service/http/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
// Copyright 2023-, Edge & Node, GraphOps, and Semiotic Labs.
22
// SPDX-License-Identifier: Apache-2.0
33

4+
mod health;
45
mod indexer_service;
56
mod request_handler;
67
mod static_subgraph;
Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
schema {
2+
query: Query
3+
}
4+
5+
type ApiVersion {
6+
"""
7+
Version number in SemVer format
8+
9+
"""
10+
version: String!
11+
}
12+
13+
scalar BigInt
14+
15+
type Block {
16+
hash: Bytes!
17+
number: BigInt!
18+
}
19+
20+
input BlockInput {
21+
hash: Bytes!
22+
number: BigInt!
23+
}
24+
25+
scalar Bytes
26+
27+
type CachedEthereumCall {
28+
idHash: Bytes!
29+
block: Block!
30+
contractAddress: Bytes!
31+
returnValue: Bytes!
32+
}
33+
34+
interface ChainIndexingStatus {
35+
network: String!
36+
chainHeadBlock: Block
37+
earliestBlock: EarliestBlock
38+
latestBlock: Block
39+
lastHealthyBlock: Block
40+
}
41+
42+
scalar Date
43+
44+
type EarliestBlock {
45+
hash: Bytes!
46+
number: BigInt!
47+
}
48+
49+
type EntityChanges {
50+
updates: [EntityTypeUpdates!]!
51+
deletions: [EntityTypeDeletions!]!
52+
}
53+
54+
type EntityTypeDeletions {
55+
type: String!
56+
entities: [ID!]!
57+
}
58+
59+
type EntityTypeUpdates {
60+
type: String!
61+
entities: [JSONObject!]!
62+
}
63+
64+
type EthereumIndexingStatus implements ChainIndexingStatus {
65+
network: String!
66+
chainHeadBlock: Block
67+
earliestBlock: EarliestBlock
68+
latestBlock: Block
69+
lastHealthyBlock: Block
70+
}
71+
72+
enum Feature {
73+
nonFatalErrors
74+
grafting
75+
fullTextSearch
76+
ipfsOnEthereumContracts
77+
aggregations
78+
declaredEthCalls
79+
immutableEntities
80+
bytesAsIds
81+
}
82+
83+
enum Health {
84+
"""Subgraph syncing normally"""
85+
healthy
86+
"""Subgraph syncing but with errors"""
87+
unhealthy
88+
"""Subgraph halted due to errors"""
89+
failed
90+
}
91+
92+
scalar JSONObject
93+
94+
type PartialBlock {
95+
hash: Bytes
96+
number: BigInt!
97+
}
98+
99+
input ProofOfIndexingRequest {
100+
deployment: String!
101+
block: BlockInput!
102+
}
103+
104+
type ProofOfIndexingResult {
105+
deployment: String!
106+
block: Block!
107+
"""
108+
There may not be a proof of indexing available for the deployment and block
109+
"""
110+
proofOfIndexing: Bytes
111+
}
112+
113+
input PublicProofOfIndexingRequest {
114+
deployment: String!
115+
blockNumber: BigInt!
116+
}
117+
118+
type PublicProofOfIndexingResult {
119+
deployment: String!
120+
block: PartialBlock!
121+
proofOfIndexing: Bytes!
122+
}
123+
124+
type Query {
125+
indexingStatusForCurrentVersion(subgraphName: String!): SubgraphIndexingStatus
126+
indexingStatusForPendingVersion(subgraphName: String!): SubgraphIndexingStatus
127+
indexingStatusesForSubgraphName(subgraphName: String!): [SubgraphIndexingStatus!]!
128+
indexingStatuses(subgraphs: [String!]): [SubgraphIndexingStatus!]!
129+
proofOfIndexing(subgraph: String!, blockNumber: Int!, blockHash: Bytes!, indexer: Bytes): Bytes
130+
"""
131+
Proofs of indexing for several deployments and blocks that can be shared and
132+
compared in public without revealing the _actual_ proof of indexing that every
133+
indexer has in their database
134+
135+
"""
136+
publicProofsOfIndexing(requests: [PublicProofOfIndexingRequest!]!): [PublicProofOfIndexingResult!]!
137+
subgraphFeatures(subgraphId: String!): SubgraphFeatures!
138+
entityChangesInBlock(subgraphId: String!, blockNumber: Int!): EntityChanges!
139+
blockData(network: String!, blockHash: Bytes!): JSONObject
140+
blockHashFromNumber(network: String!, blockNumber: Int!): Bytes
141+
version: Version!
142+
cachedEthereumCalls(network: String!, blockHash: Bytes!): [CachedEthereumCall!]
143+
apiVersions(subgraphId: String!): [ApiVersion!]!
144+
}
145+
146+
type SubgraphError {
147+
message: String!
148+
block: Block
149+
handler: String
150+
deterministic: Boolean!
151+
}
152+
153+
type SubgraphFeatures {
154+
apiVersion: String
155+
specVersion: String!
156+
features: [Feature!]!
157+
dataSources: [String!]!
158+
handlers: [String!]!
159+
network: String
160+
}
161+
162+
type SubgraphIndexingStatus {
163+
subgraph: String!
164+
synced: Boolean!
165+
health: Health!
166+
"""If the subgraph has failed, this is the error caused it"""
167+
fatalError: SubgraphError
168+
"""Sorted from first to last, limited to first 1000"""
169+
nonFatalErrors: [SubgraphError!]!
170+
chains: [ChainIndexingStatus!]!
171+
entityCount: BigInt!
172+
"""null if deployment is not assigned to an indexing node"""
173+
node: String
174+
"""null if deployment is not assigned to an indexing node"""
175+
paused: Boolean
176+
historyBlocks: Int!
177+
}
178+
179+
type Version {
180+
version: String!
181+
commit: String!
182+
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
query HealthQuery($ids: [String!]!) {
2+
indexingStatuses(subgraphs: $ids) {
3+
health
4+
fatalError {
5+
message
6+
}
7+
nonFatalErrors {
8+
message
9+
}
10+
}
11+
}

0 commit comments

Comments
 (0)