@@ -1372,6 +1372,317 @@ async def restart_service(service_name: str) -> str:
13721372 logger .error ("Error occurred while restarting service '%s': %s" , service_name , str (e ))
13731373 return f"Error: Service '{ service_name } ' restart operation failed: { str (e )} "
13741374
@mcp.tool()
@log_tool
async def start_host_component(host_name: str, component_name: str) -> str:
    """
    Starts a specific component on a specific host in the Ambari cluster.

    [Tool Role]: Dedicated tool for starting individual host-level components,
    enabling fine-grained control without affecting other hosts or the overall service.

    [Core Functions]:
    - Validate the host and component names before any request is sent
    - Start the specified component on the given host
    - Skip if the component is already in STARTED state
    - Return request information for progress tracking

    [Required Usage Scenarios]:
    - When a specific host's component is STOPPED but the overall service is STARTED
    - When users request to start a component on a specific host (e.g., "start DataNode on host-A")
    - When recovering a single component instance without restarting the entire service

    Args:
        host_name: Hostname where the component resides (e.g., "hdp-node-01.example.com")
        component_name: Name of the component to start (e.g., "DATANODE", "NODEMANAGER")

    Returns:
        Start operation result (success: request info, failure: error message).
        Failures are always reported as a string starting with "Error:"; this
        function does not raise.
    """
    cluster_name = AMBARI_CLUSTER_NAME

    # Both names are interpolated into the URL path below. A blank value or one
    # containing '/', '?' or '#' would make the endpoint address a different
    # resource (e.g. the host's whole host_components collection), so the
    # state-changing PUT must never be sent with such input.
    host_name = (host_name or "").strip()
    component_name = (component_name or "").strip()
    names = (host_name, component_name)
    if not all(names) or any(ch in name for name in names for ch in "/?#"):
        return (
            "Error: host_name and component_name must be non-empty and "
            "must not contain '/', '?' or '#'."
        )

    try:
        endpoint = f"/clusters/{cluster_name}/hosts/{host_name}/host_components/{component_name}"

        # Look the component up first: confirms it exists and gives the current state.
        check = await make_ambari_request(endpoint)
        if check is None or check.get("error"):
            return f"Error: Component '{component_name}' not found on host '{host_name}'."

        current_state = check.get("HostRoles", {}).get("state", "UNKNOWN")
        if current_state == "STARTED":
            return f"Component '{component_name}' on '{host_name}' is already STARTED. No action needed."

        payload = {
            "RequestInfo": {
                "context": f"Start {component_name} on {host_name} via MCP API",
            },
            "Body": {
                "HostRoles": {
                    "state": "STARTED",
                },
            },
        }

        response_data = await make_ambari_request(endpoint, method="PUT", data=payload)
        if response_data is None or response_data.get("error"):
            error_msg = response_data.get("error") if response_data else "Unknown error occurred"
            return f"Error: Failed to start '{component_name}' on '{host_name}' - {error_msg}"

        # No "Requests" object in the response: there is no request ID to track.
        request_info = response_data.get("Requests")
        if request_info is None:
            return (
                f"Component '{component_name}' on '{host_name}' start command sent successfully. "
                f"Previous state: {current_state}"
            )

        request_id = request_info.get("id", "Unknown")
        request_status = request_info.get("status", "Unknown")
        request_href = response_data.get("href", "")

        result_lines = [
            f"START HOST COMPONENT: {component_name} on {host_name}",
            "",
            f"Cluster: {cluster_name}",
            f"Host: {host_name}",
            f"Component: {component_name}",
            f"Previous State: {current_state}",
            f"Request ID: {request_id}",
            f"Status: {request_status}",
            f"Monitor URL: {request_href}",
            "",
            "Use get_request_status(request_id) to track progress.",
        ]
        return "\n".join(result_lines)

    except Exception as e:
        # Tool boundary: report the failure as text, but keep a server-side record
        # (same logging pattern as the restart tools in this module).
        logger.error("Error occurred while starting '%s' on '%s': %s", component_name, host_name, str(e))
        return f"Error: Exception occurred while starting '{component_name}' on '{host_name}' - {str(e)}"
1457+
@mcp.tool()
@log_tool
async def stop_host_component(host_name: str, component_name: str) -> str:
    """
    Stops a specific component on a specific host in the Ambari cluster.

    [Tool Role]: Dedicated tool for stopping individual host-level components,
    enabling fine-grained control without affecting other hosts or the overall service.

    [Core Functions]:
    - Validate the host and component names before any request is sent
    - Stop the specified component on the given host
    - Skip if the component is already in INSTALLED (stopped) or INSTALL_FAILED state
    - Return request information for progress tracking

    [Required Usage Scenarios]:
    - When users request to stop a component on a specific host (e.g., "stop DataNode on host-A")
    - When decommissioning or isolating a specific node's component
    - When troubleshooting a single component instance

    Args:
        host_name: Hostname where the component resides (e.g., "hdp-node-01.example.com")
        component_name: Name of the component to stop (e.g., "DATANODE", "NODEMANAGER")

    Returns:
        Stop operation result (success: request info, failure: error message).
        Failures are always reported as a string starting with "Error:"; this
        function does not raise.
    """
    cluster_name = AMBARI_CLUSTER_NAME

    # Both names are interpolated into the URL path below. A blank value or one
    # containing '/', '?' or '#' would make the endpoint address a different
    # resource (e.g. the host's whole host_components collection), so the
    # state-changing PUT must never be sent with such input.
    host_name = (host_name or "").strip()
    component_name = (component_name or "").strip()
    names = (host_name, component_name)
    if not all(names) or any(ch in name for name in names for ch in "/?#"):
        return (
            "Error: host_name and component_name must be non-empty and "
            "must not contain '/', '?' or '#'."
        )

    try:
        endpoint = f"/clusters/{cluster_name}/hosts/{host_name}/host_components/{component_name}"

        # Look the component up first: confirms it exists and gives the current state.
        check = await make_ambari_request(endpoint)
        if check is None or check.get("error"):
            return f"Error: Component '{component_name}' not found on host '{host_name}'."

        current_state = check.get("HostRoles", {}).get("state", "UNKNOWN")
        # In this API a stopped component is represented by the INSTALLED state.
        if current_state in ("INSTALLED", "INSTALL_FAILED"):
            return (
                f"Component '{component_name}' on '{host_name}' is already stopped "
                f"(state: {current_state}). No action needed."
            )

        payload = {
            "RequestInfo": {
                "context": f"Stop {component_name} on {host_name} via MCP API",
            },
            "Body": {
                "HostRoles": {
                    "state": "INSTALLED",
                },
            },
        }

        response_data = await make_ambari_request(endpoint, method="PUT", data=payload)
        if response_data is None or response_data.get("error"):
            error_msg = response_data.get("error") if response_data else "Unknown error occurred"
            return f"Error: Failed to stop '{component_name}' on '{host_name}' - {error_msg}"

        # No "Requests" object in the response: there is no request ID to track.
        request_info = response_data.get("Requests")
        if request_info is None:
            return (
                f"Component '{component_name}' on '{host_name}' stop command sent successfully. "
                f"Previous state: {current_state}"
            )

        request_id = request_info.get("id", "Unknown")
        request_status = request_info.get("status", "Unknown")
        request_href = response_data.get("href", "")

        result_lines = [
            f"STOP HOST COMPONENT: {component_name} on {host_name}",
            "",
            f"Cluster: {cluster_name}",
            f"Host: {host_name}",
            f"Component: {component_name}",
            f"Previous State: {current_state}",
            f"Request ID: {request_id}",
            f"Status: {request_status}",
            f"Monitor URL: {request_href}",
            "",
            "Use get_request_status(request_id) to track progress.",
        ]
        return "\n".join(result_lines)

    except Exception as e:
        # Tool boundary: report the failure as text, but keep a server-side record
        # (same logging pattern as the restart tools in this module).
        logger.error("Error occurred while stopping '%s' on '%s': %s", component_name, host_name, str(e))
        return f"Error: Exception occurred while stopping '{component_name}' on '{host_name}' - {str(e)}"
1540+
@mcp.tool()
@log_tool
async def restart_host_component(host_name: str, component_name: str) -> str:
    """
    Restarts a specific component on a specific host in the Ambari cluster (stop then start).

    [Tool Role]: Dedicated tool for restarting individual host-level components,
    enabling fine-grained control without affecting other hosts or the overall service.

    [Core Functions]:
    - Validate the host and component names before any request is sent
    - Stop the specified component on the given host and wait for completion
      (polls every 2 seconds, gives up after 10 minutes)
    - Skip the wait when the stop call creates no request (nothing to stop)
    - Start the component and return request information
    - Return clear success or error message for LLM automation

    [Required Usage Scenarios]:
    - When users request to restart a component on a specific host (e.g., "restart DataNode on host-A")
    - When recovering a stuck or malfunctioning single component instance
    - When applying config changes that require a component-level restart

    Args:
        host_name: Hostname where the component resides (e.g., "hdp-node-01.example.com")
        component_name: Name of the component to restart (e.g., "DATANODE", "NODEMANAGER")

    Returns:
        Restart operation result (success: request info, failure: error message).
        Failures are always reported as a string starting with "Error:"; this
        function does not raise. The start request is only submitted, not awaited.
    """
    cluster_name = AMBARI_CLUSTER_NAME
    poll_interval_seconds = 2
    max_wait_seconds = 600
    max_polls = max_wait_seconds // poll_interval_seconds

    # Both names are interpolated into the URL path below. A blank value or one
    # containing '/', '?' or '#' would make the endpoint address a different
    # resource (e.g. the host's whole host_components collection), so the
    # state-changing PUTs must never be sent with such input.
    host_name = (host_name or "").strip()
    component_name = (component_name or "").strip()
    names = (host_name, component_name)
    if not all(names) or any(ch in name for name in names for ch in "/?#"):
        return (
            "Error: host_name and component_name must be non-empty and "
            "must not contain '/', '?' or '#'."
        )

    try:
        endpoint = f"/clusters/{cluster_name}/hosts/{host_name}/host_components/{component_name}"

        # Look the component up first: confirms it exists and gives the initial state.
        check = await make_ambari_request(endpoint)
        if check is None or check.get("error"):
            return f"Error: Component '{component_name}' not found on host '{host_name}'."

        initial_state = check.get("HostRoles", {}).get("state", "UNKNOWN")

        # Step 1: Stop (a stopped component is represented by the INSTALLED state)
        stop_payload = {
            "RequestInfo": {
                "context": f"Stop {component_name} on {host_name} via MCP API",
            },
            "Body": {
                "HostRoles": {
                    "state": "INSTALLED",
                },
            },
        }

        stop_response = await make_ambari_request(endpoint, method="PUT", data=stop_payload)
        if stop_response is None or stop_response.get("error"):
            error_msg = stop_response.get("error") if stop_response else "Unknown error occurred"
            return f"Error: Unable to stop '{component_name}' on '{host_name}'. {error_msg}"

        stop_requests = stop_response.get("Requests")
        if stop_requests is None:
            # No request was created, so there is nothing to poll. Polling with a
            # placeholder ID would hit a non-existent request URL and fail the restart.
            stop_request_id = "N/A (already stopped)"
            stop_status = "NOT REQUIRED (no stop request was created)"
        else:
            stop_request_id = stop_requests.get("id")
            if stop_request_id is None:
                return f"Error: Failed to retrieve stop request ID for '{component_name}' on '{host_name}'."

            # Step 2: Wait for stop to complete, bounded so a request that never
            # reaches a terminal status cannot hang this tool call forever.
            req_status = "Unknown"
            for _ in range(max_polls):
                status_response = await make_ambari_request(
                    f"/clusters/{cluster_name}/requests/{stop_request_id}"
                )
                if status_response is None or status_response.get("error"):
                    error_msg = status_response.get("error") if status_response else "Unknown error"
                    return (
                        f"Error: Unable to check stop status for '{component_name}' "
                        f"on '{host_name}'. {error_msg}"
                    )

                req_info = status_response.get("Requests", {})
                req_status = req_info.get("request_status", "Unknown")
                progress_percent = req_info.get("progress_percent", 0)

                if req_status == "COMPLETED":
                    break
                if req_status in ("FAILED", "ABORTED", "TIMEDOUT"):
                    return (
                        f"Error: Stop operation for '{component_name}' on '{host_name}' "
                        f"failed with status '{req_status}'."
                    )

                # %s rather than %d: the progress value comes straight from the
                # response and is not guaranteed to be an integer.
                logger.info(
                    "Stopping '%s' on '%s'... Progress: %s%%",
                    component_name, host_name, progress_percent,
                )
                await asyncio.sleep(poll_interval_seconds)
            else:
                # Loop exhausted without COMPLETED: do not start on top of an unfinished stop.
                return (
                    f"Error: Timed out after {max_wait_seconds}s waiting for '{component_name}' "
                    f"on '{host_name}' to stop (stop request ID: {stop_request_id}, "
                    f"last status: {req_status}). The component was not started; "
                    f"use get_request_status({stop_request_id}) to check progress."
                )
            stop_status = "COMPLETED"

        # Step 3: Start
        start_payload = {
            "RequestInfo": {
                "context": f"Start {component_name} on {host_name} via MCP API",
            },
            "Body": {
                "HostRoles": {
                    "state": "STARTED",
                },
            },
        }

        start_response = await make_ambari_request(endpoint, method="PUT", data=start_payload)
        if start_response is None or start_response.get("error"):
            error_msg = start_response.get("error") if start_response else "Unknown error occurred"
            return f"Error: Unable to start '{component_name}' on '{host_name}'. {error_msg}"

        start_requests = start_response.get("Requests")
        if start_requests is None:
            # Start was accepted but no request object came back: nothing to track by ID.
            result_lines = [
                f"RESTART HOST COMPONENT: {component_name} on {host_name}",
                f"Stop Request ID: {stop_request_id}",
                "Start Request ID: N/A (immediate)",
                "",
                f"Cluster: {cluster_name}",
                f"Host: {host_name}",
                f"Component: {component_name}",
                f"Initial State: {initial_state}",
                f"Stop Status: {stop_status}",
                "Start Status: Command sent successfully",
                "",
                f"Next: get_host_details('{host_name}') to verify current state.",
            ]
            return "\n".join(result_lines)

        start_request_id = start_requests.get("id", "Unknown")
        start_status = start_requests.get("status", "Unknown")
        start_href = start_response.get("href", "")

        if start_request_id != "Unknown":
            next_step = f"Next: get_request_status({start_request_id}) for updates."
        else:
            next_step = f"Next: get_host_details('{host_name}') to verify state soon."

        result_lines = [
            f"RESTART HOST COMPONENT: {component_name} on {host_name}",
            f"Stop Request ID: {stop_request_id}",
            f"Start Request ID: {start_request_id}",
            "",
            f"Cluster: {cluster_name}",
            f"Host: {host_name}",
            f"Component: {component_name}",
            f"Initial State: {initial_state}",
            f"Stop Status: {stop_status}",
            f"Start Status: {start_status}",
            f"Start Monitor URL: {start_href}",
            "",
            next_step,
        ]
        return "\n".join(result_lines)

    except Exception as e:
        # Tool boundary: report the failure as text and keep a server-side record.
        logger.error("Error occurred while restarting '%s' on '%s': %s", component_name, host_name, str(e))
        return f"Error: Restart of '{component_name}' on '{host_name}' failed: {str(e)}"
1685+
13751686@mcp .tool ()
13761687@log_tool
13771688async def restart_all_services () -> str :
0 commit comments