Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions cmd/compute-domain-controller/cleanup.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ func (m *CleanupManager[T]) cleanup(ctx context.Context) {

computeDomain, err := m.getComputeDomain(uid)
if err != nil {
klog.Errorf("error getting ComputeDomain: %v", err)
klog.Errorf("Error getting ComputeDomain: %v", err)
continue
}

Expand All @@ -125,7 +125,7 @@ func (m *CleanupManager[T]) cleanup(ctx context.Context) {

klog.Infof("Cleanup: stale %T found for ComputeDomain '%s', running callback", *new(T), uid)
if err := m.callback(ctx, uid); err != nil {
klog.Errorf("error running CleanupManager callback: %v", err)
klog.Errorf("Error running CleanupManager callback: %v", err)
continue
}
}
Expand Down
2 changes: 1 addition & 1 deletion cmd/compute-domain-controller/computedomain.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ func (m *ComputeDomainManager) Start(ctx context.Context) (rerr error) {
defer func() {
if rerr != nil {
if err := m.Stop(); err != nil {
klog.Errorf("error stopping ComputeDomain manager: %v", err)
klog.Errorf("Error stopping ComputeDomain manager: %v", err)
}
}
}()
Expand Down
2 changes: 1 addition & 1 deletion cmd/compute-domain-controller/daemonset.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ func (m *DaemonSetManager) Start(ctx context.Context) (rerr error) {
defer func() {
if rerr != nil {
if err := m.Stop(); err != nil {
klog.Errorf("error stopping DaemonSet manager: %v", err)
klog.Errorf("Error stopping DaemonSet manager: %v", err)
}
}
}()
Expand Down
2 changes: 1 addition & 1 deletion cmd/compute-domain-controller/daemonsetpods.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ func (m *DaemonSetPodManager) Start(ctx context.Context) (rerr error) {
defer func() {
if rerr != nil {
if err := m.Stop(); err != nil {
klog.Errorf("error stopping DaemonSetPod manager: %v", err)
klog.Errorf("Error stopping DaemonSetPod manager: %v", err)
}
}
}()
Expand Down
2 changes: 1 addition & 1 deletion cmd/compute-domain-controller/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ func (m *NodeManager) Start(ctx context.Context) (rerr error) {
defer func() {
if rerr != nil {
if err := m.Stop(); err != nil {
klog.Errorf("error stopping Node manager: %v", err)
klog.Errorf("Error stopping Node manager: %v", err)
}
}
}()
Expand Down
2 changes: 1 addition & 1 deletion cmd/compute-domain-controller/resourceclaimtemplate.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ func (m *BaseResourceClaimTemplateManager) Start(ctx context.Context) (rerr erro
defer func() {
if rerr != nil {
if err := m.Stop(); err != nil {
klog.Errorf("error stopping ResourceClaimTemplate manager: %v", err)
klog.Errorf("Error stopping ResourceClaimTemplate manager: %v", err)
}
}
}()
Expand Down
4 changes: 2 additions & 2 deletions cmd/compute-domain-daemon/computedomain.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ func (m *ComputeDomainManager) Start(ctx context.Context) (rerr error) {
defer func() {
if rerr != nil {
if err := m.Stop(); err != nil {
klog.Errorf("error stopping ComputeDomainManager: %v", err)
klog.Errorf("Error stopping ComputeDomainManager: %v", err)
}
}
}()
Expand Down Expand Up @@ -333,7 +333,7 @@ func (m *ComputeDomainManager) MaybePushNodesUpdate(cd *nvapi.ComputeDomain) {
// wait for all 'numNodes' nodes to show up before sending an update.
if !featuregates.Enabled(featuregates.IMEXDaemonsWithDNSNames) {
if len(cd.Status.Nodes) != cd.Spec.NumNodes {
klog.Infof("numNodes: %d, nodes seen: %d", cd.Spec.NumNodes, len(cd.Status.Nodes))
klog.Infof("NumNodes: %d, nodes seen: %d", cd.Spec.NumNodes, len(cd.Status.Nodes))
return
}
}
Expand Down
18 changes: 9 additions & 9 deletions cmd/compute-domain-daemon/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ func run(ctx context.Context, cancel context.CancelFunc, flags *Flags) error {
podNamespace: flags.podNamespace,
maxNodesPerIMEXDomain: flags.maxNodesPerIMEXDomain,
}
klog.Infof("config: %v", config)
klog.Infof("Config: %v", config)

// Write the IMEX config with the current pod IP before starting the daemon
if err := writeIMEXConfig(flags.podIP); err != nil {
Expand Down Expand Up @@ -241,7 +241,7 @@ func run(ctx context.Context, cancel context.CancelFunc, flags *Flags) error {
go func() {
defer wg.Done()
if err := controller.Run(ctx); err != nil {
klog.Errorf("controller failed, initiate shutdown: %s", err)
klog.Errorf("Controller failed, initiate shutdown: %s", err)
cancel()
}
klog.Infof("Terminated: controller task")
Expand Down Expand Up @@ -275,7 +275,7 @@ func run(ctx context.Context, cancel context.CancelFunc, flags *Flags) error {
// Watchdog restarts the IMEX daemon upon unexpected termination, and
// shuts it down gracefully upon our own shutdown.
if err := processManager.Watchdog(ctx); err != nil {
klog.Errorf("watch failed, initiate shutdown: %s", err)
klog.Errorf("Watch failed, initiate shutdown: %s", err)
cancel()
}
klog.Infof("Terminated: process manager")
Expand All @@ -292,10 +292,10 @@ func run(ctx context.Context, cancel context.CancelFunc, flags *Flags) error {
// IMEX daemon nodes config file and (re)starting the IMEX daemon process.
func IMEXDaemonUpdateLoopWithIPs(ctx context.Context, controller *Controller, cliqueID string, pm *ProcessManager) error {
for {
klog.Infof("wait for nodes update")
klog.Infof("Wait for nodes update")
select {
case <-ctx.Done():
klog.Infof("shutdown: stop IMEXDaemonUpdateLoopWithIPs")
klog.Infof("Shutdown: stop IMEXDaemonUpdateLoopWithIPs")
return nil
case nodes := <-controller.GetNodesUpdateChan():
if err := writeNodesConfig(cliqueID, nodes); err != nil {
Expand All @@ -320,10 +320,10 @@ func IMEXDaemonUpdateLoopWithIPs(ctx context.Context, controller *Controller, cl
// unexpectedly and expectedly).
func IMEXDaemonUpdateLoopWithDNSNames(ctx context.Context, controller *Controller, processManager *ProcessManager, dnsNameManager *DNSNameManager) error {
for {
klog.Infof("wait for nodes update")
klog.Infof("Wait for nodes update")
select {
case <-ctx.Done():
klog.Infof("shutdown: stop IMEXDaemonUpdateLoopWithDNSNames")
klog.Infof("Shutdown: stop IMEXDaemonUpdateLoopWithDNSNames")
return nil
case nodes := <-controller.GetNodesUpdateChan():
updated, err := dnsNameManager.UpdateDNSNameMappings(nodes)
Expand All @@ -349,13 +349,13 @@ func IMEXDaemonUpdateLoopWithDNSNames(ctx context.Context, controller *Controlle

// Actively ask the IMEX daemon to re-read its config and to
// re-connect to its peers (involving DNS name re-resolution).
klog.Infof("updated DNS/IP mapping, old process: send SIGUSR1")
klog.Infof("Updated DNS/IP mapping, old process: send SIGUSR1")
if err := processManager.Signal(syscall.SIGUSR1); err != nil {
// Only log (ignore this error for now: if the process went away
// unexpectedly, the process manager will handle that. If any
// other error resulted in bad signal delivery, we may get away
// with it).
klog.Errorf("failed to send SIGUSR1 to child process: %s", err)
klog.Errorf("Failed to send SIGUSR1 to child process: %s", err)
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion cmd/compute-domain-kubelet-plugin/cdi.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ func (cdi *CDIHandler) CreateStandardDeviceSpecFile(allocatable AllocatableDevic
}
defer func() {
if r := cdi.nvml.Shutdown(); r != nvml.SUCCESS {
klog.Warningf("failed to shutdown NVML: %v", r)
klog.Warningf("Failed to shutdown NVML: %v", r)
}
}()

Expand Down
10 changes: 5 additions & 5 deletions cmd/compute-domain-kubelet-plugin/computedomain.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ func (m *ComputeDomainManager) Start(ctx context.Context) (rerr error) {
defer func() {
if rerr != nil {
if err := m.Stop(); err != nil {
klog.Errorf("error stopping ComputeDomainManager: %v", err)
klog.Errorf("Error stopping ComputeDomainManager: %v", err)
}
}
}()
Expand Down Expand Up @@ -366,13 +366,13 @@ func (m *ComputeDomainManager) periodicCleanup(ctx context.Context) {
continue
}
if err != nil {
klog.Errorf("error checking for existence of directory '%s': %v", m.configFilesRoot, err)
klog.Errorf("Error checking for existence of directory '%s': %v", m.configFilesRoot, err)
continue
}

entries, err := os.ReadDir(m.configFilesRoot)
if err != nil {
klog.Errorf("error reading entries under directory '%s': %v", m.configFilesRoot, err)
klog.Errorf("Error reading entries under directory '%s': %v", m.configFilesRoot, err)
continue
}

Expand All @@ -387,7 +387,7 @@ func (m *ComputeDomainManager) periodicCleanup(ctx context.Context) {

computeDomain, err := m.GetComputeDomain(ctx, uid)
if err != nil {
klog.Errorf("error getting ComputeDomain: %v", err)
klog.Errorf("Error getting ComputeDomain: %v", err)
continue
}

Expand All @@ -399,7 +399,7 @@ func (m *ComputeDomainManager) periodicCleanup(ctx context.Context) {
klog.V(6).Infof("Stale directory found for ComputeDomain '%s', running cleanup", uid)

if err := os.RemoveAll(path); err != nil {
klog.Errorf("error removing artifacts directory for ComputeDomain '%s': %v", uid, err)
klog.Errorf("Error removing artifacts directory for ComputeDomain '%s': %v", uid, err)
continue
}
}
Expand Down
10 changes: 5 additions & 5 deletions cmd/compute-domain-kubelet-plugin/device_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ func NewDeviceState(ctx context.Context, config *Config) (*DeviceState, error) {
}

devRoot := containerDriverRoot.getDevRoot()
klog.Infof("using devRoot=%v", devRoot)
klog.Infof("Using devRoot=%v", devRoot)

hostDriverRoot := config.flags.hostDriverRoot
cdi, err := NewCDIHandler(
Expand Down Expand Up @@ -155,7 +155,7 @@ func (s *DeviceState) Prepare(ctx context.Context, claim *resourceapi.ResourceCl
// Make this a noop. Associated device(s) has/have been prepared by us.
// Prepare() must be idempotent, as it may be invoked more than once per
// claim (and actual device preparation must happen at most once).
klog.V(6).Infof("skip prepare: claim %v found in checkpoint", claimUID)
klog.V(6).Infof("Skip prepare: claim %v found in checkpoint", claimUID)
return preparedClaim.PreparedDevices.GetDevices(), nil
}

Expand All @@ -168,7 +168,7 @@ func (s *DeviceState) Prepare(ctx context.Context, claim *resourceapi.ResourceCl
if err != nil {
return nil, fmt.Errorf("unable to update checkpoint: %w", err)
}
klog.V(6).Infof("checkpoint updated for claim %v", claimUID)
klog.V(6).Infof("Checkpoint updated for claim %v", claimUID)

preparedDevices, err := s.prepareDevices(ctx, claim)
if err != nil {
Expand All @@ -189,7 +189,7 @@ func (s *DeviceState) Prepare(ctx context.Context, claim *resourceapi.ResourceCl
if err != nil {
return nil, fmt.Errorf("unable to update checkpoint: %w", err)
}
klog.V(6).Infof("checkpoint updated for claim %v", claimUID)
klog.V(6).Infof("Checkpoint updated for claim %v", claimUID)

return preparedDevices.GetDevices(), nil
}
Expand All @@ -212,7 +212,7 @@ func (s *DeviceState) Unprepare(ctx context.Context, claimRef kubeletplugin.Name
// device was never prepared or has already been unprepared (assume that
// Prepare+Checkpoint are done transactionally). Note that
// claimRef.String() contains namespace, name, UID.
klog.Infof("unprepare noop: claim not found in checkpoint data: %v", claimRef.String())
klog.Infof("Unprepare noop: claim not found in checkpoint data: %v", claimRef.String())
return nil
}

Expand Down
4 changes: 2 additions & 2 deletions cmd/compute-domain-kubelet-plugin/driver.go
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,7 @@ func (d *driver) nodePrepareResource(ctx context.Context, claim *resourceapi.Res
return false, res
}

klog.Infof("prepared devices for claim '%s/%s:%s': %v", claim.Namespace, claim.Name, claim.UID, devs)
klog.Infof("Prepared devices for claim '%s/%s:%s': %v", claim.Namespace, claim.Name, claim.UID, devs)
return true, kubeletplugin.PrepareResult{Devices: devs}
}

Expand All @@ -269,7 +269,7 @@ func (d *driver) nodeUnprepareResource(ctx context.Context, claimRef kubeletplug
return isPermanentError(err), fmt.Errorf("error unpreparing devices for claim '%v': %w", claimRef.String(), err)
}

klog.Infof("unprepared devices for claim '%v'", claimRef.String())
klog.Infof("Unprepared devices for claim '%v'", claimRef.String())
return true, nil
}

Expand Down
8 changes: 4 additions & 4 deletions cmd/compute-domain-kubelet-plugin/health.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ func startHealthcheck(ctx context.Context, config *Config) (*healthcheck, error)
// are enabled and the filename includes a uid.
Path: path.Join(config.flags.kubeletRegistrarDirectoryPath, DriverName+"-reg.sock"),
}).String()
klog.V(6).Infof("connecting to registration socket path=%s", regSockPath)
klog.V(6).Infof("Connecting to registration socket path=%s", regSockPath)
regConn, err := grpc.NewClient(
regSockPath,
grpc.WithTransportCredentials(insecure.NewCredentials()),
Expand All @@ -77,7 +77,7 @@ func startHealthcheck(ctx context.Context, config *Config) (*healthcheck, error)
Scheme: "unix",
Path: path.Join(config.DriverPluginPath(), "dra.sock"),
}).String()
klog.V(6).Infof("connecting to DRA socket path=%s", draSockPath)
klog.V(6).Infof("Connecting to DRA socket path=%s", draSockPath)
draConn, err := grpc.NewClient(
draSockPath,
grpc.WithTransportCredentials(insecure.NewCredentials()),
Expand All @@ -97,9 +97,9 @@ func startHealthcheck(ctx context.Context, config *Config) (*healthcheck, error)
healthcheck.wg.Add(1)
go func() {
defer healthcheck.wg.Done()
klog.Infof("starting healthcheck service at %s", lis.Addr().String())
klog.Infof("Starting healthcheck service at %s", lis.Addr().String())
if err := server.Serve(lis); err != nil {
klog.Errorf("failed to serve healthcheck service on %s: %v", addr, err)
klog.Errorf("Failed to serve healthcheck service on %s: %v", addr, err)
}
}()

Expand Down
4 changes: 2 additions & 2 deletions cmd/compute-domain-kubelet-plugin/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,12 +225,12 @@ func RunPlugin(ctx context.Context, config *Config) error {
if err := ctx.Err(); err != nil && !errors.Is(err, context.Canceled) {
// A canceled context is the normal case here when the process receives
// a signal. Only log the error for more interesting cases.
klog.Errorf("error from context: %v", err)
klog.Errorf("Error from context: %v", err)
}

err = driver.Shutdown()
if err != nil {
klog.Errorf("unable to cleanly shutdown driver: %v", err)
klog.Errorf("Unable to cleanly shutdown driver: %v", err)
}

return nil
Expand Down
6 changes: 3 additions & 3 deletions cmd/compute-domain-kubelet-plugin/nvlib.go
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ func (l deviceLib) init() error {
func (l deviceLib) alwaysShutdown() {
ret := l.nvmllib.Shutdown()
if ret != nvml.SUCCESS {
klog.Warningf("error shutting down NVML: %v", ret)
klog.Warningf("Error shutting down NVML: %v", ret)
}
}

Expand Down Expand Up @@ -217,7 +217,7 @@ func (l deviceLib) getCliqueID() (string, error) {
}

if !isFabricAttached {
klog.Infof("no-clique fallback: fabric not attached (device %d/%s)", i, duid)
klog.Infof("No-clique fallback: fabric not attached (device %d/%s)", i, duid)
return nil
}

Expand All @@ -240,7 +240,7 @@ func (l deviceLib) getCliqueID() (string, error) {

uniqueClusterUUIDs[clusterUUID.String()] = struct{}{}
uniqueCliqueIDs[cliqueID] = struct{}{}
klog.Infof("identified fabric clique UUID/ID (device %d/%s): %s/%s", i, duid, clusterUUID.String(), cliqueID)
klog.Infof("Identified fabric clique UUID/ID (device %d/%s): %s/%s", i, duid, clusterUUID.String(), cliqueID)

return nil
})
Expand Down
2 changes: 1 addition & 1 deletion cmd/gpu-kubelet-plugin/cdi.go
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ func (cdi *CDIHandler) CreateStandardDeviceSpecFile(allocatable AllocatableDevic
}
defer func() {
if r := cdi.nvml.Shutdown(); r != nvml.SUCCESS {
klog.Warningf("failed to shutdown NVML: %v", r)
klog.Warningf("Failed to shutdown NVML: %v", r)
}
}()

Expand Down
12 changes: 6 additions & 6 deletions cmd/gpu-kubelet-plugin/device_state.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ func NewDeviceState(ctx context.Context, config *Config) (*DeviceState, error) {
}

devRoot := containerDriverRoot.getDevRoot()
klog.Infof("using devRoot=%v", devRoot)
klog.Infof("Using devRoot=%v", devRoot)

hostDriverRoot := config.flags.hostDriverRoot
cdi, err := NewCDIHandler(
Expand Down Expand Up @@ -152,14 +152,14 @@ func (s *DeviceState) Prepare(ctx context.Context, claim *resourceapi.ResourceCl
if err != nil {
return nil, fmt.Errorf("unable to update checkpoint: %w", err)
}
klog.V(6).Infof("checkpoint updated for claim %v", claimUID)
klog.V(6).Infof("Checkpoint updated for claim %v", claimUID)

preparedClaim, exists := checkpoint.V2.PreparedClaims[claimUID]
if exists && preparedClaim.CheckpointState == ClaimCheckpointStatePrepareCompleted {
// Make this a noop. Associated device(s) has/have been prepared by us.
// Prepare() must be idempotent, as it may be invoked more than once per
// claim (and actual device preparation must happen at most once).
klog.V(6).Infof("skip prepare: claim %v found in checkpoint", claimUID)
klog.V(6).Infof("Skip prepare: claim %v found in checkpoint", claimUID)
return preparedClaim.PreparedDevices.GetDevices(), nil
}

Expand All @@ -182,7 +182,7 @@ func (s *DeviceState) Prepare(ctx context.Context, claim *resourceapi.ResourceCl
if err != nil {
return nil, fmt.Errorf("unable to update checkpoint: %w", err)
}
klog.V(6).Infof("checkpoint updated for claim %v", claimUID)
klog.V(6).Infof("Checkpoint updated for claim %v", claimUID)

return preparedDevices.GetDevices(), nil
}
Expand All @@ -202,13 +202,13 @@ func (s *DeviceState) Unprepare(ctx context.Context, claimUID string) error {
// device was never prepared or has already been unprepared (assume that
// Prepare+Checkpoint are done transactionally). Note that only the
// claim UID is available here (no claimRef with namespace and name).
klog.Infof("unprepare noop: claim not found in checkpoint data: %v", claimUID)
klog.Infof("Unprepare noop: claim not found in checkpoint data: %v", claimUID)
return nil
}

switch pc.CheckpointState {
case ClaimCheckpointStatePrepareStarted:
klog.Infof("unprepare noop: claim preparation started but not completed: %v", claimUID)
klog.Infof("Unprepare noop: claim preparation started but not completed: %v", claimUID)
return nil
case ClaimCheckpointStatePrepareCompleted:
if err := s.unprepareDevices(ctx, claimUID, pc.PreparedDevices); err != nil {
Expand Down
Loading