@@ -35,6 +35,8 @@ use gorgone::class::frame;
3535use JSON::XS;
3636use Time::HiRes;
3737use POSIX qw( strftime) ;
38+ use DateTime;
39+ use DateTime::Format::Strptime;
3840use Digest::MD5 qw( md5_hex) ;
3941use Try::Tiny;
4042use EV;
@@ -149,6 +151,125 @@ sub hdisco_is_running_job {
149151
150152 return 0;
151153}
=head3 $self->hdisco_can_start_job(job => $jobRef, timeout => $seconds)

Check if we can start a host discovery job.
If the job is flagged as running but has exceeded its timeout, the job is
marked as failed in the database and the caller is told it may start it again.

For now there is no real mutex on the execution except the db column status,
so if the post execution command outlives the timeout, undefined behaviour may appear.

Parameters:

=over 4

=item * job: job information hash ref. Required data to work correctly are:

=over 4

=item * status: the job status (see JOB_FINISH and other constants for possible states)

=item * job_id: the job unique identifier. Used to set the job as failed if the timeout is reached.

=item * last_execution: hash containing 'timezone' and 'date' (ex 'Europe/Paris', '2026-01-30 20:31:00.000000').
When 'timezone' is missing, 'UTC' is used.

=item * execution: hash containing 'mode'. Only jobs whose mode equals 1 are ever timed out and restarted.

=back

=item * timeout: optional timeout in seconds. Defaults to $self->{global_timeout}.

=back

Output : Bool

1 if the job should be started

0 if invalid data given, or job correctly running and should not be started.

=cut

sub hdisco_can_start_job {
    my ($self, %options) = @_;

    my $job = $options{job};
    if (!$job || !defined($job->{status}) || !defined($job->{job_id})) {
        return 0;
    }

    if ($job->{status} != JOB_RUNNING && $job->{status} != SAVE_RUNNING) {
        # if job is not running, we can start it safely
        return 1;
    }

    my $last_execution = $job->{last_execution};
    if (!defined($last_execution) || !defined($last_execution->{date})) {
        # probably first run of the job
        return 1;
    }

    # Read the mode without autovivifying 'execution' in the job hash, and
    # without a numeric comparison on undef.
    my $mode = ref($job->{execution}) eq 'HASH' ? $job->{execution}->{mode} : undef;
    if (!defined($mode) || $mode != 1) {
        # never timeout manual or paused jobs, only retry automatic jobs
        return 0;
    }

    my $seconds_since_last_exec = $self->_get_duration_since_last_exec(
        date     => $last_execution->{date},
        timezone => $last_execution->{timezone} // 'UTC'
    );
    # could not parse the date, don't try to start the job again.
    return 0 if (!defined($seconds_since_last_exec));

    # NOTE(review): the grace period is twice the timeout plus 10 seconds; the
    # rationale for this margin is not visible in this part of the file.
    my $timeout = $options{timeout} // $self->{global_timeout};
    if ($seconds_since_last_exec <= $timeout * 2 + 10) {
        # job did not reach timeout, let it run, don't start it again.
        return 0;
    }

    # job is in timeout, restarting it.
    $self->{logger}->writeLogError(
        " [autodiscovery] job is timing out (last execution: '" . $last_execution->{date} . " '), we set it as failed and restart it"
    );
    my $update_status = $self->update_job_information(
        values => {
            status  => JOB_FAILED,
            message => ' Job timed out and will be restarted by Gorgone',
        },
        where_clause => [
            { id => $job->{job_id} }
        ]
    );
    # the database still says "running": do not start a second execution.
    return 0 if (-1 == $update_status);

    # Keep the in-memory cache in sync, but never create a cache entry for a
    # job that is not already known.
    if (exists($self->{hdisco_jobs_ids}->{ $job->{job_id} })) {
        $self->{hdisco_jobs_ids}->{ $job->{job_id} }->{status} = JOB_FAILED;
    }

    return 1;
}
=head3 $self->_get_duration_since_last_exec(date => $dateStr, timezone => $tzStr)

Calculate the duration in seconds since the last job execution.

Parameters:

=over 4

=item * date: execution date string in format 'YYYY-MM-DD HH:MM:SS.NNNNNN' (e.g., '2026-01-30 20:31:00.000000')

=item * timezone: timezone string for the date (e.g., 'Europe/Paris', 'UTC'). Defaults to 'UTC' when not defined.

=back

Output : Int or undef

Number of seconds since the last execution.

Returns undef (in scalar context) if the date cannot be parsed, if the
timezone is not usable, or if the date is in the future. A warning is logged
in each of these cases.

=cut

sub _get_duration_since_last_exec {
    my ($self, %options) = @_;

    my $date     = $options{date} // '';
    my $timezone = $options{timezone} // 'UTC';

    # parse format "2026-01-29 15:35:12.000000" given by php api.
    # parse_datetime() returns undef when the string does not match, but the
    # constructor throws on an unknown timezone name, hence the try/catch.
    my $parse_error;
    my $last_exec = try {
        DateTime::Format::Strptime->new(
            pattern   => '%Y-%m-%d %H:%M:%S.%6N',
            time_zone => $timezone,
        )->parse_datetime($date);
    }
    catch {
        $parse_error = "$_";
        chomp($parse_error);
        undef;
    };

    if (!defined($last_exec)) {
        my $detail = defined($parse_error) ? ' (' . $parse_error . ')' : '';
        $self->{logger}->writeLogWarning(
            " [autodiscovery] can not parse last execution date '" . $date . "' from job, job won't start." . $detail
        );
        return;
    }

    my $duration = DateTime->now()->epoch() - $last_exec->epoch();
    if ($duration < 0) {
        $self->{logger}->writeLogWarning(" [autodiscovery] last execution date '" . $date . " ' is in the future, job won't start.");
        return;
    }

    return $duration;
}
152273
153274sub hdisco_add_cron {
154275 my ($self , %options ) = @_ ;
@@ -443,7 +564,7 @@ sub launchhostdiscovery {
443564 if (!defined ($job_id ) || !defined ($self -> {hdisco_jobs_ids }-> {$job_id })) {
444565 return (1, ' trying to launch discovery for inexistant job' );
445566 }
446- if ($self -> hdisco_is_running_job( status => $self -> {hdisco_jobs_ids }-> {$job_id } -> { status })) {
567+ if (! $self -> hdisco_can_start_job( job => $self -> {hdisco_jobs_ids }-> {$job_id })) {
447568 return (1, ' job is already running' );
448569 }
449570 if ($self -> {hdisco_jobs_ids }-> {$job_id }-> {execution }-> {mode } == EXECUTION_MODE_PAUSE && $options {source } eq ' cron' ) {
@@ -927,7 +1048,7 @@ sub update_job_information {
9271048
9281049 my ($status ) = $self -> {class_object_centreon }-> custom_execute(request => $query , bind_values => \@bind_values );
9291050 if ($status == -1) {
930- $self -> {logger }-> writeLogError(' [autodiscovery] Failed to update job information' );
1051+ $self -> {logger }-> writeLogError(' [autodiscovery] Failed to update job information for ' . join ( ' , ' , @bind_values ) );
9311052 return -1;
9321053 }
9331054
0 commit comments