26
26
27
27
#include <stdio.h>
28
28
#include <stdlib.h>
29
- #include <stdint.h>
30
29
#include <stdbool.h>
31
30
#include <string.h>
32
- #include <ctype.h>
33
- #include <limits.h>
34
31
#include <time.h>
35
32
#include <signal.h>
36
33
#include <unistd.h>
@@ -189,7 +186,7 @@ static int handler(void *user, const char *section, const char *name, const char
189
186
udp_port = atoi (value );
190
187
}
191
188
192
- if (MATCH (_section , "nWdtApps " ))
189
+ if (MATCH (_section , "n_apps " ))
193
190
{
194
191
app_count = atoi (value );
195
192
}
@@ -275,24 +272,25 @@ int read_ini_file()
275
272
276
273
bool is_application_running (int i )
277
274
{
278
- pid_t result = -1 ;
279
-
280
275
if (apps [i ].pid > 0 )
281
276
{
282
277
// Check if the application is running on Linux
283
278
if (kill (apps [i ].pid , 0 ) == 0 )
284
279
{
285
- //LOGD("Process %s is running", apps[i].name);
286
- /* process is running or a zombie */
287
- result = 0 ;
280
+ return true; // Process is running
281
+ }
282
+ else if (errno == EPERM )
283
+ {
284
+ LOGE ("No permission to check if process %s is running : %s" , apps [i ].name , strerror (errno ));
285
+ return true;
288
286
}
289
287
else
290
288
{
291
289
LOGD ("Process %s is not running : %s" , apps [i ].name , strerror (errno ));
292
290
}
293
291
}
294
292
295
- return ( result == 0 ) ;
293
+ return false ;
296
294
}
297
295
298
296
bool is_application_started (int i )
@@ -317,12 +315,17 @@ void start_application(int i)
317
315
}
318
316
else if (pid == 0 )
319
317
{
320
- /* Delete signal handlers */
321
- signal (SIGINT , SIG_DFL ); // restart
322
- signal (SIGTERM , SIG_DFL ); // terminate
323
- signal (SIGQUIT , SIG_DFL ); // reboot
324
- signal (SIGUSR1 , SIG_DFL ); // terminate
325
- signal (SIGUSR2 , SIG_DFL ); // rfu
318
+ // Child process
319
+ // Reset signals to default
320
+ struct sigaction sa ;
321
+ sa .sa_handler = SIG_DFL ;
322
+ sigemptyset (& sa .sa_mask );
323
+ sa .sa_flags = 0 ;
324
+ sigaction (SIGINT , & sa , NULL );
325
+ sigaction (SIGTERM , & sa , NULL );
326
+ sigaction (SIGQUIT , & sa , NULL );
327
+ sigaction (SIGUSR1 , & sa , NULL );
328
+ sigaction (SIGUSR2 , & sa , NULL );
326
329
LOGD ("Starting the process %s with CMD : %s" , apps [i ].name , apps [i ].cmd );
327
330
run_command (apps [i ].cmd );
328
331
LOGE ("Process %s stopped running" , apps [i ].name );
@@ -344,68 +347,66 @@ void kill_application(int i)
344
347
bool killed = false;
345
348
LOGD ("Killing process %s" , apps [i ].name );
346
349
347
- // Send the SIGTERM signal to the application on Linux
348
- if (kill (apps [i ].pid , SIGTERM ) < 0 )
350
+ if (kill (apps [i ].pid , SIGTERM ) < 0 && errno != ESRCH )
349
351
{
350
- if (errno != ESRCH ) // No such process
351
- {
352
- LOGE ("Failed to terminate process %s, error: %d - %s" , apps [i ].name , errno , strerror (errno ));
353
- }
352
+ LOGE ("Failed to terminate process %s, error: %d - %s" , apps [i ].name , errno , strerror (errno ));
354
353
}
355
354
356
- // Wait for the process to terminate
357
355
int status ;
358
- LOGD ("Waiting for the process %s" , apps [i ].name );
359
356
int max_wait = MAX_WAIT_PROCESS_TERMINATION ; // [seconds]
357
+ LOGD ("Waiting for the process %s" , apps [i ].name );
360
358
361
359
do
362
360
{
363
361
sleep (1 );
362
+ int ret = waitpid (apps [i ].pid , & status , WNOHANG | WUNTRACED | WCONTINUED );
364
363
365
- if (waitpid (apps [i ].pid , & status , WUNTRACED | WCONTINUED ) < 0 )
366
- {
367
- if (errno != ECHILD )
368
- {
369
- LOGE ("Failed to wait for process %s, error : %d - %s" , apps [i ].name , errno , strerror (errno ));
370
- }
371
- }
372
-
373
- if (WIFEXITED (status ))
364
+ if (ret == 0 )
374
365
{
375
- LOGD ("Process %s exited, status=%d" , apps [i ].name , WEXITSTATUS (status ));
376
- max_wait = 0 ;
366
+ LOGD ("Process %s is still running" , apps [i ].name );
377
367
}
378
- else if (WIFSIGNALED ( status ) )
368
+ else if (ret < 0 )
379
369
{
380
- LOGD ("Process %s killed by signal %d" , apps [i ].name , WTERMSIG (status ));
381
- max_wait = 0 ;
382
- }
383
- else if (WIFSTOPPED (status ))
384
- {
385
- LOGD ("Process %s stopped by signal %d" , apps [i ].name , WSTOPSIG (status ));
386
- max_wait = 0 ;
370
+ if (errno == ECHILD )
371
+ {
372
+ LOGD ("Process %s already terminated" , apps [i ].name );
373
+ max_wait = 0 ;
374
+ }
375
+ else
376
+ {
377
+ LOGE ("Failed to wait for process %s, error: %d - %s" , apps [i ].name , errno , strerror (errno ));
378
+ }
387
379
}
388
- else if (WIFCONTINUED ( status ) )
380
+ else if (ret > 0 )
389
381
{
390
- LOGD ("Process %s continued" , apps [i ].name );
391
- max_wait -- ;
382
+ if (WIFEXITED (status ))
383
+ {
384
+ LOGD ("Process %s exited, status=%d" , apps [i ].name , WEXITSTATUS (status ));
385
+ max_wait = 0 ;
386
+ }
387
+ else if (WIFSIGNALED (status ))
388
+ {
389
+ LOGD ("Process %s killed by signal %d" , apps [i ].name , WTERMSIG (status ));
390
+ max_wait = 0 ;
391
+ }
392
+ else if (WIFSTOPPED (status ))
393
+ {
394
+ LOGD ("Process %s stopped by signal %d" , apps [i ].name , WSTOPSIG (status ));
395
+ max_wait = 0 ;
396
+ }
392
397
}
393
- }
394
- while (0 < max_wait );
395
398
396
- sleep (1 );
399
+ max_wait -- ;
400
+ }
401
+ while (max_wait > 0 );
397
402
398
- // If the process hasn't terminated after receiving SIGTERM, send the SIGKILL signal
399
403
if (is_application_running (i ))
400
404
{
401
405
LOGD ("Sending SIGKILL to process %s" , apps [i ].name );
402
406
403
- if (kill (apps [i ].pid , SIGKILL ) < 0 )
407
+ if (kill (apps [i ].pid , SIGKILL ) < 0 && errno != ESRCH )
404
408
{
405
- if (errno != ESRCH ) // No such process
406
- {
407
- LOGE ("Failed to kill process %s, error : %d - %s" , apps [i ].name , errno , strerror (errno ));
408
- }
409
+ LOGE ("Failed to kill process %s, error: %d - %s" , apps [i ].name , errno , strerror (errno ));
409
410
}
410
411
else
411
412
{
@@ -429,34 +430,44 @@ void kill_application(int i)
429
430
apps [i ].first_heartbeat = false;
430
431
apps [i ].pid = 0 ;
431
432
}
433
+ else
434
+ {
435
+ LOGE ("Failed to terminate process %s" , apps [i ].name );
436
+ }
432
437
}
433
438
434
439
void restart_application (int i )
435
440
{
436
- // Log that the application is being restarted
437
441
LOGD ("Restarting process %s" , apps [i ].name );
438
442
439
- // Kill the existing instance of the application
440
443
if (is_application_running (i ))
441
444
{
442
445
kill_application (i );
443
446
}
444
447
445
- // Start a new instance of the application
446
448
start_application (i );
447
- // Wait for the new instance of the application to start
448
- sleep (2 );
449
+ // Wait for the application to start
450
+ int wait_time = 0 ;
451
+
452
+ while (wait_time < MAX_WAIT_PROCESS_START )
453
+ {
454
+ sleep (1 );
455
+
456
+ if (is_application_running (i ))
457
+ {
458
+ break ;
459
+ }
460
+
461
+ wait_time ++ ;
462
+ }
449
463
450
- // Check if the new instance of the application is running
451
464
if (!is_application_running (i ))
452
465
{
453
466
LOGE ("Failed to start process %s" , apps [i ].name );
454
467
}
455
468
else
456
469
{
457
- // Update the last_heartbeat time to prevent immediate restart
458
470
update_heartbeat_time (i );
459
- // Log that the application has been successfully restarted
460
471
LOGI ("Process %s restarted successfully" , apps [i ].name );
461
472
}
462
473
}
0 commit comments