Skip to content

Commit ad5c37f

Browse files
committed
Rename nWdtApps to n_apps, improve application start/restart/terminate functionality
1 parent 44087e6 commit ad5c37f

File tree

6 files changed

+81
-69
lines changed

6 files changed

+81
-69
lines changed

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ An example configuration file looks like this:
3131
```ini
3232
[processWatchdog]
3333
udp_port = 12345
34-
nWdtApps = 4
34+
n_apps = 4
3535

3636
1_name = Communicator
3737
1_start_delay = 10
@@ -60,7 +60,7 @@ nWdtApps = 4
6060

6161
### Fields
6262
- `udp_port` : The UDP port on which heartbeats are expected.
63-
- `nWdtApps` : Number of applications to manage (4 in the example).
63+
- `n_apps` : Number of applications to manage (4 in the example).
6464
- `name` : Name of the application.
6565
- `start_delay` : Delay in seconds before starting the application.
6666
- `heartbeat_delay` : Time in seconds to wait before expecting a heartbeat from the application.

config.ini

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11

22
[processWatchdog]
33
udp_port = 12345
4-
nWdtApps = 4
4+
n_apps = 4
55
1_name = Communicator
66
1_start_delay = 10
77
1_heartbeat_delay = 60

run.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ echo "Starting $0"
8585

8686
while :
8787
do
88-
chmod +x ${homedir}/${app}
88+
chmod +x ${homedir}/${app}
8989
${homedir}/${app} -i ${homedir}/${ini}
9090
RET_CODE=$?
9191
CURRENTDATE=$(date +"%d-%m-%Y %T")
@@ -104,7 +104,7 @@ do
104104
fi
105105

106106
sleep 1
107-
107+
108108
sync
109109

110110
echo "Restarting $0"

src/apps.c

Lines changed: 74 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,8 @@
2626

2727
#include <stdio.h>
2828
#include <stdlib.h>
29-
#include <stdint.h>
3029
#include <stdbool.h>
3130
#include <string.h>
32-
#include <ctype.h>
33-
#include <limits.h>
3431
#include <time.h>
3532
#include <signal.h>
3633
#include <unistd.h>
@@ -189,7 +186,7 @@ static int handler(void *user, const char *section, const char *name, const char
189186
udp_port = atoi(value);
190187
}
191188

192-
if(MATCH(_section, "nWdtApps"))
189+
if(MATCH(_section, "n_apps"))
193190
{
194191
app_count = atoi(value);
195192
}
@@ -275,24 +272,25 @@ int read_ini_file()
275272

276273
bool is_application_running(int i)
277274
{
278-
pid_t result = -1;
279-
280275
if(apps[i].pid > 0)
281276
{
282277
// Check if the application is running on Linux
283278
if(kill(apps[i].pid, 0) == 0)
284279
{
285-
//LOGD("Process %s is running", apps[i].name);
286-
/* process is running or a zombie */
287-
result = 0;
280+
return true; // Process is running
281+
}
282+
else if(errno == EPERM)
283+
{
284+
LOGE("No permission to check if process %s is running : %s", apps[i].name, strerror(errno));
285+
return true;
288286
}
289287
else
290288
{
291289
LOGD("Process %s is not running : %s", apps[i].name, strerror(errno));
292290
}
293291
}
294292

295-
return (result == 0);
293+
return false;
296294
}
297295

298296
bool is_application_started(int i)
@@ -317,12 +315,17 @@ void start_application(int i)
317315
}
318316
else if(pid == 0)
319317
{
320-
/* Delete signal handlers */
321-
signal(SIGINT, SIG_DFL); // restart
322-
signal(SIGTERM, SIG_DFL); // terminate
323-
signal(SIGQUIT, SIG_DFL); // reboot
324-
signal(SIGUSR1, SIG_DFL); // terminate
325-
signal(SIGUSR2, SIG_DFL); // rfu
318+
// Child process
319+
// Reset signals to default
320+
struct sigaction sa;
321+
sa.sa_handler = SIG_DFL;
322+
sigemptyset(&sa.sa_mask);
323+
sa.sa_flags = 0;
324+
sigaction(SIGINT, &sa, NULL);
325+
sigaction(SIGTERM, &sa, NULL);
326+
sigaction(SIGQUIT, &sa, NULL);
327+
sigaction(SIGUSR1, &sa, NULL);
328+
sigaction(SIGUSR2, &sa, NULL);
326329
LOGD("Starting the process %s with CMD : %s", apps[i].name, apps[i].cmd);
327330
run_command(apps[i].cmd);
328331
LOGE("Process %s stopped running", apps[i].name);
@@ -344,68 +347,66 @@ void kill_application(int i)
344347
bool killed = false;
345348
LOGD("Killing process %s", apps[i].name);
346349

347-
// Send the SIGTERM signal to the application on Linux
348-
if(kill(apps[i].pid, SIGTERM) < 0)
350+
if(kill(apps[i].pid, SIGTERM) < 0 && errno != ESRCH)
349351
{
350-
if(errno != ESRCH) // No such process
351-
{
352-
LOGE("Failed to terminate process %s, error: %d - %s", apps[i].name, errno, strerror(errno));
353-
}
352+
LOGE("Failed to terminate process %s, error: %d - %s", apps[i].name, errno, strerror(errno));
354353
}
355354

356-
// Wait for the process to terminate
357355
int status;
358-
LOGD("Waiting for the process %s", apps[i].name);
359356
int max_wait = MAX_WAIT_PROCESS_TERMINATION; // [seconds]
357+
LOGD("Waiting for the process %s", apps[i].name);
360358

361359
do
362360
{
363361
sleep(1);
362+
int ret = waitpid(apps[i].pid, &status, WNOHANG | WUNTRACED | WCONTINUED);
364363

365-
if(waitpid(apps[i].pid, &status, WUNTRACED | WCONTINUED) < 0)
366-
{
367-
if(errno != ECHILD)
368-
{
369-
LOGE("Failed to wait for process %s, error : %d - %s", apps[i].name, errno, strerror(errno));
370-
}
371-
}
372-
373-
if(WIFEXITED(status))
364+
if(ret == 0)
374365
{
375-
LOGD("Process %s exited, status=%d", apps[i].name, WEXITSTATUS(status));
376-
max_wait = 0;
366+
LOGD("Process %s is still running", apps[i].name);
377367
}
378-
else if(WIFSIGNALED(status))
368+
else if(ret < 0)
379369
{
380-
LOGD("Process %s killed by signal %d", apps[i].name, WTERMSIG(status));
381-
max_wait = 0;
382-
}
383-
else if(WIFSTOPPED(status))
384-
{
385-
LOGD("Process %s stopped by signal %d", apps[i].name, WSTOPSIG(status));
386-
max_wait = 0;
370+
if(errno == ECHILD)
371+
{
372+
LOGD("Process %s already terminated", apps[i].name);
373+
max_wait = 0;
374+
}
375+
else
376+
{
377+
LOGE("Failed to wait for process %s, error: %d - %s", apps[i].name, errno, strerror(errno));
378+
}
387379
}
388-
else if(WIFCONTINUED(status))
380+
else if(ret > 0)
389381
{
390-
LOGD("Process %s continued", apps[i].name);
391-
max_wait--;
382+
if(WIFEXITED(status))
383+
{
384+
LOGD("Process %s exited, status=%d", apps[i].name, WEXITSTATUS(status));
385+
max_wait = 0;
386+
}
387+
else if(WIFSIGNALED(status))
388+
{
389+
LOGD("Process %s killed by signal %d", apps[i].name, WTERMSIG(status));
390+
max_wait = 0;
391+
}
392+
else if(WIFSTOPPED(status))
393+
{
394+
LOGD("Process %s stopped by signal %d", apps[i].name, WSTOPSIG(status));
395+
max_wait = 0;
396+
}
392397
}
393-
}
394-
while(0 < max_wait);
395398

396-
sleep(1);
399+
max_wait--;
400+
}
401+
while(max_wait > 0);
397402

398-
// If the process hasn't terminated after receiving SIGTERM, send the SIGKILL signal
399403
if(is_application_running(i))
400404
{
401405
LOGD("Sending SIGKILL to process %s", apps[i].name);
402406

403-
if(kill(apps[i].pid, SIGKILL) < 0)
407+
if(kill(apps[i].pid, SIGKILL) < 0 && errno != ESRCH)
404408
{
405-
if(errno != ESRCH) // No such process
406-
{
407-
LOGE("Failed to kill process %s, error : %d - %s", apps[i].name, errno, strerror(errno));
408-
}
409+
LOGE("Failed to kill process %s, error: %d - %s", apps[i].name, errno, strerror(errno));
409410
}
410411
else
411412
{
@@ -429,34 +430,44 @@ void kill_application(int i)
429430
apps[i].first_heartbeat = false;
430431
apps[i].pid = 0;
431432
}
433+
else
434+
{
435+
LOGE("Failed to terminate process %s", apps[i].name);
436+
}
432437
}
433438

434439
void restart_application(int i)
435440
{
436-
// Log that the application is being restarted
437441
LOGD("Restarting process %s", apps[i].name);
438442

439-
// Kill the existing instance of the application
440443
if(is_application_running(i))
441444
{
442445
kill_application(i);
443446
}
444447

445-
// Start a new instance of the application
446448
start_application(i);
447-
// Wait for the new instance of the application to start
448-
sleep(2);
449+
// Wait for the application to start
450+
int wait_time = 0;
451+
452+
while(wait_time < MAX_WAIT_PROCESS_START)
453+
{
454+
sleep(1);
455+
456+
if(is_application_running(i))
457+
{
458+
break;
459+
}
460+
461+
wait_time++;
462+
}
449463

450-
// Check if the new instance of the application is running
451464
if(!is_application_running(i))
452465
{
453466
LOGE("Failed to start process %s", apps[i].name);
454467
}
455468
else
456469
{
457-
// Update the last_heartbeat time to prevent immediate restart
458470
update_heartbeat_time(i);
459-
// Log that the application has been successfully restarted
460471
LOGI("Process %s restarted successfully", apps[i].name);
461472
}
462473
}

src/apps.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
#define MAX_APPS 6 /**< Maximum supported number of applications. */
3434
#define MAX_APP_CMD_LENGTH 256 /**< Maximum length of the command to start an application. */
3535
#define MAX_APP_NAME_LENGTH 32 /**< Maximum length of an application name. */
36+
#define MAX_WAIT_PROCESS_START 5 /**< Maximum time to wait for a process to start running (seconds). */
3637
#define MAX_WAIT_PROCESS_TERMINATION 30 /**< Maximum time to wait for a process to terminate (seconds). */
3738
#define INI_FILE "config.ini" /**< Default ini file path. */
3839

src/main.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,7 @@ void help(char *progname)
229229
fprintf(stderr, GREEN "\nINI File example config:\n" RESET
230230
"[processWatchdog]\n"
231231
"udp_port = 12345\n"
232-
"nWdtApps = 4\n"
232+
"n_apps = 4\n"
233233
"1_name = App1\n"
234234
"1_start_delay = 10\n"
235235
"1_heartbeat_delay = 60\n"

0 commit comments

Comments
 (0)