[PATCH v2 1/2] procd: add service instance watchdog

Daniel Bailey danielb at meshplusplus.com
Mon Jul 13 18:05:31 EDT 2020


Added an instance watchdog which, when its timer expires without being
serviced, either terminates or respawns the instance depending on the
instance's respawn setting.

Added the service ubus method 'watchdog', which services the watchdog
timer and allows the instance's watchdog mode and timeout to be updated.

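Two modes are implemented: disabled and passive. (An 'active' mode value
is also defined, but this patch treats it the same as passive.)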

Disabled: cancels watchdog timer set for a given instance.

Passive: sets an instance timer which must be serviced; if the timer
expires, the instance is stopped or restarted (depending on the instance
respawn value).

Signed-off-by: Daniel Bailey <danielb at meshplusplus.com>
---
 service/instance.c | 67 ++++++++++++++++++++++++++++++++++++++++++++++
 service/instance.h | 15 +++++++++++
 service/service.c  | 66 +++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 148 insertions(+)

diff --git a/service/instance.c b/service/instance.c
index c65da50..c83e227 100644
--- a/service/instance.c
+++ b/service/instance.c
@@ -66,6 +66,7 @@ enum {
 	INSTANCE_ATTR_OVERLAYDIR,
 	INSTANCE_ATTR_TMPOVERLAYSIZE,
 	INSTANCE_ATTR_BUNDLE,
+	INSTANCE_ATTR_WATCHDOG,
 	__INSTANCE_ATTR_MAX
 };
 
@@ -97,6 +98,7 @@ static const struct blobmsg_policy instance_attr[__INSTANCE_ATTR_MAX] = {
 	[INSTANCE_ATTR_OVERLAYDIR] = { "overlaydir", BLOBMSG_TYPE_STRING },
 	[INSTANCE_ATTR_TMPOVERLAYSIZE] = { "tmpoverlaysize", BLOBMSG_TYPE_STRING },
 	[INSTANCE_ATTR_BUNDLE] = { "bundle", BLOBMSG_TYPE_STRING },
+	[INSTANCE_ATTR_WATCHDOG] = { "watchdog", BLOBMSG_TYPE_ARRAY },
 };
 
 enum {
@@ -553,6 +555,11 @@ instance_start(struct service_instance *in)
 		fcntl(epipe[0], F_SETFD, FD_CLOEXEC);
 	}
 
+	if (in->watchdog.mode != INSTANCE_WATCHDOG_MODE_DISABLED) {
+		uloop_timeout_set(&in->watchdog.timeout, in->watchdog.freq * 1000);
+		DEBUG(2, "Started instance %s::%s watchdog timer : timeout = %d\n", in->srv->name, in->name, in->watchdog.freq);
+	}
+
 	service_event("instance.start", in->srv->name, in->name);
 }
 
@@ -700,6 +707,7 @@ instance_exit(struct uloop_process *p, int ret)
 
 	in->exit_code = instance_exit_code(ret);
 	uloop_timeout_cancel(&in->timeout);
+	uloop_timeout_cancel(&in->watchdog.timeout);
 	service_event("instance.stop", in->srv->name, in->name);
 
 	if (in->halt) {
@@ -759,6 +767,19 @@ instance_restart(struct service_instance *in)
 	uloop_timeout_set(&in->timeout, in->term_timeout * 1000);
 }
 
+/* uloop callback for an expired watchdog timer: restart the instance if respawn is set, else stop it. */
+static void
+instance_watchdog(struct uloop_timeout *t)
+{
+	struct service_instance *inst = container_of(t, struct service_instance, watchdog.timeout);
+
+	DEBUG(3, "instance %s::%s watchdog timer expired\n", inst->srv->name, inst->name);
+	if (!inst->respawn)
+		instance_stop(inst, true);
+	else
+		instance_restart(inst);
+}
+
 static bool string_changed(const char *a, const char *b)
 {
 	return !((!a && !b) || (a && b && !strcmp(a, b)));
@@ -825,6 +846,12 @@ instance_config_changed(struct service_instance *in, struct service_instance *in
 	if (!blobmsg_list_equal(&in->errors, &in_new->errors))
 		return true;
 
+	if (in->watchdog.mode != in_new->watchdog.mode)
+		return true;
+
+	if (in->watchdog.freq != in_new->watchdog.freq)
+		return true;
+
 	return false;
 }
 
@@ -1184,6 +1211,35 @@ instance_config_parse(struct service_instance *in)
 			DEBUG(3, "unknown syslog facility '%s' given, using default (LOG_DAEMON)\n", blobmsg_get_string(tb[INSTANCE_ATTR_FACILITY]));
 	}
 
+	/* "watchdog" array: element 0 is the mode, element 1 the timeout; both are parsed with atoi(). */
+	if (tb[INSTANCE_ATTR_WATCHDOG]) {
+		int i = 0;
+		uint32_t vals[2] = { 0, 30 };
+
+		blobmsg_for_each_attr(cur2, tb[INSTANCE_ATTR_WATCHDOG], rem) {
+			if (i >= 2)
+				break;
+			vals[i] = atoi(blobmsg_get_string(cur2));
+			i++;
+		}
+		/* vals[] is unsigned, so a negative mode wraps to a large value and is rejected here. */
+		if (vals[0] < __INSTANCE_WATCHDOG_MODE_MAX) {
+			in->watchdog.mode = vals[0];
+			DEBUG(3, "setting watchdog mode (%d)\n", vals[0]);
+		} else {
+			in->watchdog.mode = 0;
+			DEBUG(3, "unknown watchdog mode (%d) given, using default (0)\n", vals[0]);
+		}
+		/* The timeout is multiplied by 1000 when the timer is armed; keep that product within int range. */
+		if (vals[1] > 0 && vals[1] <= INT32_MAX / 1000) {
+			in->watchdog.freq = vals[1];
+			DEBUG(3, "setting watchdog timeout (%d)\n", vals[1]);
+		} else {
+			in->watchdog.freq = 30;
+			DEBUG(3, "invalid watchdog timeout (%d) given, using default (30)\n", vals[1]);
+		}
+	}
+
 	return true;
 }
 
@@ -1269,6 +1325,7 @@ instance_free(struct service_instance *in)
 	instance_free_stdio(in);
 	uloop_process_delete(&in->proc);
 	uloop_timeout_cancel(&in->timeout);
+	uloop_timeout_cancel(&in->watchdog.timeout);
 	trigger_del(in);
 	watch_del(in);
 	instance_config_cleanup(in);
@@ -1323,6 +1380,9 @@ instance_init(struct service_instance *in, struct service *s, struct blob_attr *
 	blobmsg_list_simple_init(&in->limits);
 	blobmsg_list_simple_init(&in->errors);
 	blobmsg_list_simple_init(&in->jail.mount);
+
+	in->watchdog.timeout.cb = instance_watchdog;
+
 	in->valid = instance_config_parse(in);
 }
 
@@ -1444,5 +1504,12 @@ void instance_dump(struct blob_buf *b, struct service_instance *in, int verbose)
 	if (verbose && in->trigger)
 		blobmsg_add_blob(b, in->trigger);
 
+	if (in->watchdog.mode != INSTANCE_WATCHDOG_MODE_DISABLED) {
+		void *r = blobmsg_open_table(b, "watchdog");
+		blobmsg_add_u32(b, "mode", in->watchdog.mode);
+		blobmsg_add_u32(b, "timeout", in->watchdog.freq);
+		blobmsg_close_table(b, r);
+	}
+
 	blobmsg_close_table(b, i);
 }
diff --git a/service/instance.h b/service/instance.h
index e8ee15c..bb8a0c4 100644
--- a/service/instance.h
+++ b/service/instance.h
@@ -39,6 +39,19 @@ struct jail {
 	int argc;
 };
 
+typedef enum instance_watchdog {	/* watchdog mode of a service instance */
+	INSTANCE_WATCHDOG_MODE_DISABLED,	/* no watchdog timer is armed for the instance */
+	INSTANCE_WATCHDOG_MODE_PASSIVE,	/* timer must be serviced, else the instance is stopped/restarted */
+	INSTANCE_WATCHDOG_MODE_ACTIVE,	/* NOTE(review): accepted, but not handled differently from PASSIVE in this patch */
+	__INSTANCE_WATCHDOG_MODE_MAX,	/* number of modes; used as the upper bound when validating a mode */
+} instance_watchdog_mode_t;
+
+struct watchdog {	/* per-instance watchdog state */
+	instance_watchdog_mode_t mode;	/* current watchdog mode */
+	uint32_t freq;	/* watchdog timeout; multiplied by 1000 when passed to uloop_timeout_set() */
+	struct uloop_timeout timeout;	/* uloop timer; its callback is set to instance_watchdog() in instance_init() */
+};
+
 struct service_instance {
 	struct vlist_node node;
 	struct service *srv;
@@ -95,6 +108,8 @@ struct service_instance {
 	struct blobmsg_list file;
 	struct blobmsg_list limits;
 	struct blobmsg_list errors;
+
+	struct watchdog watchdog;
 };
 
 void instance_start(struct service_instance *in);
diff --git a/service/service.c b/service/service.c
index fcf0215..9a174bc 100644
--- a/service/service.c
+++ b/service/service.c
@@ -784,6 +784,71 @@ err_console_fd:
 	return UBUS_STATUS_INVALID_ARGUMENT;
 }
 
+enum {	/* indices into service_watchdog_policy / the parsed attribute table */
+	SERVICE_WATCHDOG_MODE,	/* "mode": new watchdog mode (optional) */
+	SERVICE_WATCHDOG_TIMEOUT,	/* "timeout": new watchdog timeout (optional) */
+	SERVICE_WATCHDOG_NAME,	/* "name": service name (required by the handler) */
+	SERVICE_WATCHDOG_INSTANCE,	/* "instance": instance name within the service (required by the handler) */
+	__SERVICE_WATCHDOG_MAX,
+};
+
+static const struct blobmsg_policy service_watchdog_policy[__SERVICE_WATCHDOG_MAX] = { /* args of ubus 'watchdog' */
+	[SERVICE_WATCHDOG_MODE] = { "mode", BLOBMSG_TYPE_INT32 },
+	[SERVICE_WATCHDOG_TIMEOUT] = { "timeout", BLOBMSG_TYPE_INT32 },
+	[SERVICE_WATCHDOG_NAME] = { "name", BLOBMSG_TYPE_STRING },
+	[SERVICE_WATCHDOG_INSTANCE] = { "instance", BLOBMSG_TYPE_STRING },
+};
+
+/* ubus 'watchdog' method: re-arm or cancel an instance's watchdog timer, optionally updating mode/timeout first. */
+static int
+service_handle_watchdog(struct ubus_context *ctx, struct ubus_object *obj,
+		    struct ubus_request_data *req, const char *method,
+		    struct blob_attr *msg)
+{
+	struct blob_attr *tb[__SERVICE_WATCHDOG_MAX] = {0};
+	struct service *s;
+	struct blob_attr *cur;
+	struct service_instance *in;
+
+	blobmsg_parse(service_watchdog_policy, __SERVICE_WATCHDOG_MAX, tb, blobmsg_data(msg), blobmsg_data_len(msg));
+	if (!tb[SERVICE_WATCHDOG_NAME] || !tb[SERVICE_WATCHDOG_INSTANCE])
+		return UBUS_STATUS_NOT_FOUND;
+
+	s = avl_find_element(&services, blobmsg_data(tb[SERVICE_WATCHDOG_NAME]), s, avl);
+	if (!s)
+		return UBUS_STATUS_NOT_FOUND;
+	cur = tb[SERVICE_WATCHDOG_INSTANCE];
+	in = vlist_find(&s->instances, blobmsg_data(cur), in, node);
+	if (!in) {
+		ERROR("instance %s not found\n", blobmsg_get_string(cur));
+		return UBUS_STATUS_NOT_FOUND;
+	}
+
+	/* Reject bad values before storing either; freq * 1000 must fit uloop_timeout_set()'s int msecs. */
+	cur = tb[SERVICE_WATCHDOG_MODE];
+	if (cur && blobmsg_get_u32(cur) >= __INSTANCE_WATCHDOG_MODE_MAX)
+		return UBUS_STATUS_INVALID_ARGUMENT;
+	if (tb[SERVICE_WATCHDOG_TIMEOUT] && blobmsg_get_u32(tb[SERVICE_WATCHDOG_TIMEOUT]) > INT32_MAX / 1000)
+		return UBUS_STATUS_INVALID_ARGUMENT;
+	if (cur)
+		in->watchdog.mode = blobmsg_get_u32(cur);
+	if (tb[SERVICE_WATCHDOG_TIMEOUT])
+		in->watchdog.freq = blobmsg_get_u32(tb[SERVICE_WATCHDOG_TIMEOUT]);
+
+	if (in->watchdog.mode == INSTANCE_WATCHDOG_MODE_DISABLED)
+		uloop_timeout_cancel(&in->watchdog.timeout);
+	else
+		uloop_timeout_set(&in->watchdog.timeout, in->watchdog.freq * 1000);
+
+	blob_buf_init(&b, 0);
+	blobmsg_add_string(&b, "name", blobmsg_get_string(tb[SERVICE_WATCHDOG_NAME]));
+	blobmsg_add_string(&b, "instance", blobmsg_get_string(tb[SERVICE_WATCHDOG_INSTANCE]));
+	blobmsg_add_u32(&b, "mode", in->watchdog.mode);
+	blobmsg_add_u32(&b, "timeout", in->watchdog.freq);
+	ubus_send_reply(ctx, req, b.head);
+
+	return UBUS_STATUS_OK;
+}
 
 static struct ubus_method main_object_methods[] = {
 	UBUS_METHOD("set", service_handle_set, service_set_attrs),
@@ -797,6 +862,7 @@ static struct ubus_method main_object_methods[] = {
 	UBUS_METHOD("validate", service_handle_validate, validate_policy),
 	UBUS_METHOD("get_data", service_get_data, get_data_policy),
 	UBUS_METHOD("state", service_handle_state, service_state_attrs),
+	UBUS_METHOD("watchdog", service_handle_watchdog, service_watchdog_policy),
 };
 
 static struct ubus_object_type main_object_type =
-- 
2.25.1




More information about the openwrt-devel mailing list