[OpenWrt-Devel] [PATCH 1/2] procd: add service instance watchdog

Daniel Bailey danielb at meshplusplus.com
Fri May 29 21:32:55 EDT 2020


From: Daniel Bailey <danielb at meshplusplus.com>
Date: Fri, 29 May 2020 17:37:25 -0700
Subject: [PATCH] procd: add service instance watchdog

Added an instance watchdog which, when its timer expires, either
terminates or respawns the instance depending on the instance respawn
setting.

Added service ubus method 'watchdog' which services the watchdog
timer and allows the watchdog mode and timeout of an instance to be
updated.

Three modes: disabled, passive, active. Presently, only disabled
and passive modes are implemented.

Disabled: cancels watchdog timer set for a given instance.

Passive: sets an instance timer which must be serviced before it
expires; otherwise the instance is stopped or restarted, depending
upon the instance respawn value.

Active (to be implemented): requires an additional service 'endpoint'
parameter. Upon watchdog timer expiry, procd will query the endpoint
to determine whether the instance is alive. If the instance does not
answer, procd will terminate or respawn the instance depending on
the instance respawn setting.

Signed-off-by: Daniel Bailey <danielb at meshplusplus.com>
---
 service/instance.c | 68 ++++++++++++++++++++++++++++++++++++++++++++++
 service/instance.h | 15 ++++++++++
 service/service.c  | 68 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 151 insertions(+)

diff --git a/service/instance.c b/service/instance.c
index 142208a..8560a95 100644
--- a/service/instance.c
+++ b/service/instance.c
@@ -65,6 +65,7 @@ enum {
  INSTANCE_ATTR_EXTROOT,
  INSTANCE_ATTR_OVERLAYDIR,
  INSTANCE_ATTR_TMPOVERLAYSIZE,
+ INSTANCE_ATTR_WATCHDOG,
  __INSTANCE_ATTR_MAX
 };

@@ -95,6 +96,7 @@ static const struct blobmsg_policy
instance_attr[__INSTANCE_ATTR_MAX] = {
  [INSTANCE_ATTR_EXTROOT] = { "extroot", BLOBMSG_TYPE_STRING },
  [INSTANCE_ATTR_OVERLAYDIR] = { "overlaydir", BLOBMSG_TYPE_STRING },
  [INSTANCE_ATTR_TMPOVERLAYSIZE] = { "tmpoverlaysize", BLOBMSG_TYPE_STRING
},
+ [INSTANCE_ATTR_WATCHDOG] = { "watchdog", BLOBMSG_TYPE_ARRAY },
 };

 enum {
@@ -546,6 +548,11 @@ instance_start(struct service_instance *in)
  fcntl(epipe[0], F_SETFD, FD_CLOEXEC);
  }

+ if (in->watchdog.mode != INSTANCE_WATCHDOG_MODE_DISABLED) {
+ uloop_timeout_set(&in->watchdog.timeout, in->watchdog.freq * 1000);
+ DEBUG(2, "Started instance %s::%s watchdog timer : timeout = %d\n",
in->srv->name, in->name, in->watchdog.freq);
+ }
+
  service_event("instance.start", in->srv->name, in->name);
 }

@@ -693,6 +700,7 @@ instance_exit(struct uloop_process *p, int ret)

  in->exit_code = instance_exit_code(ret);
  uloop_timeout_cancel(&in->timeout);
+ uloop_timeout_cancel(&in->watchdog.timeout);
  service_event("instance.stop", in->srv->name, in->name);

  if (in->halt) {
@@ -752,6 +760,19 @@ instance_restart(struct service_instance *in)
  uloop_timeout_set(&in->timeout, in->term_timeout * 1000);
 }

+/*
+ * Watchdog timer callback: runs when an instance's watchdog timer expires
+ * without having been cancelled or re-armed in time. An instance with
+ * respawn set is restarted; any other instance is stopped through
+ * instance_stop(in, true).
+ */
+static void
+instance_watchdog(struct uloop_timeout *t)
+{
+	struct service_instance *in;
+
+	/* the timer is embedded in the instance, so recover the owner from it */
+	in = container_of(t, struct service_instance, watchdog.timeout);
+
+	DEBUG(3, "instance %s::%s watchdog timer expired\n", in->srv->name, in->name);
+
+	if (!in->respawn) {
+		instance_stop(in, true);
+		return;
+	}
+
+	instance_restart(in);
+}
+
 static bool string_changed(const char *a, const char *b)
 {
  return !((!a && !b) || (a && b && !strcmp(a, b)));
@@ -817,6 +838,12 @@ instance_config_changed(struct service_instance *in,
struct service_instance *in
  if (!blobmsg_list_equal(&in->errors, &in_new->errors))
  return true;

+ if (in->watchdog.mode != in_new->watchdog.mode)
+ return true;
+
+ if (in->watchdog.freq != in_new->watchdog.freq)
+ return true;
+
  return false;
 }

@@ -1170,6 +1197,36 @@ instance_config_parse(struct service_instance *in)
  DEBUG(3, "unknown syslog facility '%s' given, using default
(LOG_DAEMON)\n", blobmsg_get_string(tb[INSTANCE_ATTR_FACILITY]));
  }

+	/*
+	 * "watchdog" is an array of up to two numeric strings: the mode and the
+	 * timeout in seconds. Entries that are missing keep the defaults below.
+	 */
+	if (tb[INSTANCE_ATTR_WATCHDOG]) {
+		/* signed, so that a negative number from atoi() fails the range checks */
+		int vals[2] = { INSTANCE_WATCHDOG_MODE_DISABLED, 30 };
+		int i = 0;
+
+		blobmsg_for_each_attr(cur2, tb[INSTANCE_ATTR_WATCHDOG], rem) {
+			if (i >= 2)
+				break;
+
+			/* atoi() needs a string; an entry of any other type keeps its default */
+			if (blobmsg_type(cur2) == BLOBMSG_TYPE_STRING)
+				vals[i] = atoi(blobmsg_get_string(cur2));
+			i++;
+		}
+
+		// TODO(danielb): change mode integers to strings (0 = disabled, 1 = passive, 2 = active)
+		if (vals[0] >= 0 && vals[0] < __INSTANCE_WATCHDOG_MODE_MAX) {
+			in->watchdog.mode = vals[0];
+			DEBUG(3, "setting watchdog mode (%d)\n", vals[0]);
+		} else {
+			in->watchdog.mode = INSTANCE_WATCHDOG_MODE_DISABLED;
+			DEBUG(3, "unknown watchdog mode (%d) given, using default (0)\n", vals[0]);
+		}
+
+		/* the timer is armed with freq * 1000 ms passed as an int; keep that in range */
+		if (vals[1] > 0 && vals[1] <= INT32_MAX / 1000) {
+			in->watchdog.freq = vals[1];
+			DEBUG(3, "setting watchdog timeout (%d)\n", vals[1]);
+		} else {
+			in->watchdog.freq = 30;
+			DEBUG(3, "invalid watchdog timeout (%d) given, using default (30)\n", vals[1]);
+		}
+	}
+
  return true;
 }

@@ -1255,6 +1312,7 @@ instance_free(struct service_instance *in)
  instance_free_stdio(in);
  uloop_process_delete(&in->proc);
  uloop_timeout_cancel(&in->timeout);
+ uloop_timeout_cancel(&in->watchdog.timeout);
  trigger_del(in);
  watch_del(in);
  instance_config_cleanup(in);
@@ -1308,6 +1366,9 @@ instance_init(struct service_instance *in, struct
service *s, struct blob_attr *
  blobmsg_list_simple_init(&in->limits);
  blobmsg_list_simple_init(&in->errors);
  blobmsg_list_simple_init(&in->jail.mount);
+
+ in->watchdog.timeout.cb = instance_watchdog;
+
  in->valid = instance_config_parse(in);
 }

@@ -1425,5 +1486,12 @@ void instance_dump(struct blob_buf *b, struct
service_instance *in, int verbose)
  if (verbose && in->trigger)
  blobmsg_add_blob(b, in->trigger);

+ if (in->watchdog.mode != INSTANCE_WATCHDOG_MODE_DISABLED) {
+ void *r = blobmsg_open_table(b, "watchdog");
+ blobmsg_add_u32(b, "mode", in->watchdog.mode);
+ blobmsg_add_u32(b, "timeout", in->watchdog.freq);
+ blobmsg_close_table(b, r);
+ }
+
  blobmsg_close_table(b, i);
 }
diff --git a/service/instance.h b/service/instance.h
index 4400cd4..590f931 100644
--- a/service/instance.h
+++ b/service/instance.h
@@ -23,6 +23,19 @@
 #define RESPAWN_ERROR (5 * 60)
 #define SIGNALLED_OFFSET 128

+typedef enum instance_watchdog { /* watchdog mode of a service instance */
+ INSTANCE_WATCHDOG_MODE_DISABLED, /* 0: no watchdog timer is armed for the instance */
+ INSTANCE_WATCHDOG_MODE_PASSIVE, /* 1: timer must be serviced in time, else the instance is restarted or stopped */
+ INSTANCE_WATCHDOG_MODE_ACTIVE, /* 2: described in the commit message as not yet implemented */
+ __INSTANCE_WATCHDOG_MODE_MAX, /* number of modes; upper bound when validating a configured mode */
+} instance_watchdog_mode_t;
+
+struct watchdog { /* watchdog state embedded in each service_instance */
+ instance_watchdog_mode_t mode; /* current watchdog mode */
+ uint32_t freq; /* timeout in seconds; multiplied by 1000 when the timer is armed */
+ struct uloop_timeout timeout; /* timer whose expiry runs the watchdog callback */
+};
+
 struct jail {
  bool procfs;
  bool sysfs;
@@ -94,6 +107,8 @@ struct service_instance {
  struct blobmsg_list file;
  struct blobmsg_list limits;
  struct blobmsg_list errors;
+
+ struct watchdog watchdog;
 };

 void instance_start(struct service_instance *in);
diff --git a/service/service.c b/service/service.c
index fcf0215..d9249a3 100644
--- a/service/service.c
+++ b/service/service.c
@@ -727,6 +727,73 @@ service_get_data(struct ubus_context *ctx, struct
ubus_object *obj,
  return 0;
 }

+enum { /* indices into service_watchdog_policy[] and the parsed attribute table */
+ SERVICE_WATCHDOG_MODE,
+ SERVICE_WATCHDOG_TIMEOUT,
+ SERVICE_WATCHDOG_NAME,
+ SERVICE_WATCHDOG_INSTANCE,
+ __SERVICE_WATCHDOG_MAX, /* number of attributes; sizes the policy and table arrays */
+};
+
+static const struct blobmsg_policy
service_watchdog_policy[__SERVICE_WATCHDOG_MAX] = { /* arguments of the "watchdog" ubus method */
+ [SERVICE_WATCHDOG_MODE] = { "mode", BLOBMSG_TYPE_INT32 }, /* optional: new watchdog mode */
+ [SERVICE_WATCHDOG_NAME] = { "name", BLOBMSG_TYPE_STRING }, /* service name; the handler fails without it */
+ [SERVICE_WATCHDOG_TIMEOUT] = { "timeout", BLOBMSG_TYPE_INT32 }, /* optional: new timeout in seconds */
+ [SERVICE_WATCHDOG_INSTANCE] = { "instance", BLOBMSG_TYPE_STRING }, /* instance name; the handler fails without it */
+};
+
+/*
+ * ubus "watchdog" method: service the watchdog of one service instance.
+ *
+ * "name" and "instance" select the instance. The optional "mode" and
+ * "timeout" (seconds) replace the instance's current watchdog settings.
+ * The timer is then cancelled if the mode is disabled and re-armed
+ * otherwise, and the resulting settings are sent back in the reply.
+ *
+ * Returns UBUS_STATUS_NOT_FOUND if "name" or "instance" is missing or
+ * unknown, and UBUS_STATUS_INVALID_ARGUMENT if the resulting mode or
+ * timeout is out of range; in both cases the instance is left untouched.
+ */
+static int
+service_handle_watchdog(struct ubus_context *ctx, struct ubus_object *obj,
+			struct ubus_request_data *req, const char *method,
+			struct blob_attr *msg)
+{
+	struct blob_attr *tb[__SERVICE_WATCHDOG_MAX] = {0};
+	struct service *s;
+	struct blob_attr *cur;
+	struct service_instance *in;
+	uint32_t mode, timeout;
+
+	blobmsg_parse(service_watchdog_policy, __SERVICE_WATCHDOG_MAX, tb, blobmsg_data(msg), blobmsg_data_len(msg));
+	cur = tb[SERVICE_WATCHDOG_NAME];
+	if (!cur)
+		return UBUS_STATUS_NOT_FOUND;
+
+	s = avl_find_element(&services, blobmsg_data(cur), s, avl);
+	if (!s)
+		return UBUS_STATUS_NOT_FOUND;
+
+	cur = tb[SERVICE_WATCHDOG_INSTANCE];
+	if (!cur)
+		return UBUS_STATUS_NOT_FOUND;
+
+	in = vlist_find(&s->instances, blobmsg_data(cur), in, node);
+	if (!in) {
+		ERROR("instance %s not found\n", blobmsg_get_string(cur));
+		return UBUS_STATUS_NOT_FOUND;
+	}
+
+	/* validate on copies so that a rejected request changes nothing */
+	mode = in->watchdog.mode;
+	timeout = in->watchdog.freq;
+
+	// TODO(danielb): change mode from u32 to string (0 = disabled, 1 = passive, 2 = active)
+	if (tb[SERVICE_WATCHDOG_MODE])
+		mode = blobmsg_get_u32(tb[SERVICE_WATCHDOG_MODE]);
+
+	if (tb[SERVICE_WATCHDOG_TIMEOUT])
+		timeout = blobmsg_get_u32(tb[SERVICE_WATCHDOG_TIMEOUT]);
+
+	if (mode >= __INSTANCE_WATCHDOG_MODE_MAX)
+		return UBUS_STATUS_INVALID_ARGUMENT;
+
+	/*
+	 * An enabled watchdog is armed with timeout * 1000 ms passed as an int:
+	 * zero would expire at once and a too large value would overflow.
+	 */
+	if (mode != INSTANCE_WATCHDOG_MODE_DISABLED &&
+	    (timeout == 0 || timeout > INT32_MAX / 1000))
+		return UBUS_STATUS_INVALID_ARGUMENT;
+
+	in->watchdog.mode = mode;
+	in->watchdog.freq = timeout;
+
+	if (in->watchdog.mode == INSTANCE_WATCHDOG_MODE_DISABLED)
+		uloop_timeout_cancel(&in->watchdog.timeout);
+	else
+		uloop_timeout_set(&in->watchdog.timeout, in->watchdog.freq * 1000);
+
+	blob_buf_init(&b, 0);
+	blobmsg_add_string(&b, "name", blobmsg_get_string(tb[SERVICE_WATCHDOG_NAME]));
+	blobmsg_add_string(&b, "instance", blobmsg_get_string(tb[SERVICE_WATCHDOG_INSTANCE]));
+	blobmsg_add_u32(&b, "mode", in->watchdog.mode);
+	blobmsg_add_u32(&b, "timeout", in->watchdog.freq);
+
+	ubus_send_reply(ctx, req, b.head);
+
+	return UBUS_STATUS_OK;
+}
+
 static int
 container_handle_console(struct ubus_context *ctx, struct ubus_object *obj,
  struct ubus_request_data *req, const char *method,
@@ -797,6 +864,7 @@ static struct ubus_method main_object_methods[] = {
  UBUS_METHOD("validate", service_handle_validate, validate_policy),
  UBUS_METHOD("get_data", service_get_data, get_data_policy),
  UBUS_METHOD("state", service_handle_state, service_state_attrs),
+ UBUS_METHOD("watchdog", service_handle_watchdog, service_watchdog_policy),
 };

 static struct ubus_object_type main_object_type =
--
2.25.1
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.infradead.org/pipermail/openwrt-devel/attachments/20200529/a88026e4/attachment.htm>
-------------- next part --------------
_______________________________________________
openwrt-devel mailing list
openwrt-devel at lists.openwrt.org
https://lists.openwrt.org/mailman/listinfo/openwrt-devel


More information about the openwrt-devel mailing list