Janitor

docker-compose.yml

YAML

services:
  # Janitor monitoring service. The service key previously read "janior";
  # it now matches the image and container name.
  janitor:
    image: abali/janitor:v1.2.2
    container_name: janitor
    ports:
      # host:container mapping, quoted so it is always parsed as a string.
      # 8080 matches the web.port value used in config.yml.
      - "8080:8080"
    restart: unless-stopped
    volumes:
      # Bind-mount the local config.yml into the container at /janitor/config.yml.
      - ./config/config.yml:/janitor/config.yml

config.yml

YAML

### GENERAL CONFIGURATION
### All these directives are optional. If missing, defaults will be applied (see below).

# Enable debug logging (default: false).
# If enabled, detailed information about received data will be logged.
# If disabled, only errors will be logged.
# NOTE: set to true in this example, i.e. the non-default, verbose mode.
debug: true

# Maximum size of the log history to maintain (default: 1000)
logsize: 1000

# Hostname used for Prometheus metrics (default: obtained from the OS or "janitor")
hostname: "janitor"

# Configuration of the built-in web server:
# - host: host name to bind to (default: '', meaning all interfaces will be bound)
# - port: port number to listen on (default: 8080)
web:
  host: ""
  port: 8080

### MONITORING TARGETS
### All these blocks are optional. If missing, will not be monitored.

monitor:
  # MQTT monitoring
  mqtt:
    # Server connection details
    # Scheme (optional: can be mqtt or mqtts, mqtt by default) and host name
    server: "mqtt://openwrt.lan"
    # Port number (default: 1883)
    port: 1883
    # User and password if required.
    # Left unset here; written as an explicit null rather than a bare empty value.
    user: null
    password: null
    # Maximum number of messages to retain for display and average frequency calculation (default: 10)
    history: 10
    # Standard timeout to apply as a multiple of the average transmit frequency
    # (default: 1.5, interpreted as 1.5 * avg transmit frequency).
    # Can be overridden per topic below.
    standardtimeout: 1.5
    # Topics to monitor. Can include wildcard ('/#') and wildcard subscriptions can overlap with others.
    # In case of overlap, the first matching topic will be used to determine timeout.
    targets:
      # Topic name
      - topic: "/sensors/aqara_inside/#"
        # Custom timeout threshold (fixed value in seconds) to be used for the topic
        # (default: standardtimeout as defined above)
        timeout: 3600
        # Custom name to show instead of MQTT topic
        # (note: does not work with wildcard topic names, only exact matches)
        name: "Inside temperature"
      - topic: "/sensors/aqara_outside/#"
        timeout: 3600
      # The remaining topics set no timeout of their own, so standardtimeout applies.
      - topic: "/sensors/#"
      - topic: "/stats/#"

  # Ping monitoring
  ping:
    # Seconds between checks when a target sets no interval of its own (default: 60)
    interval: 60
    # Consecutive misses needed to raise an alert when a target sets
    # no threshold of its own (default: 2)
    threshold: 2
    # Hosts to ping
    targets:
      # Target name, used for display and alerting
      - name: "openwrt"
        # Network address to ping
        address: "openwrt.lan"
        # Per-target interval in seconds (falls back to the default interval above)
        interval: 10
        # Per-target alert threshold in consecutive misses
        # (falls back to the default threshold above)
        threshold: 3
      # This target sets neither interval nor threshold, so the defaults above apply.
      - name: "google"
        address: "google.com"

  # HTTP monitoring
  http:
    # Seconds between checks unless a target overrides it (default: 60)
    interval: 60
    # HTTP request timeout in milliseconds unless a target overrides it (default: 5000)
    timeout: 5000
    # Consecutive misses needed to raise an alert unless a target overrides it (default: 2)
    threshold: 2
    # Targets to monitor with HTTP GET
    targets:
      # Target name, used for display and alerting
      - name: "NAS"
        # HTTP address for the GET request
        address: "http://index.nas"
        # Optional string to check for in the reply (not used if not specified)
        value: "nas"
        # Per-target interval in seconds (falls back to the default interval above)
        interval: 10
        # Per-target request timeout in milliseconds
        # (falls back to the default timeout above)
        timeout: 500
        # Per-target alert threshold in consecutive failed requests
        # (falls back to the default threshold above)
        threshold: 3
      # This target sets only name and address, so the defaults above apply.
      - name: "google"
        address: "http://google.com"

  # Monitoring by executing an external command
  exec:
    # Seconds between checks (default: 60); can be overridden per target below
    interval: 60
    # Milliseconds allowed for the command to complete (default: 5000).
    # When this timeout is reached, the execution is terminated and marked as failed.
    timeout: 5000
    # Consecutive failed commands needed to raise an alert (default: 2);
    # can be overridden per target below
    threshold: 2
    targets:
      # Target name, used for display and alerting
      - name: "check if /tmp/foo exists"
        # Command to be executed. On Linux, this will be given to a shell process.
        command: "test -f /tmp/foo"

### ALERTING METHODS
### All these blocks are optional. If missing, will not be used for alerting.

alert:
  # Telegram alerting. Please specify token and chat id for the messages.
  # For more information, please see:
  # https://core.telegram.org/bots#6-botfather
  # https://stackoverflow.com/questions/45414021/get-telegram-channel-group-id
  #telegram:
  #  token: "INSERTYOURTOKENHERE"
  #  chat: 12

  # Gotify alerting. Please specify server and token.
  #gotify:
  #  server: "http://nas:8765"
  #  token: AMuHwu9EhjF0Z4M

  # Executing arbitrary commands. On Linux, this will be given to a shell process.
  # The command will receive the alert message on standard input.
  #exec: /home/joe/bin/alert.sh

  # Alerting by posting to predefined MQTT topic with a JSON payload. Example payload with description of the structure:
  # {"type":"HTTP",                                      // type of the sensor - potential values: HTTP, Ping, MQTT
  #  "name":"NAS",                                       // name of the sensor
  #  "status":"ERROR",                                   // status of the sensor - potential values: OK, ERROR
  #  "since":"2020-03-22T18:58:00.157284409+01:00",      // last error/ok timestamp for sensor
  #  "error":"",                                         // further error message if any
  #  "message":"⚠ HTTP ERROR for NAS, last seen 2s ago"} // text alert message
  mqtt:
    # Server connection details
    # Scheme (optional: can be mqtt or mqtts, mqtt by default) and host name
    server: "mqtt://openwrt.lan"
    # Port number (default: 1883)
    port: 1883
    # User and password if required.
    # Left unset here; written as an explicit null rather than a bare empty value.
    user: null
    password: null
    # MQTT topic the JSON alert payload is posted to
    topic: "/janitor/alerts"