feat: collect response metrics from cron locations

This commit is contained in:
Adam Janis 2020-11-21 22:01:28 +01:00
parent 35c620f485
commit 7051f275e7
12 changed files with 177 additions and 106 deletions

3
.prettierignore Normal file
View File

@ -0,0 +1,3 @@
# Ignore generated files
out
public

View File

@ -14,22 +14,22 @@ appearance, race, religion, or sexual identity and orientation.
Examples of behavior that contributes to creating a positive environment Examples of behavior that contributes to creating a positive environment
include: include:
* Using welcoming and inclusive language - Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences - Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism - Gracefully accepting constructive criticism
* Focusing on what is best for the community - Focusing on what is best for the community
* Showing empathy towards other community members - Showing empathy towards other community members
Examples of unacceptable behavior by participants include: Examples of unacceptable behavior by participants include:
* The use of sexualized language or imagery and unwelcome sexual attention or - The use of sexualized language or imagery and unwelcome sexual attention or
advances advances
* Trolling, insulting/derogatory comments, and personal or political attacks - Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment - Public or private harassment
* Publishing others' private information, such as a physical or electronic - Publishing others' private information, such as a physical or electronic
address, without explicit permission address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a - Other conduct which could reasonably be considered inappropriate in a
professional setting professional setting
## Our Responsibilities ## Our Responsibilities

View File

@ -10,15 +10,15 @@ Monitor your websites, showcase status including daily history, and get Slack no
You'll need a [Cloudflare Workers account](https://dash.cloudflare.com/sign-up/workers) with You'll need a [Cloudflare Workers account](https://dash.cloudflare.com/sign-up/workers) with
* A workers domain set up - A workers domain set up
* The Workers Bundled subscription \($5/mo\) - The Workers Bundled subscription \($5/mo\)
* [It works with Workers Free!](https://blog.cloudflare.com/workers-kv-free-tier/) Check [more info](#workers-kv-free-tier) on how to run on Workers Free. - [It works with Workers Free!](https://blog.cloudflare.com/workers-kv-free-tier/) Check [more info](#workers-kv-free-tier) on how to run on Workers Free.
* Some websites/APIs to watch 🙂 - Some websites/APIs to watch 🙂
Also, prepare the following secrets Also, prepare the following secrets
* Cloudflare API token with `Edit Cloudflare Workers` permissions - Cloudflare API token with `Edit Cloudflare Workers` permissions
* Slack incoming webhook \(optional\) - Slack incoming webhook \(optional\)
## Getting started ## Getting started
@ -39,6 +39,7 @@ You can either deploy with **Cloudflare Deploy Button** using GitHub Actions or
- Name: SECRET_SLACK_WEBHOOK_URL (optional) - Name: SECRET_SLACK_WEBHOOK_URL (optional)
- Value: your-slack-webhook-url - Value: your-slack-webhook-url
``` ```
3. Navigate to the **Actions** settings in your repository and enable them 3. Navigate to the **Actions** settings in your repository and enable them
4. Edit [config.yaml](./config.yaml) to adjust configuration and list all of your websites/APIs you want to monitor 4. Edit [config.yaml](./config.yaml) to adjust configuration and list all of your websites/APIs you want to monitor
@ -73,32 +74,35 @@ You can either deploy with **Cloudflare Deploy Button** using GitHub Actions or
5. Push to `main` branch to trigger the deployment 5. Push to `main` branch to trigger the deployment
6. 🎉 6. 🎉
7. _\(optional\)_ Go to [Cloudflare Workers settings](https://dash.cloudflare.com/?to=/workers) and assign custom domain/route 7. _\(optional\)_ Go to [Cloudflare Workers settings](https://dash.cloudflare.com/?to=/workers) and assign custom domain/route
* e.g. `status-page.eidam.dev/*` _\(make sure you include `/*` as the Worker also serve static files\)_ - e.g. `status-page.eidam.dev/*` _\(make sure you include `/*` as the Worker also serve static files\)_
8. _\(optional\)_ Edit [wrangler.toml](./wrangler.toml) to adjust Worker settings or CRON Trigger schedule, especially if you are on [Workers Free plan](#workers-kv-free-tier) 8. _\(optional\)_ Edit [wrangler.toml](./wrangler.toml) to adjust Worker settings or CRON Trigger schedule, especially if you are on [Workers Free plan](#workers-kv-free-tier)
### Deploy on your own ### Deploy on your own
You can clone the repository yourself and use Wrangler CLI to develop/deploy, extra list of things you need to take care of: You can clone the repository yourself and use Wrangler CLI to develop/deploy, extra list of things you need to take care of:
* create KV namespace and add the `KV_STATUS_PAGE` binding to [wrangler.toml](./wrangler.toml) - create KV namespace and add the `KV_STATUS_PAGE` binding to [wrangler.toml](./wrangler.toml)
* create Worker secrets _\(optional\)_ - create Worker secrets _\(optional\)_
* `SECRET_SLACK_WEBHOOK_URL` - `SECRET_SLACK_WEBHOOK_URL`
## Workers KV free tier ## Workers KV free tier
The Workers Free plan includes limited KV usage, but the quota is sufficient for 2-minute checks only
* Change the CRON trigger to 2 minutes interval (`crons = ["*/2 * * * *"]`) in [wrangler.toml](./wrangler.toml) The Workers Free plan includes limited KV usage, but the quota is sufficient for 2-minute checks only
- Change the CRON trigger to 2 minutes interval (`crons = ["*/2 * * * *"]`) in [wrangler.toml](./wrangler.toml)
## Known issues ## Known issues
* **Max 25 monitors to watch in case you are using Slack notifications**, due to the limit of subrequests Cloudflare Worker can make \(50\). - **Max 25 monitors to watch in case you are using Slack notifications**, due to the limit of subrequests Cloudflare Worker can make \(50\).
The plan is to support up to 49 by sending only one Slack notification per scheduled run. The plan is to support up to 49 by sending only one Slack notification per scheduled run.
* **KV replication lag** - You might get Slack notification instantly, however it may take couple of more seconds to see the change on your status page as [Cron Triggers are usually running on underutilized quiet hours machines](https://blog.cloudflare.com/introducing-cron-triggers-for-cloudflare-workers/#how-are-you-able-to-offer-this-feature-at-no-additional-cost). - **KV replication lag** - You might get Slack notification instantly, however it may take couple of more seconds to see the change on your status page as [Cron Triggers are usually running on underutilized quiet hours machines](https://blog.cloudflare.com/introducing-cron-triggers-for-cloudflare-workers/#how-are-you-able-to-offer-this-feature-at-no-additional-cost).
* **Initial delay (no data)** - It takes couple of minutes to schedule and run CRON Triggers for the first time - **Initial delay (no data)** - It takes couple of minutes to schedule and run CRON Triggers for the first time
## Future plans ## Future plans
Stay tuned for more features coming in, like leveraging the fact that CRON instances are scheduled around the world during the day Stay tuned for more features coming in, like leveraging the fact that CRON instances are scheduled around the world during the day
so we can monitor the response times. However, we will most probably wait for the [Durable Objects](https://blog.cloudflare.com/introducing-workers-durable-objects/) to be in open beta so we can monitor the response times. However, we will most probably wait for the [Durable Objects](https://blog.cloudflare.com/introducing-workers-durable-objects/) to be in open beta
as they are better fit to reliably store such info. as they are better fit to reliably store such info.

View File

@ -1,4 +1,3 @@
# Table of contents # Table of contents
* [Cloudflare Worker - Status Page](README.md) - [Cloudflare Worker - Status Page](README.md)

View File

@ -3,6 +3,7 @@ settings:
url: 'https://status-page.eidam.dev' # used for Slack messages url: 'https://status-page.eidam.dev' # used for Slack messages
logo: logo-192x192.png # image in ./public/ folder logo: logo-192x192.png # image in ./public/ folder
daysInHistogram: 90 # number of days you want to display in histogram daysInHistogram: 90 # number of days you want to display in histogram
collectResponseTimes: false # experimental feature, enable only for <5 monitors or on paid plans
allmonitorsOperational: 'All Systems Operational' allmonitorsOperational: 'All Systems Operational'
notAllmonitorsOperational: 'Not All Systems Operational' notAllmonitorsOperational: 'Not All Systems Operational'
@ -11,7 +12,7 @@ settings:
monitorLabelNoData: 'No data' monitorLabelNoData: 'No data'
dayInHistogramNoData: 'No data' dayInHistogramNoData: 'No data'
dayInHistogramOperational: 'All good' dayInHistogramOperational: 'All good'
dayInHistogramNotOperational: 'Some checks failed' dayInHistogramNotOperational: ' incident(s)' # xx incident(s) recorded
monitors: monitors:
- id: workers-cloudflare-com # unique identifier - id: workers-cloudflare-com # unique identifier

View File

@ -2,7 +2,7 @@ import { Store } from 'laco'
import { useStore } from 'laco-react' import { useStore } from 'laco-react'
import Head from 'flareact/head' import Head from 'flareact/head'
import { getMonitors, useKeyPress, switchTheme } from '../src/functions/helpers' import { getKVMonitors, useKeyPress } from '../src/functions/helpers'
import config from '../config.yaml' import config from '../config.yaml'
import MonitorCard from '../src/components/monitorCard' import MonitorCard from '../src/components/monitorCard'
import MonitorFilter from '../src/components/monitorFilter' import MonitorFilter from '../src/components/monitorFilter'
@ -24,23 +24,20 @@ const filterByTerm = (term) =>
export async function getEdgeProps() { export async function getEdgeProps() {
// get KV data // get KV data
const { const kvMonitors = await getKVMonitors()
value: kvMonitors,
metadata: kvMonitorsMetadata,
} = await getMonitors()
return { return {
props: { props: {
config, config,
kvMonitors: kvMonitors || {}, kvMonitors: kvMonitors ? kvMonitors.monitors : {},
kvMonitorsMetadata: kvMonitorsMetadata || {}, kvMonitorsLastUpdate: kvMonitors ? kvMonitors.lastUpdate : {},
}, },
// Revalidate these props once every x seconds // Revalidate these props once every x seconds
revalidate: 5, revalidate: 5,
} }
} }
export default function Index({ config, kvMonitors, kvMonitorsMetadata }) { export default function Index({ config, kvMonitors, kvMonitorsLastUpdate }) {
const state = useStore(MonitorStore) const state = useStore(MonitorStore)
const slash = useKeyPress('/') const slash = useKeyPress('/')
@ -81,7 +78,7 @@ export default function Index({ config, kvMonitors, kvMonitorsMetadata }) {
<MonitorFilter active={slash} callback={filterByTerm} /> <MonitorFilter active={slash} callback={filterByTerm} />
</div> </div>
</div> </div>
<MonitorStatusHeader kvMonitorsMetadata={kvMonitorsMetadata} /> <MonitorStatusHeader kvMonitorsLastUpdate={kvMonitorsLastUpdate} />
{state.visible.map((monitor, key) => { {state.visible.map((monitor, key) => {
return ( return (
<MonitorCard <MonitorCard

View File

@ -58,7 +58,7 @@
.tooltip .content { .tooltip .content {
@apply invisible absolute z-50 inline-block; @apply invisible absolute z-50 inline-block;
@apply rounded-full py-1 px-2 bg-gray-100 dark:bg-gray-800 shadow; @apply rounded-lg py-1 px-2 bg-gray-100 dark:bg-gray-800 shadow;
@apply opacity-0 transition-all duration-200 scale-50; @apply opacity-0 transition-all duration-200 scale-50;
} }

View File

@ -16,14 +16,17 @@ export default function MonitorHistogram({ monitorId, kvMonitor }) {
let bg = '' let bg = ''
let dayInHistogramLabel = config.settings.dayInHistogramNoData let dayInHistogramLabel = config.settings.dayInHistogramNoData
// filter all dates before first check, check the rest // filter all dates before first check, then check the rest
if (kvMonitor && kvMonitor.firstCheck <= dayInHistogram) { if (kvMonitor && kvMonitor.firstCheck <= dayInHistogram) {
if (!kvMonitor.failedDays.includes(dayInHistogram)) { if (
kvMonitor.checks.hasOwnProperty(dayInHistogram) &&
kvMonitor.checks[dayInHistogram].fails > 0
) {
bg = 'yellow'
dayInHistogramLabel = `${kvMonitor.checks[dayInHistogram].fails} ${config.settings.dayInHistogramNotOperational}`
} else {
bg = 'green' bg = 'green'
dayInHistogramLabel = config.settings.dayInHistogramOperational dayInHistogramLabel = config.settings.dayInHistogramOperational
} else {
bg = 'yellow'
dayInHistogramLabel = config.settings.dayInHistogramNotOperational
} }
} }
@ -36,6 +39,15 @@ export default function MonitorHistogram({ monitorId, kvMonitor }) {
<span className="font-semibold text-sm"> <span className="font-semibold text-sm">
{dayInHistogramLabel} {dayInHistogramLabel}
</span> </span>
{kvMonitor.checks.hasOwnProperty(dayInHistogram) &&
Object.keys(kvMonitor.checks[dayInHistogram].res).map((key) => {
return (
<>
<br />
{key}: {kvMonitor.checks[dayInHistogram].res[key].a}ms
</>
)
})}
</div> </div>
</div> </div>
) )

View File

@ -7,11 +7,11 @@ const classes = {
'bg-yellow-200 text-yellow-700 dark:bg-yellow-700 dark:text-yellow-200 border-yellow-300 dark:border-yellow-600', 'bg-yellow-200 text-yellow-700 dark:bg-yellow-700 dark:text-yellow-200 border-yellow-300 dark:border-yellow-600',
} }
export default function MonitorStatusHeader({ kvMonitorsMetadata }) { export default function MonitorStatusHeader({ kvMonitorsLastUpdate }) {
let color = 'green' let color = 'green'
let text = config.settings.allmonitorsOperational let text = config.settings.allmonitorsOperational
if (!kvMonitorsMetadata.monitorsOperational) { if (!kvMonitorsLastUpdate.allOperational) {
color = 'yellow' color = 'yellow'
text = config.settings.notAllmonitorsOperational text = config.settings.notAllmonitorsOperational
} }
@ -20,13 +20,11 @@ export default function MonitorStatusHeader({ kvMonitorsMetadata }) {
<div className={`card mb-4 font-semibold ${classes[color]}`}> <div className={`card mb-4 font-semibold ${classes[color]}`}>
<div className="flex flex-row justify-between items-center"> <div className="flex flex-row justify-between items-center">
<div>{text}</div> <div>{text}</div>
{kvMonitorsMetadata.lastUpdate && typeof window !== 'undefined' && ( {kvMonitorsLastUpdate.time && typeof window !== 'undefined' && (
<div className="text-xs font-light"> <div className="text-xs font-light">
checked{' '} checked{' '}
{Math.round( {Math.round((Date.now() - kvMonitorsLastUpdate.time) / 1000)} sec
(Date.now() - kvMonitorsMetadata.lastUpdate.time) / 1000, ago (from {kvMonitorsLastUpdate.loc})
)}{' '}
sec ago (from {kvMonitorsMetadata.lastUpdate.loc})
</div> </div>
)} )}
</div> </div>

View File

@ -12,7 +12,7 @@ export default function MonitorStatusLabel({ kvMonitor }) {
let text = 'No data' let text = 'No data'
if (typeof kvMonitor !== 'undefined') { if (typeof kvMonitor !== 'undefined') {
if (kvMonitor.operational) { if (kvMonitor.lastCheck.operational) {
color = 'green' color = 'green'
text = config.settings.monitorLabelOperational text = config.settings.monitorLabelOperational
} else { } else {

View File

@ -1,33 +1,40 @@
import config from '../../config.yaml' import config from '../../config.yaml'
import { setKV, getKVWithMetadata, notifySlack } from './helpers' import {
notifySlack,
getCheckLocation,
getKVMonitors,
setKVMonitors,
} from './helpers'
function getDate() { function getDate() {
return new Date().toISOString().split('T')[0] return new Date().toISOString().split('T')[0]
} }
export async function processCronTrigger(event) { export async function processCronTrigger(event) {
// Get Worker PoP (saved to monitorsState.lastUpdate.loc at the end of the run)
const checkLocation = await getCheckLocation()
const checkDay = getDate()
// Get monitors state from KV // Get monitors state from KV
let { let monitorsState = await getKVMonitors()
value: monitorsState,
metadata: monitorsStateMetadata,
} = await getKVWithMetadata('monitors_data', 'json')
// Create empty state objects if not exists in KV storage yet // Create empty state objects if not exists in KV storage yet
if (!monitorsState) { if (!monitorsState) {
monitorsState = {} monitorsState = { lastUpdate: {}, monitors: {} }
}
if (!monitorsStateMetadata) {
monitorsStateMetadata = {}
} }
// Reset default all monitors state to true // Reset default all monitors state to true
monitorsStateMetadata.monitorsOperational = true monitorsState.lastUpdate.allOperational = true
for (const monitor of config.monitors) { for (const monitor of config.monitors) {
// Create default monitor state if does not exist yet // Create default monitor state if does not exist yet
if (typeof monitorsState[monitor.id] === 'undefined') { if (typeof monitorsState.monitors[monitor.id] === 'undefined') {
monitorsState[monitor.id] = { failedDays: [] } monitorsState.monitors[monitor.id] = {
firstCheck: checkDay,
lastCheck: {},
checks: {},
}
} }
console.log(`Checking ${monitor.name} ...`) console.log(`Checking ${monitor.name} ...`)
@ -41,52 +48,90 @@ export async function processCronTrigger(event) {
}, },
} }
// Perform a check and measure time
const requestStartTime = Date.now()
const checkResponse = await fetch(monitor.url, init) const checkResponse = await fetch(monitor.url, init)
const requestTime = Math.round(Date.now() - requestStartTime)
// Determine whether operational and status changed
const monitorOperational = const monitorOperational =
checkResponse.status === (monitor.expectStatus || 200) checkResponse.status === (monitor.expectStatus || 200)
const monitorStatusChanged =
monitorsState.monitors[monitor.id].lastCheck.operational !==
monitorOperational
// Save monitor's last check response status
monitorsState.monitors[monitor.id].lastCheck = {
status: checkResponse.status,
statusText: checkResponse.statusText,
operational: monitorOperational,
}
// Send Slack message on monitor change // Send Slack message on monitor change
if ( if (
monitorsState[monitor.id].operational !== monitorOperational && monitorStatusChanged &&
typeof SECRET_SLACK_WEBHOOK_URL !== 'undefined' && typeof SECRET_SLACK_WEBHOOK_URL !== 'undefined' &&
SECRET_SLACK_WEBHOOK_URL !== 'default-gh-action-secret' SECRET_SLACK_WEBHOOK_URL !== 'default-gh-action-secret'
) { ) {
event.waitUntil(notifySlack(monitor, monitorOperational)) event.waitUntil(notifySlack(monitor, monitorOperational))
} }
monitorsState[monitor.id].operational = // make sure checkDay exists in checks in cases when needed
checkResponse.status === (monitor.expectStatus || 200) if (
monitorsState[monitor.id].firstCheck = (config.settings.collectResponseTimes || !monitorOperational) &&
monitorsState[monitor.id].firstCheck || getDate() !monitorsState.monitors[monitor.id].checks.hasOwnProperty(checkDay)
) {
monitorsState.monitors[monitor.id].checks[checkDay] = {
fails: 0,
res: {},
}
}
// Set monitorsOperational and push current day to failedDays if (config.settings.collectResponseTimes && monitorOperational) {
if (!monitorOperational) { // make sure location exists in current checkDay
monitorsStateMetadata.monitorsOperational = false if (
!monitorsState.monitors[monitor.id].checks[checkDay].res.hasOwnProperty(
checkLocation,
)
) {
monitorsState.monitors[monitor.id].checks[checkDay].res[
checkLocation
] = {
n: 0,
ms: 0,
a: 0,
}
}
const failedDay = getDate() // increment number of checks and sum of ms
if (!monitorsState[monitor.id].failedDays.includes(failedDay)) { const no = ++monitorsState.monitors[monitor.id].checks[checkDay].res[
console.log('Saving new failed daily status ...') checkLocation
monitorsState[monitor.id].failedDays.push(failedDay) ].n
const ms = (monitorsState.monitors[monitor.id].checks[checkDay].res[
checkLocation
].ms += requestTime)
// save new average ms
monitorsState.monitors[monitor.id].checks[checkDay].res[
checkLocation
].a = Math.round(ms / no)
} else if (!monitorOperational) {
// Save allOperational to false
monitorsState.lastUpdate.allOperational = false
// Increment failed checks, only on status change (maybe call it .incidents instead?)
if (monitorStatusChanged) {
monitorsState.monitors[monitor.id].checks[checkDay].fails++
} }
} }
} }
// Get Worker PoP and save it to monitorsStateMetadata // Save last update information
const res = await fetch('https://cloudflare-dns.com/dns-query', { monitorsState.lastUpdate.time = Date.now()
method: 'OPTIONS', monitorsState.lastUpdate.loc = checkLocation
})
const loc = res.headers.get('cf-ray').split('-')[1]
monitorsStateMetadata.lastUpdate = {
loc,
time: Date.now(),
}
// Save monitorsState and monitorsStateMetadata to KV storage // Save monitorsState to KV storage
await setKV( await setKVMonitors(monitorsState)
'monitors_data',
JSON.stringify(monitorsState),
monitorsStateMetadata,
)
return new Response('OK') return new Response('OK')
} }

View File

@ -1,18 +1,20 @@
import config from '../../config.yaml' import config from '../../config.yaml'
import { useEffect, useState } from 'react' import { useEffect, useState } from 'react'
export async function getMonitors() { export async function getKVMonitors() {
return await getKVWithMetadata('monitors_data', 'json') // trying both to see performance difference
return KV_STATUS_PAGE.get('monitors_data', 'json')
//return JSON.parse(await KV_STATUS_PAGE.get('monitors_data', 'text'))
}
export async function setKVMonitors(data) {
return setKV('monitors_data', JSON.stringify(data))
} }
export async function setKV(key, value, metadata, expirationTtl) { export async function setKV(key, value, metadata, expirationTtl) {
return KV_STATUS_PAGE.put(key, value, { metadata, expirationTtl }) return KV_STATUS_PAGE.put(key, value, { metadata, expirationTtl })
} }
export async function getKVWithMetadata(key, type = 'text') {
return KV_STATUS_PAGE.getWithMetadata(key, type)
}
export async function notifySlack(monitor, operational) { export async function notifySlack(monitor, operational) {
const payload = { const payload = {
attachments: [ attachments: [
@ -23,10 +25,11 @@ export async function notifySlack(monitor, operational) {
type: 'section', type: 'section',
text: { text: {
type: 'mrkdwn', type: 'mrkdwn',
text: `Monitor *${monitor.name}* changed status to *${operational text: `Monitor *${monitor.name}* changed status to *${
operational
? config.settings.monitorLabelOperational ? config.settings.monitorLabelOperational
: config.settings.monitorLabelNotOperational : config.settings.monitorLabelNotOperational
}*`, }*`,
}, },
}, },
{ {
@ -34,9 +37,11 @@ export async function notifySlack(monitor, operational) {
elements: [ elements: [
{ {
type: 'mrkdwn', type: 'mrkdwn',
text: `${operational ? ':white_check_mark:' : ':x:'} \`${monitor.method ? monitor.method : 'GET' text: `${operational ? ':white_check_mark:' : ':x:'} \`${
} ${monitor.url}\` - :eyes: <${config.settings.url monitor.method ? monitor.method : 'GET'
}|Status Page>`, } ${monitor.url}\` - :eyes: <${
config.settings.url
}|Status Page>`,
}, },
], ],
}, },
@ -78,3 +83,10 @@ export function useKeyPress(targetKey) {
return keyPressed return keyPressed
} }
/**
 * Determine the location code of the Worker that is performing the checks.
 *
 * Sends an `OPTIONS` request to `https://cloudflare-dns.com/dns-query` and
 * returns the second dash-separated segment of the `cf-ray` response header
 * (presumably the Cloudflare PoP code, e.g. `<ray-id>-PRG` -> `PRG`; confirm
 * against Cloudflare's header documentation).
 *
 * @returns {Promise<string>} the location segment of `cf-ray`, or `'unknown'`
 *   when the header is missing or carries no location segment.
 */
export async function getCheckLocation() {
  const res = await fetch('https://cloudflare-dns.com/dns-query', {
    method: 'OPTIONS',
  })

  // Headers.get() yields null for an absent header; calling .split on it
  // would throw and abort the caller before any monitor is checked.
  const ray = res.headers.get('cf-ray')
  const loc = typeof ray === 'string' ? ray.split('-')[1] : undefined

  // The result is used as an object key and serialized to JSON by callers,
  // so always hand back a real string instead of undefined/empty.
  return loc || 'unknown'
}