Skip to content

Commit 569345c

Browse files
author
Rory Campbell
committed
adds periodic restarting of dynos #2 and staggers error restarts
1 parent 774ebf3 commit 569345c

File tree

8 files changed

+181
-120
lines changed

8 files changed

+181
-120
lines changed

config.json

Lines changed: 0 additions & 6 deletions
This file was deleted.

errorRestarter.js

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
const rp = require('request-promise')
2+
const moment = require('moment')
3+
const { restartAllDynos, sleep } = require('./heroku')
4+
5+
// Tuning values for the error-triggered restarter.
//   allowedTimeoutRatio - shouldRestart requires the share of sampled requests with
//                         status >= 503 to be strictly above this value.
//   minErrorCount       - shouldRestart also requires at least this many status >= 503
//                         responses in the sample.
//   intervalMs          - passed to restartAllDynos as the per-dyno delay and used by go()
//                         as the pause before the process exits.
//   sampleMs            - only log events received less than this many ms before "now"
//                         are counted.
const config = {
6+
allowedTimeoutRatio: 0.05,
7+
minErrorCount: 3,
8+
intervalMs: 10000,
9+
sampleMs: 60000
10+
}
11+
12+
/**
 * Query the Papertrail event search endpoint for `heroku/router` events
 * (up to the `limit=2000` given in the query string).
 *
 * The request is authenticated with the PAPERTRAIL_TOKEN environment variable
 * and the response body is parsed as JSON.
 *
 * @returns {Promise<Array>} the `events` property of the parsed response.
 */
const getLogs = async () => {
  const response = await rp({
    uri: 'https://papertrailapp.com/api/v1/events/search.json?q=heroku/router&limit=2000',
    headers: { 'X-Papertrail-Token': process.env.PAPERTRAIL_TOKEN },
    json: true
  })

  return response.events
}
25+
26+
/**
 * Extract the HTTP status code from a router log event.
 *
 * The code is read from the first ` status=NNN` field (three digits) found in
 * `log.message`.
 *
 * @param {{ message?: string }} log - log event; only `message` is read.
 * @returns {number|null} the status code, or null when the event has no
 *   string message or the message carries no numeric ` status=` field.
 */
const getStatusCode = (log) => {
  const message = log && log.message
  if (typeof message !== 'string') {
    return null
  }

  // Matching explicitly (instead of split + slice inside a try/catch) means a
  // malformed field yields null rather than NaN.
  const match = / status=(\d{3})/.exec(message)
  return match ? Number(match[1]) : null
}
34+
35+
/**
 * Decide whether the recent router logs justify restarting the dynos.
 *
 * Only events received less than `sampleMs` before `now` are considered.
 * Responses with status >= 500 are counted as errors and those with
 * status >= 503 as timeouts; both counts and their ratios are logged.
 *
 * @param {{ allowedTimeoutRatio: number, sampleMs: number, minErrorCount: number }} config
 *   thresholds and sample window.
 * @param {number|Date|string} now - reference time, passed to moment().
 * @param {Array<{ received_at: string, message: string }>} logs - log events.
 * @returns {boolean} true when there are at least `minErrorCount` timeouts and
 *   their share of sampled requests is strictly above `allowedTimeoutRatio`.
 */
const shouldRestart = ({ allowedTimeoutRatio, sampleMs, minErrorCount }, now, logs) => {
  const nowMoment = moment(now)

  const statusCodes = logs
    .filter(log => nowMoment.diff(moment(log.received_at)) < sampleMs)
    .map(log => getStatusCode(log))
    .filter(Boolean) // drop events whose status could not be parsed

  const requestCount = statusCodes.length
  const errors = statusCodes.filter(sc => sc >= 500).length
  const timeouts = statusCodes.filter(sc => sc >= 503).length

  // An empty sample would otherwise divide by zero and report NaN ratios.
  const ratioOf = count => (requestCount === 0 ? 0 : count / requestCount)
  const errorRatio = ratioOf(errors)
  const timeoutRatio = ratioOf(timeouts)

  console.log('datetime', nowMoment.format())
  console.log('request count', requestCount)
  console.log('errors', errors)
  console.log('error ratio', errorRatio)
  console.log('timeouts', timeouts)
  console.log('timeout ratio', timeoutRatio)

  return timeouts >= minErrorCount && timeoutRatio > allowedTimeoutRatio
}
62+
63+
/**
 * Run one check cycle: fetch the router logs, restart every dyno when the
 * timeout thresholds in `config` are exceeded, wait `config.intervalMs`, then
 * exit the process.
 *
 * index.js launches this script through forever-monitor, which presumably
 * starts it again after it exits. For that reason a failed cycle also waits
 * `intervalMs` before exiting (instead of ending immediately), and it exits
 * with status 1 so the failure is visible to whatever supervises the process.
 *
 * @returns {Promise<void>}
 */
const go = async () => {
  const { intervalMs } = config
  let exitCode = 0

  try {
    const logs = await getLogs()

    if (shouldRestart(config, Date.now(), logs)) {
      await restartAllDynos(intervalMs)
    } else {
      console.log('No restart required')
    }
  } catch (err) {
    console.error('errorRestarter error: ', err)
    exitCode = 1
  }

  // Pace the next run on both the success and the failure path.
  await sleep(intervalMs)

  process.exit(exitCode)
}
82+
83+
go()

heroku.js

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
require('dotenv').config()
2+
3+
const Heroku = require('heroku-client')
4+
const heroku = new Heroku({ token: process.env.HEROKU_TARGET_APP_API_TOKEN })
5+
6+
// Thin wrappers around the Heroku API client for the app named by the APP_NAME env var.
// getDynos issues GET /apps/<APP_NAME>/dynos.
// restartDyno issues DELETE /apps/<APP_NAME>/dynos/<id> for the dyno object passed in
// (only its `id` property is read). Both return whatever the client call returns
// (presumably a promise — other code in this project awaits them).
const getDynos = () => heroku.get(`/apps/${process.env.APP_NAME}/dynos`)
7+
const restartDyno = ({ id }) => heroku.delete(`/apps/${process.env.APP_NAME}/dynos/${id}`)
8+
9+
/**
 * Restart every dyno of the target app one at a time, pausing after each one
 * so the restarts are staggered rather than simultaneous.
 *
 * @param {number} delay - milliseconds to wait after each dyno restart.
 * @returns {Promise<void>} resolves once every dyno has been restarted.
 */
const restartAllDynos = async (delay) => {
  console.log(`Restarting dynos for ${process.env.APP_NAME}`)

  // The dyno list must be awaited: iterating the pending promise itself
  // visits nothing, so no dyno would ever be restarted.
  const dynos = await getDynos()

  for (const dyno of dynos) {
    await restartDyno(dyno)
    await sleep(delay)
  }

  console.log('Dynos restarted')
}
18+
19+
// Sequential async counterpart of Array.prototype.forEach: calls
// callback(element, index, array) for each index from 0 up to array.length and
// awaits each call before moving on to the next element.
// `array.length` is read on every iteration; a value without a numeric length
// results in zero iterations.
async function asyncForEach (array, callback) {
20+
for (let index = 0; index < array.length; index++) {
21+
await callback(array[index], index, array)
22+
}
23+
}
24+
25+
// Returns a promise that resolves (with no value) after `ms` milliseconds, via setTimeout.
const sleep = ms => new Promise(resolve => setTimeout(resolve, ms))
26+
27+
module.exports = {
28+
getDynos,
29+
restartDyno,
30+
restartAllDynos,
31+
sleep,
32+
asyncForEach
33+
}

index.js

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,27 @@
1-
var forever = require('forever-monitor');
1+
var forever = require('forever-monitor')
22

3-
const file = 'monitor.js';
4-
5-
var child = new (forever.Monitor)(file, {
3+
// restarts all dynos when error rate goes too high
4+
const errorRestarterFile = 'errorRestarter.js'
5+
6+
var errorRestarterChild = new (forever.Monitor)(errorRestarterFile, {
7+
args: []
8+
})
9+
10+
errorRestarterChild.on('exit', function () {
11+
console.log(`${errorRestarterFile} has exited`)
12+
})
13+
14+
errorRestarterChild.start()
15+
16+
// restarts a proportion of dynos every set period of time
17+
const periodicRestarterFile = 'periodicRestarter.js'
18+
19+
var periodicRestarterChild = new (forever.Monitor)(periodicRestarterFile, {
620
args: []
7-
});
21+
})
822

9-
child.on('exit', function () {
10-
console.log(`${file} has exited`);
11-
});
23+
periodicRestarterChild.on('exit', function () {
24+
console.log(`${periodicRestarterFile} has exited`)
25+
})
1226

13-
child.start();
27+
periodicRestarterChild.start()

monitor.js

Lines changed: 0 additions & 101 deletions
This file was deleted.

package-lock.json

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
"version": "1.0.0",
44
"description": "",
55
"main": "index.js",
6+
"engines": {
7+
"node": "12.18.0"
8+
},
69
"scripts": {
710
"start": "node index.js",
811
"test": "echo \"Error: no test specified\" && exit 1"
@@ -13,7 +16,8 @@
1316
"dotenv": "^8.2.0",
1417
"forever-monitor": "^2.0.0",
1518
"heroku-client": "^3.1.0",
16-
"moment": "^2.24.0",
19+
"lodash": "^4.17.15",
20+
"moment": "^2.27.0",
1721
"request": "^2.88.2",
1822
"request-promise": "^4.2.5"
1923
}

periodicRestarter.js

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
const { getDynos, sleep, restartDyno } = require('./heroku')
2+
const _ = require('lodash')
3+
const INTERVAL = 15 * 60 * 1000 // 15 mins
4+
const RATIO = 0.1
5+
6+
/**
 * Continuously restart the oldest dyno, paced so that roughly RATIO of the
 * dynos are restarted every INTERVAL milliseconds.
 *
 * Each cycle fetches the dyno list, sorts it by `created_at` (oldest first),
 * waits INTERVAL / (RATIO * dynoCount) milliseconds and restarts the first
 * dyno. When no dynos are returned it waits 10 seconds and checks again.
 *
 * This runs as an awaited loop rather than un-awaited self-recursion, so:
 *  - the "No dynos" case retries instead of falling through to a division by
 *    zero and a restart of an undefined dyno;
 *  - a failure in any cycle rejects the returned promise instead of being lost
 *    in a detached call.
 *
 * @returns {Promise<never>} never resolves; rejects if fetching or restarting
 *   a dyno fails.
 */
const every15MinsRestart10pcOfDynos = async () => {
  while (true) {
    const dynos = _.sortBy(await getDynos(), d => new Date(d.created_at))

    if (!dynos.length) {
      console.log('No dynos')
      await sleep(10000)
      continue
    }

    const timeToSleep = INTERVAL / (RATIO * dynos.length)
    console.log('timeToSleep', timeToSleep)

    await sleep(timeToSleep)

    // Sorted ascending by creation time, so the first entry is the oldest dyno.
    const dyno = dynos[0]

    console.log('restarting dyno', dyno)
    await restartDyno(dyno)
  }
}
29+
30+
// Start the periodic restarter. The function is async, so a failure surfaces
// as a promise rejection; a synchronous try/catch around the call would never
// see it, hence the .catch handler.
every15MinsRestart10pcOfDynos().catch((err) => {
  console.log('err', err)
})

0 commit comments

Comments
 (0)