Skip to content

Commit ec7dd9e

Browse files
authored
Merge pull request #200 from bsyk/defrag
Add script 'cloudflare-defragment'
2 parents 04f95f5 + c5c1d7b commit ec7dd9e

File tree

4 files changed

+212
-24
lines changed

4 files changed

+212
-24
lines changed

cf_defragment.js

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import {
2+
defragmentZeroTrustLists,
3+
upsertZeroTrustDNSRule,
4+
upsertZeroTrustSNIRule,
5+
deleteZeroTrustListsOneByOne
6+
} from "./lib/api.js";
7+
import { BLOCK_BASED_ON_SNI } from "./lib/constants.js";
8+
import { notifyWebhook } from "./lib/utils.js";
9+
10+
// Repack the list entries. The helper reports which lists ended up empty,
// which lists remain in use, and the counters printed in the summary below.
const { emptyLists, nonEmptyLists, stats } = await defragmentZeroTrustLists();

// The rules only need rewriting when at least one list was emptied.
if (emptyLists.length !== 0) {
  console.log("Updating rules...");

  // Rewrite the rule(s) against the remaining lists first, so the emptied
  // lists are no longer referenced when we delete them.
  await upsertZeroTrustDNSRule(nonEmptyLists, "CGPS Filter Lists");

  // The SNI rule is optional; it only works for users who proxy their
  // traffic through Cloudflare.
  if (BLOCK_BASED_ON_SNI) {
    await upsertZeroTrustSNIRule(nonEmptyLists, "CGPS Filter Lists - SNI Based Filtering");
  }

  console.log("Deleting empty lists...");
  await deleteZeroTrustListsOneByOne(emptyLists);
}

// Build the summary of what was done, then print it line by line.
const summaryLines = [
  `Defragmented ${stats.chunks} lists into ${stats.assignedLists} lists`,
  `Patches made to ${stats.patches} lists, moving ${stats.entriesToMove} entries`,
];

// Extend the summary when lists were deleted and rules were rewritten.
if (emptyLists.length !== 0) {
  summaryLines.push(
    `Updated rules using ${stats.nonEmptyLists} lists`,
    `Deleted ${stats.emptyLists} empty lists`,
  );
}

for (const line of summaryLines) {
  console.log(line);
}

// Send a notification to the webhook
await notifyWebhook("CF Defragment script finished running");

cf_gateway_rule_create.js

Lines changed: 4 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,16 @@
1-
import { getZeroTrustLists, upsertZeroTrustRule } from "./lib/api.js";
1+
import { getZeroTrustLists, upsertZeroTrustDNSRule, upsertZeroTrustSNIRule } from "./lib/api.js";
22
import { BLOCK_BASED_ON_SNI } from "./lib/constants.js";
33
import { notifyWebhook } from "./lib/utils.js";
44

55
const { result: lists } = await getZeroTrustLists();
66

7-
// Create a Wirefilter expression to match DNS queries against all the lists
8-
const wirefilterDNSExpression = lists.reduce((previous, current) => {
9-
if (!current.name.startsWith("CGPS List")) return previous;
10-
11-
return `${previous} any(dns.domains[*] in \$${current.id}) or `;
12-
}, "");
13-
14-
console.log("Checking DNS rule...");
15-
// .slice removes the trailing ' or '
16-
await upsertZeroTrustRule(wirefilterDNSExpression.slice(0, -4), "CGPS Filter Lists", ["dns"]);
7+
// Upsert DNS rules for all lists
8+
await upsertZeroTrustDNSRule(lists, "CGPS Filter Lists");
179

1810
// Optionally create a rule that matches the SNI.
1911
// This only works for users who proxy their traffic through Cloudflare.
2012
if (BLOCK_BASED_ON_SNI) {
21-
const wirefilterSNIExpression = lists.reduce((previous, current) => {
22-
if (!current.name.startsWith("CGPS List")) return previous;
23-
24-
return `${previous} any(net.sni.domains[*] in \$${current.id}) or `;
25-
}, "");
26-
27-
console.log("Creating SNI rule...");
28-
// .slice removes the trailing ' or '
29-
await upsertZeroTrustRule(wirefilterSNIExpression.slice(0, -4), "CGPS Filter Lists - SNI Based Filtering", ["l4"]);
13+
await upsertZeroTrustSNIRule(lists, "CGPS Filter Lists - SNI Based Filtering");
3014
}
3115

3216
// Send a notification to the webhook

lib/api.js

Lines changed: 164 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import { BLOCK_PAGE_ENABLED, DEBUG, LIST_ITEM_SIZE } from "./constants.js";
22
import { requestGateway } from "./helpers.js";
33

4+
const NOW_STR = new Date().toISOString();
5+
46
/**
57
* Gets Zero Trust lists.
68
*
@@ -123,11 +125,13 @@ export const synchronizeZeroTrustLists = async (items) => {
123125
// how many entries there were and how many we're removing.
124126
const spaceInList = LIST_ITEM_SIZE - (domainsByList[listId].length - patch.remove.length);
125127
// Take up to spaceInList entries from the additions into this list.
128+
// Use the current timestamp as the description to track when we first see this domain.
129+
// This can be used to defragment the lists later and consolidate more stable entries.
126130
const append = Array(spaceInList)
127131
.fill(0)
128132
.map(() => toAdd.shift())
129133
.filter(Boolean)
130-
.map(domain => ({ value: domain }));
134+
.map(domain => ({ value: domain, description: NOW_STR }));
131135
return [listId, { ...patch, append }];
132136
})
133137
);
@@ -144,7 +148,7 @@ export const synchronizeZeroTrustLists = async (items) => {
144148
.fill(0)
145149
.map(() => toAdd.shift())
146150
.filter(Boolean)
147-
.map(domain => ({ value: domain }));
151+
.map(domain => ({ value: domain, description: NOW_STR }));
148152

149153
// Add this list edit to the patches
150154
if (append.length) {
@@ -170,6 +174,128 @@ export const synchronizeZeroTrustLists = async (items) => {
170174
}
171175
};
172176

177+
/**
 * Defragment Zero Trust lists.
 *
 * Inspects the existing lists whose name starts with "CGPS List - Chunk <number>",
 * downloads all of their entries and sorts them by the timestamp stored in each
 * entry's description (the API does not allow setting created_at, so the
 * description is used to record when a domain was first added). Entries with a
 * missing or unparseable description are stamped with the current run time.
 *
 * The sorted entries are then packed back into the existing lists in chunk-number
 * order, LIST_ITEM_SIZE entries per list, so that older domains end up in the
 * earlier lists. Older entries are assumed to be more stable, which isolates
 * churn to the last list(s) and reduces how many lists later updates must patch.
 *
 * Only entries whose list changes are patched. Lists that receive no entries are
 * reported as empty; they are NOT deleted here.
 *
 * @returns {Promise<{emptyLists: object[], nonEmptyLists: object[], stats: object}>}
 *   `emptyLists` are the chunk lists left without entries, `nonEmptyLists` is every
 *   list returned by the API except the empty ones (so manually created lists are
 *   kept), and `stats` holds counters describing the run.
 * @throws {Error} If there are more entries than the existing chunk lists can hold.
 */
export const defragmentZeroTrustLists = async () => {
  const chunkPrefix = "CGPS List - Chunk ";

  console.log("Checking existing lists...");
  const { result } = await getZeroTrustLists();
  // Normalize a missing result once, so every later use of `lists` is safe.
  const lists = result ?? [];
  const cgpsLists = lists.filter(({ name }) => name.startsWith(chunkPrefix));
  console.log(`Found ${cgpsLists.length} existing lists. Downloading...`);

  // Sort the lists by the natural number order in the name.
  const chunkNumber = ({ name }) => Number.parseInt(name.slice(chunkPrefix.length), 10);
  cgpsLists.sort((a, b) => chunkNumber(a) - chunkNumber(b));

  // A usable description is a string that parses as a date. The type check
  // matters: `new Date(null)` is the epoch, which would otherwise make an
  // entry without a description look like the oldest one.
  const isTimestamp = (description) =>
    typeof description === "string" && !Number.isNaN(Date.parse(description));

  // Fetch all the items in the lists, one list at a time.
  const allEntries = [];
  for (const list of cgpsLists) {
    const { result: listItems } = await getZeroTrustListItems(list.id);
    for (const item of listItems ?? []) {
      allEntries.push({
        ...item,
        // Remember where the item came from so we know what to patch later.
        originListId: list.id,
        description: isTimestamp(item.description) ? item.description : NOW_STR,
      });
    }
  }

  console.log(`Found ${allEntries.length} entries in ${cgpsLists.length} lists`);

  // Sort the entries by the time stored in the description, using the domain
  // as a tiebreaker. A stable total order is important to avoid flip-flopping
  // entries between lists in subsequent runs.
  allEntries.sort((a, b) => {
    const timeA = Date.parse(a.description);
    const timeB = Date.parse(b.description);
    return timeA === timeB ? a.value.localeCompare(b.value) : timeA - timeB;
  });

  // Assign the entries to lists in sorted order, LIST_ITEM_SIZE per list.
  const assignedEntries = allEntries.map((entry, index) => {
    const listIndex = Math.floor(index / LIST_ITEM_SIZE);
    const assignedListId = cgpsLists[listIndex]?.id || null;
    // The list should always exist since we're only shuffling the entries.
    if (!assignedListId) {
      throw new Error(`Unable to resolve list for entry ${index}, have only ${cgpsLists.length} lists`);
    }
    return { ...entry, assignedListId };
  });

  // Filter down to the entries that are changing lists.
  const entriesToMove = assignedEntries.filter(entry => entry.originListId !== entry.assignedListId);

  // Build one patch per affected list. A Map keeps insertion order and is
  // safe for arbitrary list ids.
  const patches = new Map();
  const patchFor = (listId) => {
    if (!patches.has(listId)) {
      patches.set(listId, { append: [], remove: [] });
    }
    return patches.get(listId);
  };
  for (const entry of entriesToMove) {
    const { originListId, assignedListId, ...gatewayItem } = entry;
    // Remove by value from the list the entry currently lives in.
    patchFor(originListId).remove.push(gatewayItem.value);
    // Append the full item (everything except our bookkeeping ids) to its new list.
    patchFor(assignedListId).append.push(gatewayItem);
  }

  console.log(`Found ${patches.size} patches to make, moving ${entriesToMove.length} entries...`);

  // Process all the patches, one request at a time.
  const listNameById = new Map(cgpsLists.map(({ id, name }) => [id, name]));
  for (const [listId, patch] of patches) {
    const appends = patch.append.length;
    const removals = patch.remove.length;
    console.log(`Updating list "${listNameById.get(listId)}"${appends ? `, ${appends} additions` : ''}${removals ? `, ${removals} removals` : ''}`);
    await patchExistingList(listId, patch);
  }

  // A list is empty when no entry was assigned to it.
  const assignedLists = new Set(assignedEntries.map(entry => entry.assignedListId));
  const emptyLists = cgpsLists.filter(list => !assignedLists.has(list.id));

  // Gather the non-empty lists from the original API result, not just the
  // chunk lists, so manually created lists starting with "CGPS List" are kept.
  const emptyListIds = new Set(emptyLists.map(list => list.id));
  const nonEmptyLists = lists.filter(list => !emptyListIds.has(list.id));

  return {
    emptyLists,
    nonEmptyLists,
    stats: {
      assignedLists: assignedLists.size,
      emptyLists: emptyLists.length,
      nonEmptyLists: nonEmptyLists.length,
      entriesToMove: entriesToMove.length,
      patches: patches.size,
      allEntries: allEntries.length,
      chunks: cgpsLists.length,
    },
  };
};
298+
173299
/**
174300
* Creates Zero Trust lists sequentially.
175301
* @param {string[]} items The domains.
@@ -179,9 +305,10 @@ export const createZeroTrustListsOneByOne = async (items, startingListNumber = 1
179305
let totalListNumber = Math.ceil(items.length / LIST_ITEM_SIZE);
180306

181307
for (let i = 0, listNumber = startingListNumber; i < items.length; i += LIST_ITEM_SIZE) {
308+
// We use the description as the list addition time because the API does not allow setting the created_at time.
182309
const chunk = items
183310
.slice(i, i + LIST_ITEM_SIZE)
184-
.map((item) => ({ value: item }));
311+
.map((item) => ({ value: item, description: NOW_STR }));
185312
const listName = `CGPS List - Chunk ${listNumber}`;
186313

187314
try {
@@ -343,3 +470,37 @@ export const deleteZeroTrustRule = async (id) => {
343470
throw err;
344471
}
345472
};
473+
474+
/**
 * Creates or updates the Zero Trust DNS rule for a given array of lists.
 * Only lists whose name starts with "CGPS List" contribute to the rule's
 * Wirefilter expression; their clauses are joined with " or ".
 * @param {object[]} lists The candidate lists.
 * @param {string} lists[].id The ID of the list.
 * @param {string} lists[].name The name of the list.
 * @param {string} listName Forwarded as the second argument of upsertZeroTrustRule
 *   (presumably the rule name - confirm against that function).
 */
export const upsertZeroTrustDNSRule = async (lists, listName) => {
  // Collect one "any(dns.domains[*] in $<id>)" clause per matching list.
  const dnsClauses = lists.reduce((clauses, { id, name }) => {
    if (name.startsWith("CGPS List")) {
      clauses.push(`any(dns.domains[*] in \$${id})`);
    }
    return clauses;
  }, []);
  const wirefilterDNSExpression = dnsClauses.join(" or ");

  console.log("Checking DNS rule...");
  await upsertZeroTrustRule(wirefilterDNSExpression, listName, ["dns"]);
};
490+
491+
/**
 * Creates or updates the Zero Trust SNI rule for a given array of lists.
 * Only lists whose name starts with "CGPS List" contribute to the rule's
 * Wirefilter expression; their clauses are joined with " or ".
 * @param {object[]} lists The candidate lists.
 * @param {string} lists[].id The ID of the list.
 * @param {string} lists[].name The name of the list.
 * @param {string} listName Forwarded as the second argument of upsertZeroTrustRule
 *   (presumably the rule name - confirm against that function).
 */
export const upsertZeroTrustSNIRule = async (lists, listName) => {
  // Collect one "any(net.sni.domains[*] in $<id>)" clause per matching list.
  const sniClauses = lists.reduce((clauses, { id, name }) => {
    if (name.startsWith("CGPS List")) {
      clauses.push(`any(net.sni.domains[*] in \$${id})`);
    }
    return clauses;
  }, []);
  const wirefilterSNIExpression = sniClauses.join(" or ");

  console.log("Creating SNI rule...");
  await upsertZeroTrustRule(wirefilterSNIExpression, listName, ["l4"]);
};

package.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
"cloudflare-create:rule": "node cf_gateway_rule_create.js",
1313
"cloudflare-create:list": "node cf_list_create.js",
1414
"cloudflare-delete:rule": "node cf_gateway_rule_delete.js",
15-
"cloudflare-delete:list": "node cf_list_delete.js"
15+
"cloudflare-delete:list": "node cf_list_delete.js",
16+
"cloudflare-defragment": "node cf_defragment.js"
1617
},
1718
"type": "module",
1819
"dependencies": {

0 commit comments

Comments
 (0)