|
| 1 | +/** |
| 2 | + * DomainFront Relay — Apps Script with Cloudflare Worker exit. |
| 3 | + * |
| 4 | + * Variant of Code.gs that off-loads the actual outbound HTTP fetch to |
| 5 | + * a Cloudflare Worker. Apps Script becomes a thin auth-and-forward |
| 6 | + * relay; Cloudflare does the work and pays the latency. |
| 7 | + * |
| 8 | + * mhrv-rs ──► Apps Script (this file) ──► Cloudflare Worker ──► target |
| 9 | + * ▲ inbound auth & batch ▲ outbound fetch + base64 |
| 10 | + * |
| 11 | + * Wire protocol with mhrv-rs is identical to Code.gs: |
| 12 | + * 1. Single: POST { k, m, u, h, b, ct, r } → { s, h, b } |
| 13 | + * 2. Batch: POST { k, q: [{m,u,h,b,ct,r}, ...] } → { q: [{s,h,b}, ...] } |
| 14 | + * Both shapes are forwarded to the Worker in as few POSTs as |
| 15 | + * possible: single mode posts {k, u, m, ...} once; batch mode posts |
| 16 | + * {k, q: [...]} once per chunk of WORKER_BATCH_CHUNK URLs (a single |
| 17 | + * POST for typical batches). The Worker fans out each batch |
| 18 | + * internally via Promise.all. This is the design choice that makes |
| 19 | + * Code.cfw.gs actually save GAS UrlFetchApp quota — fanning out per |
| 20 | + * URL via fetchAll would put it at parity with the standard Code.gs. |
| 21 | + * |
| 22 | + * Trade-off summary (read before deploying): |
| 23 | + * + Per-call latency drops from ~250-500 ms (Apps Script internal |
| 24 | + * hop) to ~10-50 ms (CF edge). Visibly snappier for chat-style |
| 25 | + * workloads (Telegram, page navigation). |
| 26 | + * + Apps Script *runtime* quota (90 min/day on consumer accounts) |
| 27 | + * stretches significantly because each call now spends almost all |
| 28 | + * its time in the network leg to the Worker, not in the body |
| 29 | + * fetch + base64 + header processing. |
| 30 | + * + Apps Script *UrlFetchApp count* quota stretches roughly Nx for |
| 31 | + * an N-URL batch because the batch is sent as a small number of |
| 32 | + * POSTs to the Worker (one per chunk of WORKER_BATCH_CHUNK URLs), |
| 33 | + * not fanned out per-URL via fetchAll. For mhrv-rs's typical |
| 34 | + * 5-30 URL batches that's 1 GAS call (vs N under standard |
| 35 | + * Code.gs). Single non-batched requests still count 1:1. |
| 36 | + * - YouTube long-form streaming gets WORSE. Apps Script allows |
| 37 | + * ~6 min wall per execution; CF Workers cap at 30 s wall. The |
| 38 | + * SABR cliff hits sooner. For YouTube-heavy use, keep the |
| 39 | + * standard Code.gs (apps_script mode). |
| 40 | + * - Batch mode now has a per-batch wall, not per-URL: Promise.all |
| 41 | + * resolves only when every fetch finishes, so the slowest URL |
| 42 | + * dominates. mhrv-rs already retries failed batch items |
| 43 | + * individually, so failure modes are graceful, but it's a real |
| 44 | + * behavioural change vs Code.gs's per-URL fetchAll wall. |
| 45 | + * - Cloudflare anti-bot challenges on destination sites can be |
| 46 | + * stricter — exit IP is now in CF's own range, which CF's |
| 47 | + * anti-bot fingerprints as a worker-internal request. This is |
| 48 | + * a different problem than DPI bypass; not solved by either |
| 49 | + * variant. |
| 50 | + * |
| 51 | + * Deployment: |
| 52 | + * 1. Deploy assets/cloudflare/worker.js to Cloudflare Workers first |
| 53 | + * (set its AUTH_KEY to a strong secret). |
| 54 | + * 2. Note the *.workers.dev URL of that Worker. |
| 55 | + * 3. Open https://script.google.com → New project, delete default code. |
| 56 | + * 4. Paste THIS entire file. |
| 57 | + * 5. Set AUTH_KEY (must match the Worker's AUTH_KEY and your mhrv-rs |
| 58 | + * config's auth_key — all three identical). |
| 59 | + * 6. Set WORKER_URL to your *.workers.dev URL (must include https://). |
| 60 | + * 7. Deploy → New deployment → Web app |
| 61 | + * Execute as: Me | Who has access: Anyone |
| 62 | + * 8. Copy the Deployment ID into mhrv-rs config.json as "script_id". |
| 63 | + * mhrv-rs does not need to know about Cloudflare; it talks to |
| 64 | + * Apps Script the same way it always has. |
| 65 | + * |
| 66 | + * CHANGE THESE TWO CONSTANTS BELOW. |
| 67 | + * |
| 68 | + * Upstream credit for the GAS-→-Worker pattern: github.com/denuitt1/mhr-cfw. |
| 69 | + * This file inherits the hardening (decoy-on-bad-auth, hop-loop guard) |
| 70 | + * from the standard Code.gs. |
| 71 | + */ |
| 72 | + |
// Shared secret checked against the `k` field of every POST body.
// Must be identical to the Worker's AUTH_KEY and to auth_key in the
// mhrv-rs config.
const AUTH_KEY = "CHANGE_ME_TO_A_STRONG_SECRET";

// Address of the Cloudflare Worker deployed from
// assets/cloudflare/worker.js. Write the complete URL, scheme included.
const WORKER_URL = "https://CHANGE_ME.workers.dev";

// ── Sentinels — DO NOT EDIT ─────────────────────────────────
// Not configuration. These hold the literal template placeholders so
// doPost can fail closed: if AUTH_KEY or WORKER_URL above still equals
// its placeholder, doPost answers with a loud error instead of
// accepting the placeholder secret or POSTing to a bogus URL.
// Edit AUTH_KEY and WORKER_URL only; never these two.
const DEFAULT_AUTH_KEY = "CHANGE_ME_TO_A_STRONG_SECRET";
const DEFAULT_WORKER_URL = "https://CHANGE_ME.workers.dev";

// Largest number of URLs sent to the Worker in one POST; keep it equal
// to the Worker's MAX_BATCH_SIZE. Bigger batches are cut into chunks of
// this size and sent together via fetchAll, so an N-URL batch costs
// ceil(N / WORKER_BATCH_CHUNK) UrlFetchApp calls — still far fewer than
// the one-call-per-URL cost of the standard Code.gs.
const WORKER_BATCH_CHUNK = 40;

// Active-probing defence, same semantics as Code.gs. While this is
// false, bad-auth and malformed POSTs get the decoy HTML page rather
// than a JSON `{e}` error, so a probe cannot fingerprint the deployment
// as a relay. Set to true only while debugging an "unauthorized" loop
// during setup, and set it back before sharing the deployment.
const DIAGNOSTIC_MODE = false;

// Lower-cased names of request headers that are never forwarded to the
// Worker. Membership is what matters; the value 1 is just a truthy flag.
const SKIP_HEADERS = {
  "host": 1,
  "connection": 1,
  "content-length": 1,
  "transfer-encoding": 1,
  "proxy-connection": 1,
  "proxy-authorization": 1,
  "priority": 1,
  "te": 1,
};

// Page served to probes: imitates a placeholder Apps Script web app.
const DECOY_HTML = [
  '<!DOCTYPE html><html><head><title>Web App</title></head>',
  '<body><p>The script completed but did not return anything.</p>',
  '</body></html>',
].join('');
| 114 | + |
| 115 | +// ── Request Handlers ──────────────────────────────────────── |
| 116 | + |
/**
 * Build the response for a request that must be rejected.
 *
 * With DIAGNOSTIC_MODE off, the caller's error object is discarded and
 * the decoy HTML page is served instead, so the rejection does not
 * reveal that this deployment is a relay. With DIAGNOSTIC_MODE on, the
 * error object is returned as JSON to aid debugging.
 *
 * @param {Object} jsonBody Error payload, e.g. `{ e: "unauthorized" }`.
 * @return {TextOutput} The decoy page or the JSON-encoded error.
 */
function _decoyOrError(jsonBody) {
  if (!DIAGNOSTIC_MODE) {
    const decoyPage = ContentService.createTextOutput(DECOY_HTML);
    return decoyPage.setMimeType(ContentService.MimeType.HTML);
  }
  return _json(jsonBody);
}
| 123 | + |
/**
 * Web-app POST entry point: checks configuration, authenticates the
 * caller, and dispatches to the batch or single-request handler.
 *
 * @param {Object} e Apps Script event; the JSON request is read from
 *     `e.postData.contents`.
 * @return {TextOutput} JSON result, or the decoy page (see _decoyOrError)
 *     for bad auth and for anything that throws while handling the request.
 */
function doPost(e) {
  try {
    // Fail closed while the template placeholders are still in place.
    // Otherwise a forgotten edit would silently accept the placeholder
    // secret, or POST to the literal "CHANGE_ME" host.
    if (AUTH_KEY === DEFAULT_AUTH_KEY) {
      return _json({ e: "configure AUTH_KEY in Code.cfw.gs" });
    }
    if (WORKER_URL === DEFAULT_WORKER_URL) {
      return _json({ e: "configure WORKER_URL in Code.cfw.gs" });
    }

    const request = JSON.parse(e.postData.contents);
    if (request.k !== AUTH_KEY) {
      return _decoyOrError({ e: "unauthorized" });
    }

    // An array under `q` marks a batch; everything else is a single request.
    return Array.isArray(request.q) ? _doBatch(request.q) : _doSingle(request);
  } catch (failure) {
    // Missing body, malformed JSON, or a handler exception.
    return _decoyOrError({ e: String(failure) });
  }
}
| 146 | + |
/**
 * Web-app GET entry point. Always serves the decoy HTML page; the
 * event argument is not inspected.
 *
 * @param {Object} e Apps Script event (unused).
 * @return {TextOutput} The decoy page with an HTML MIME type.
 */
function doGet(e) {
  const decoyPage = ContentService.createTextOutput(DECOY_HTML);
  return decoyPage.setMimeType(ContentService.MimeType.HTML);
}
| 152 | + |
| 153 | +// ── Worker Forwarding ────────────────────────────────────── |
| 154 | + |
/**
 * Copy a client-supplied header map, leaving out every header whose
 * lower-cased name is flagged in SKIP_HEADERS.
 *
 * The input is never mutated. Anything that is not a non-null object
 * yields an empty map.
 *
 * Both ownership checks go through Object.prototype.hasOwnProperty
 * because the map comes straight from the request JSON:
 *   - a header literally named "hasOwnProperty" would shadow the method
 *     and make `rawHeaders.hasOwnProperty(k)` throw;
 *   - a plain `SKIP_HEADERS[name]` lookup also sees inherited members,
 *     so headers named "constructor" or "toString" would be dropped.
 *
 * @param {*} rawHeaders Header map from the request (`h` field).
 * @return {Object} Fresh map holding only the forwardable headers.
 */
function _scrubHeaders(rawHeaders) {
  const out = {};
  if (!rawHeaders || typeof rawHeaders !== "object") return out;

  const hasOwn = Object.prototype.hasOwnProperty;
  for (const name in rawHeaders) {
    if (!hasOwn.call(rawHeaders, name)) continue;
    const lowered = name.toLowerCase();
    // Skip only on an own, truthy entry — never on an inherited member.
    if (hasOwn.call(SKIP_HEADERS, lowered) && SKIP_HEADERS[lowered]) continue;
    out[name] = rawHeaders[name];
  }
  return out;
}
| 170 | + |
/**
 * Normalize one request item into the shape sent to the Worker.
 *
 * Used by both paths: single mode sends the fields next to `k` in one
 * envelope, batch mode sends them as the entries of `q`. The auth key
 * is added by the caller at envelope level, never per item.
 *
 * @param {Object} item Request item with `u`, and optionally `m`, `h`,
 *     `b`, `ct`, `r`. The caller validates `u` beforehand.
 * @return {{u: *, m: string, h: Object, b: *, ct: *, r: boolean}}
 *     `m` upper-cased and defaulting to "GET"; `h` scrubbed via
 *     _scrubHeaders; falsy `b` / `ct` become null; `r` is true unless
 *     the item sets it to exactly `false`.
 */
function _normalizeItem(item) {
  return {
    u: item.u,
    // Coerce before upper-casing: a truthy non-string method (e.g. a
    // number) used to throw here, and inside a batch that exception
    // took the whole batch down with it. Passing the coerced value
    // through lets the Worker reject that one item instead.
    m: String(item.m || "GET").toUpperCase(),
    h: _scrubHeaders(item.h),
    b: item.b || null,
    ct: item.ct || null,
    r: item.r !== false,
  };
}
| 187 | + |
/**
 * Build the UrlFetchApp request object for one POST to the Worker.
 *
 * The object carries its own `url`, so it can be handed to
 * UrlFetchApp.fetchAll as-is or to UrlFetchApp.fetch(opts.url, opts).
 *
 * @param {Object} payload Envelope to send; serialized to JSON here.
 * @return {Object} Request options targeting WORKER_URL.
 */
function _workerFetchOptions(payload) {
  const body = JSON.stringify(payload);
  const options = {
    url: WORKER_URL,
    method: "post",
    contentType: "application/json",
    payload: body,
  };
  // HTTP error statuses come back as ordinary responses, not exceptions.
  options.muteHttpExceptions = true;
  options.followRedirects = true;
  options.validateHttpsCertificates = true;
  return options;
}
| 199 | + |
| 200 | +// ── Single Request ───────────────────────────────────────── |
| 201 | + |
/**
 * Relay one request to the Worker and return the Worker's answer.
 *
 * @param {Object} req Authenticated request body (`u`, `m`, `h`, `b`,
 *     `ct`, `r`).
 * @return {TextOutput} JSON: the parsed Worker reply, `{e: "bad url"}`
 *     when `u` is not an http(s) URL string, or
 *     `{e: "worker unreachable: …"}` when the fetch itself throws.
 */
function _doSingle(req) {
  const urlIsValid = typeof req.u === "string" && /^https?:\/\//i.test(req.u);
  if (!urlIsValid) {
    return _json({ e: "bad url" });
  }

  const { u, m, h, b, ct, r } = _normalizeItem(req);
  const opts = _workerFetchOptions({ k: AUTH_KEY, u, m, h, b, ct, r });

  // muteHttpExceptions only turns HTTP statuses (4xx/5xx) into normal
  // responses. Network-level failures — DNS, TLS handshake, connection
  // timeout to *.workers.dev — still throw. Report those as `{e}` here:
  // if they reached doPost's catch the operator would get the decoy
  // page, which is indistinguishable from a bad-auth rejection. Auth
  // has already passed, so nothing is revealed to probes.
  let workerResponse;
  try {
    workerResponse = UrlFetchApp.fetch(opts.url, opts);
  } catch (fetchError) {
    return _json({ e: "worker unreachable: " + String(fetchError) });
  }

  const parsed = _parseWorkerJson(workerResponse);
  return _json(parsed);
}
| 235 | + |
| 236 | +// ── Batch Request ────────────────────────────────────────── |
| 237 | + |
/**
 * Forward a batch to the Worker, chunking when needed, and return one
 * result per input item in the original order.
 *
 * Each chunk of at most WORKER_BATCH_CHUNK items becomes ONE POST
 * `{k, q: [...]}` to the Worker. A single chunk goes through
 * UrlFetchApp.fetch; several chunks go out together through
 * UrlFetchApp.fetchAll. Quota cost is therefore
 * ceil(N / WORKER_BATCH_CHUNK) UrlFetchApp calls for N valid items.
 *
 * Result slots are filled as follows:
 *   - an item that is not an object, or whose `u` is not an http(s) URL
 *     string, is never sent and gets `{e: "bad url"}`;
 *   - if the fetch throws (network-level failure), every valid item
 *     gets `{e: "worker unreachable: …"}`;
 *   - if a chunk's reply has no `q` array, every item of that chunk
 *     gets the reply's `e`, or `{e: "worker batch failure"}`;
 *   - otherwise each item gets the entry at its own position in the
 *     chunk's `q`. Exactly chunk-length entries are consumed per chunk,
 *     so a reply that is too short or too long cannot shift the results
 *     of later chunks onto the wrong URLs. Missing or falsy entries
 *     become `{e: "missing worker response"}`.
 *
 * @param {Array} items Batch items from the request's `q` field.
 * @return {TextOutput} JSON `{q: [...]}` with `items.length` entries.
 */
function _doBatch(items) {
  const validItems = [];
  const errorMap = {};

  for (let i = 0; i < items.length; i++) {
    const item = items[i];
    // Check the item itself first: a null or primitive entry would
    // otherwise throw on `.u` and fail the whole batch.
    const usable = item !== null && typeof item === "object" &&
      typeof item.u === "string" && /^https?:\/\//i.test(item.u);
    if (!usable) {
      errorMap[i] = "bad url";
      continue;
    }
    validItems.push(_normalizeItem(item));
  }

  // One entry per valid item, in validItems order. `undefined` marks a
  // slot the Worker did not answer; reassembly turns it into an error.
  const workerResults = [];
  if (validItems.length > 0) {
    const chunks = [];
    for (let start = 0; start < validItems.length; start += WORKER_BATCH_CHUNK) {
      chunks.push(validItems.slice(start, start + WORKER_BATCH_CHUNK));
    }

    const fetchOpts = chunks.map(function (chunk) {
      return _workerFetchOptions({ k: AUTH_KEY, q: chunk });
    });

    // muteHttpExceptions covers HTTP-level errors only. Network-level
    // failures (DNS, TLS, connection timeout) still throw; convert them
    // to per-item errors so the client can retry items individually
    // instead of receiving the decoy page from doPost's catch.
    let responses = null;
    try {
      responses = fetchOpts.length === 1
        ? [UrlFetchApp.fetch(fetchOpts[0].url, fetchOpts[0])]
        : UrlFetchApp.fetchAll(fetchOpts);
    } catch (err) {
      const unreachable = { e: "worker unreachable: " + String(err) };
      for (let v = 0; v < validItems.length; v++) workerResults.push(unreachable);
    }

    if (responses) {
      for (let c = 0; c < chunks.length; c++) {
        const chunkSize = chunks[c].length;
        if (c >= responses.length) {
          // No response object for this chunk: leave its slots unanswered.
          for (let m = 0; m < chunkSize; m++) workerResults.push(undefined);
          continue;
        }

        const parsed = _parseWorkerJson(responses[c]);
        if (parsed && Array.isArray(parsed.q)) {
          // Consume exactly chunkSize entries to keep later chunks aligned.
          for (let s = 0; s < chunkSize; s++) workerResults.push(parsed.q[s]);
        } else {
          // Chunk-level failure (worker error, unparsable body, auth…):
          // give every item of this chunk the same error. Other chunks
          // are unaffected.
          const slotErr = (parsed && parsed.e)
            ? { e: parsed.e }
            : { e: "worker batch failure" };
          for (let f = 0; f < chunkSize; f++) workerResults.push(slotErr);
        }
      }
    }
  }

  // Reassemble in original order: rejected slots get their pre-flight
  // error, the rest take the next worker result.
  const results = [];
  let next = 0;
  for (let j = 0; j < items.length; j++) {
    if (Object.prototype.hasOwnProperty.call(errorMap, j)) {
      results.push({ e: errorMap[j] });
    } else {
      results.push(workerResults[next++] || { e: "missing worker response" });
    }
  }
  return _json({ q: results });
}
| 335 | + |
| 336 | +// ── Worker response handling ─────────────────────────────── |
| 337 | + |
/**
 * Decode the Worker's reply into a plain object.
 *
 * A body that parses as a JSON object is returned unchanged, whatever
 * the HTTP status — including the Worker's own `{e: "..."}` errors, so
 * the client sees them as-is. Anything else is wrapped as
 * `{e: "worker <status>: <body>"}` with the body cut to 200 characters:
 *   - a body that is not JSON at all (e.g. an HTML error page);
 *   - JSON that is not an object (`null`, a number, a string, an array).
 *     Returning such a value as-is would hand the client something that
 *     is neither a result envelope nor an `{e}` error.
 *
 * @param {HTTPResponse} resp Response from UrlFetchApp; only
 *     getResponseCode() and getContentText() are used.
 * @return {Object} The parsed envelope, or an `{e}` error object.
 */
function _parseWorkerJson(resp) {
  const code = resp.getResponseCode();
  const text = resp.getContentText();

  let parsed = null;
  try {
    parsed = JSON.parse(text);
  } catch (err) {
    // Not JSON; falls through to the wrapped error below.
    parsed = null;
  }

  if (parsed !== null && typeof parsed === "object" && !Array.isArray(parsed)) {
    return parsed;
  }

  const snippet = text.length > 200 ? text.substring(0, 200) + "…" : text;
  return { e: "worker " + code + ": " + snippet };
}
| 355 | + |
/**
 * Serialize a value to JSON and wrap it as a web-app response.
 *
 * @param {*} obj Value to serialize with JSON.stringify.
 * @return {TextOutput} Text output with the JSON MIME type.
 */
function _json(obj) {
  const serialized = JSON.stringify(obj);
  const output = ContentService.createTextOutput(serialized);
  return output.setMimeType(ContentService.MimeType.JSON);
}
0 commit comments