Skip to content

Commit 0beec6a

Browse files
committed
v0.9.1: normalize X/Twitter GraphQL URLs for cache hit rate (issue #16)
User @barzamini pointed out an optimization from the Python community (originally from seramo_ir): X/Twitter GraphQL URLs look like

    https://x.com/i/api/graphql/{hash}/{op}?variables=...&features=...&fieldToggles=...

The features and fieldToggles params change across sessions and even within a session, busting our 50 MB response cache on every request to the same logical query. Keeping only the leading 'variables=' param and stripping everything after it lets identical logical queries collapse into one cache entry, dramatically reducing quota usage when browsing Twitter through the relay.

Implementation:
- src/domain_fronter.rs: new normalize_x_graphql_url() helper. Matches exactly the Python patch's pattern (host == 'x.com', path starts with /i/api/graphql/, query starts with variables=). Truncates at the first '&' past the '?'. Applied at the top of relay() so the normalized URL feeds BOTH the cache key AND the request sent to Apps Script — so we save on Apps Script quota too, not just on return-trip bytes.
- src/config.rs: new opt-in normalize_x_graphql bool (default false). Off by default because strict X endpoints may reject trimmed requests; user should flip it on and watch for regressions.
- src/bin/ui.rs: checkbox in the Advanced section, 'Normalize X/Twitter GraphQL URLs', with tooltip explaining the trade-off and crediting the source.
- Four new unit tests in domain_fronter::tests covering: the happy path trim, non-x.com hosts pass through unchanged, non-graphql x.com paths pass through unchanged, and idempotency.

48 tests total, all green.

Credit: idea by seramo_ir, Python patch at https://gist.github.com/seramo/0ae9e5d30ac23a73d5eb3bd2710fcd67, implementation request by @barzamini in issue #16.
1 parent 346daaa commit 0beec6a

5 files changed

Lines changed: 161 additions & 7 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "mhrv-rs"
3-
version = "0.9.0"
3+
version = "0.9.1"
44
edition = "2021"
55
description = "Rust port of MasterHttpRelayVPN -- DPI bypass via Google Apps Script relay with domain fronting"
66
license = "MIT"

src/bin/ui.rs

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,8 @@ struct FormState {
172172
fetch_ips_from_api: bool,
173173
max_ips_to_scan: usize,
174174
scan_batch_size:usize,
175-
google_ip_validation: bool
175+
google_ip_validation: bool,
176+
normalize_x_graphql: bool,
176177
}
177178

178179
#[derive(Clone, Debug)]
@@ -247,7 +248,8 @@ fn load_form() -> (FormState, Option<String>) {
247248
fetch_ips_from_api:c.fetch_ips_from_api,
248249
max_ips_to_scan:c.max_ips_to_scan,
249250
google_ip_validation: c.google_ip_validation,
250-
scan_batch_size:c.scan_batch_size
251+
scan_batch_size:c.scan_batch_size,
252+
normalize_x_graphql: c.normalize_x_graphql,
251253
}
252254
} else {
253255
FormState {
@@ -270,7 +272,8 @@ fn load_form() -> (FormState, Option<String>) {
270272
fetch_ips_from_api:false,
271273
max_ips_to_scan:100,
272274
google_ip_validation:true,
273-
scan_batch_size:500
275+
scan_batch_size:500,
276+
normalize_x_graphql: false,
274277
}
275278
};
276279
(form, load_err)
@@ -396,7 +399,8 @@ impl FormState {
396399
fetch_ips_from_api:self.fetch_ips_from_api,
397400
max_ips_to_scan: self.max_ips_to_scan,
398401
google_ip_validation:self.google_ip_validation,
399-
scan_batch_size:self.scan_batch_size
402+
scan_batch_size:self.scan_batch_size,
403+
normalize_x_graphql: self.normalize_x_graphql,
400404
})
401405
}
402406
}
@@ -433,6 +437,12 @@ struct ConfigWire<'a> {
433437
parallel_relay: u8,
434438
#[serde(skip_serializing_if = "Option::is_none")]
435439
sni_hosts: Option<Vec<&'a str>>,
440+
#[serde(skip_serializing_if = "is_false")]
441+
normalize_x_graphql: bool,
442+
}
443+
444+
/// Serde `skip_serializing_if` predicate: reports whether a boolean field is
/// `false`, so that unset flags are left out of the serialized config.
///
/// Takes `&bool` because serde passes the field to the predicate by reference.
fn is_false(b: &bool) -> bool {
    matches!(*b, false)
}
437447

438448
fn is_zero_u8(v: &u8) -> bool {
@@ -470,6 +480,7 @@ impl<'a> From<&'a Config> for ConfigWire<'a> {
470480
.sni_hosts
471481
.as_ref()
472482
.map(|v| v.iter().map(String::as_str).collect()),
483+
normalize_x_graphql: c.normalize_x_graphql,
473484
}
474485
}
475486
}
@@ -763,6 +774,16 @@ impl eframe::App for App {
763774
ui.add_space(120.0 + 8.0);
764775
ui.checkbox(&mut self.form.show_auth_key, "Show auth key");
765776
});
777+
ui.horizontal(|ui| {
778+
ui.add_space(120.0 + 8.0);
779+
ui.checkbox(&mut self.form.normalize_x_graphql, "Normalize X/Twitter GraphQL URLs")
780+
.on_hover_text(
781+
"Trim the `features` / `fieldToggles` query params from x.com/i/api/graphql/… \
782+
requests before relaying. Massively improves cache hit rate when browsing \
783+
Twitter/X. Off by default — some endpoints may reject trimmed requests. \
784+
Credit: seramo_ir + Persian Python community (issue #16).",
785+
);
786+
});
766787
});
767788
});
768789

src/config.rs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,21 @@ pub struct Config {
9090
pub scan_batch_size:usize,
9191

9292
#[serde(default = "default_google_ip_validation")]
93-
pub google_ip_validation: bool
93+
pub google_ip_validation: bool,
94+
/// When true, GET requests to `x.com/i/api/graphql/<hash>/<op>?variables=…`
95+
/// have their query trimmed to just the `variables=` param before being
96+
/// relayed. The `features` / `fieldToggles` params that X ships with
97+
/// these requests change frequently and bust the response cache —
98+
/// stripping them dramatically improves hit rate on Twitter/X browsing.
99+
///
100+
/// Credit: idea from seramo_ir, originally adapted to the Python
101+
/// MasterHttpRelayVPN by the Persian community
102+
/// (https://gist.github.com/seramo/0ae9e5d30ac23a73d5eb3bd2710fcd67).
103+
///
104+
/// Off by default — some X endpoints may reject calls that omit
105+
/// features. Turn on and observe.
106+
#[serde(default)]
107+
pub normalize_x_graphql: bool,
94108
}
95109

96110
fn default_fetch_ips_from_api() -> bool { false }

src/domain_fronter.rs

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,12 @@ pub struct DomainFronter {
7777
/// Fan-out factor: fire this many Apps Script instances in parallel
7878
/// per request and return first success. `<= 1` = off.
7979
parallel_relay: usize,
80+
/// Enable the `normalize_x_graphql` URL rewrite (issue #16, credit
81+
/// seramo_ir). When true, GETs to `x.com/i/api/graphql/<hash>/<op>`
82+
/// have their query trimmed to the first `variables=` block so the
83+
/// response cache isn't busted by the constantly-changing `features`
84+
/// / `fieldToggles` params.
85+
normalize_x_graphql: bool,
8086
tls_connector: TlsConnector,
8187
pool: Arc<Mutex<Vec<PoolEntry>>>,
8288
cache: Arc<ResponseCache>,
@@ -172,6 +178,7 @@ impl DomainFronter {
172178
http_host: "script.google.com",
173179
auth_key: config.auth_key.clone(),
174180
parallel_relay: config.parallel_relay as usize,
181+
normalize_x_graphql: config.normalize_x_graphql,
175182
script_ids,
176183
script_idx: AtomicUsize::new(0),
177184
tls_connector,
@@ -388,6 +395,19 @@ impl DomainFronter {
388395
headers: &[(String, String)],
389396
body: &[u8],
390397
) -> Vec<u8> {
398+
// Optional URL rewrite for X/Twitter GraphQL (issue #16). Applied
399+
// here, at the top of relay(), so it affects BOTH the cache key
400+
// (so matching requests collapse into one entry) AND the URL that
401+
// gets sent upstream to Apps Script (so Apps Script only has to
402+
// fetch the trimmed variant, cutting quota usage).
403+
let normalized;
404+
let url: &str = if self.normalize_x_graphql {
405+
normalized = normalize_x_graphql_url(url);
406+
normalized.as_str()
407+
} else {
408+
url
409+
};
410+
391411
let coalescible = is_cacheable_method(method) && body.is_empty();
392412
let key = if coalescible { Some(cache_key(method, url)) } else { None };
393413
let t_start = Instant::now();
@@ -698,6 +718,58 @@ impl DomainFronter {
698718
/// strip Accept-Encoding: br (Apps Script can't decompress brotli).
699719
/// Extract the host (no scheme, no port, no path) from a URL string.
700720
/// Returns None for malformed / scheme-less inputs.
721+
/// Trim an X/Twitter GraphQL URL down to its leading `variables=` query
/// parameter, dropping everything from the first `&` in the query onward.
/// See the `normalize_x_graphql` config field for the motivation.
///
/// The pattern follows the Python community patch (issue #16). A URL is
/// rewritten only when all of the following hold:
///
/// * the scheme is `http://` or `https://`;
/// * the host is `x.com` (compared ASCII case-insensitively, ignoring any
///   `userinfo@` prefix and `:port` suffix in the authority);
/// * the path starts with `/i/api/graphql/`;
/// * the query string starts with `variables=`.
///
/// Returns the possibly-rewritten URL as an owned `String`. When the URL does
/// not match the pattern, or has nothing after the `variables=` parameter,
/// the input is returned unchanged. The rewritten URL is always a prefix of
/// the input, so scheme, authority and path are preserved byte-for-byte.
fn normalize_x_graphql_url(url: &str) -> String {
    let Some(rest) = url
        .strip_prefix("https://")
        .or_else(|| url.strip_prefix("http://"))
    else {
        return url.to_string();
    };

    // The authority ends at the first '/'. Without one there is no path that
    // could match the GraphQL prefix.
    let Some((authority, after_authority)) = rest.split_once('/') else {
        return url.to_string();
    };

    // Drop any `userinfo@` prefix BEFORE stripping the port. Otherwise an
    // authority such as `x.com:pw@evil.example` would be mistaken for host
    // `x.com`, because the text before its first ':' happens to be `x.com`.
    let host_port = authority.rsplit_once('@').map_or(authority, |(_, h)| h);
    let host = host_port.split(':').next().unwrap_or(host_port);
    // Host names are case-insensitive, so `X.com` is the same endpoint.
    if !host.eq_ignore_ascii_case("x.com") {
        return url.to_string();
    }

    let Some((path, query)) = after_authority.split_once('?') else {
        return url.to_string();
    };
    // `path` has lost its leading '/' to the authority split above, hence
    // the slash-less prefix here.
    if !path.starts_with("i/api/graphql/") || !query.starts_with("variables=") {
        return url.to_string();
    }

    match query.find('&') {
        // `query` runs to the end of `url`, so the '&' sits at this absolute
        // byte offset; everything before it is the normalized URL.
        Some(amp) => url[..url.len() - (query.len() - amp)].to_string(),
        None => url.to_string(),
    }
}
772+
701773
fn extract_host(url: &str) -> Option<String> {
702774
let after_scheme = url.split_once("://").map(|(_, rest)| rest).unwrap_or(url);
703775
let authority = after_scheme.split('/').next().unwrap_or("");
@@ -1230,6 +1302,53 @@ impl ServerCertVerifier for NoVerify {
12301302
mod tests {
12311303
use super::*;
12321304

1305+
#[test]
fn normalize_x_graphql_trims_after_variables() {
    // Real-looking x.com GraphQL URL with variables + features +
    // fieldToggles. Only the part up to and including the variables=
    // parameter should survive.
    let in_url = "https://x.com/i/api/graphql/abcd1234/TweetDetail?variables=%7B%22focalTweetId%22%3A%221234%22%7D&features=%7B%22responsive_web_graphql_timeline_navigation_enabled%22%3Atrue%7D&fieldToggles=%7B%22withArticleRichContentState%22%3Atrue%7D";
    let out = normalize_x_graphql_url(in_url);
    // Pin the exact output: prefix/contains checks alone would still pass if
    // the variables= value were truncated or otherwise mangled.
    assert_eq!(
        out,
        "https://x.com/i/api/graphql/abcd1234/TweetDetail?variables=%7B%22focalTweetId%22%3A%221234%22%7D"
    );
    assert!(!out.contains("features="));
    assert!(!out.contains("fieldToggles="));
    assert!(!out.contains('&'));
}
1316+
1317+
#[test]
fn normalize_x_graphql_leaves_non_x_hosts_alone() {
    // Every URL here has a GraphQL-looking path and/or query but a host
    // other than exactly `x.com`, so each must come back untouched.
    [
        "https://twitter.com/i/api/graphql/x/y?variables=z&features=q",
        "https://x.co/i/api/graphql/x/y?variables=z&features=q",
        "https://api.x.com/i/api/graphql/x/y?variables=z&features=q",
        "https://example.com/?variables=1&other=2",
    ]
    .iter()
    .for_each(|&input| {
        let output = normalize_x_graphql_url(input);
        assert_eq!(output, input, "should pass through: {}", input);
    });
}
1329+
1330+
#[test]
fn normalize_x_graphql_leaves_non_graphql_paths_alone() {
    // Right host, but the path/query shape does not match the GraphQL
    // pattern, so each URL must come back untouched.
    [
        "https://x.com/home",
        "https://x.com/i/api/2/notifications/view/generic.json",
        "https://x.com/i/api/graphql/x/y", // no query
        "https://x.com/i/api/graphql/x/y?features=1&variables=2", // variables not first
    ]
    .iter()
    .for_each(|&input| {
        let output = normalize_x_graphql_url(input);
        assert_eq!(output, input, "should pass through: {}", input);
    });
}
1342+
1343+
#[test]
fn normalize_x_graphql_is_idempotent() {
    let once = normalize_x_graphql_url(
        "https://x.com/i/api/graphql/H/Op?variables=%7B%7D&features=%7B%7D",
    );
    // Make sure the first pass actually rewrote the URL; otherwise a no-op
    // implementation would satisfy the idempotency check below.
    assert_eq!(once, "https://x.com/i/api/graphql/H/Op?variables=%7B%7D");
    // A second pass over already-normalized output must change nothing.
    let twice = normalize_x_graphql_url(&once);
    assert_eq!(once, twice);
}
1351+
12331352
#[test]
12341353
fn extract_host_strips_scheme_port_path() {
12351354
assert_eq!(extract_host("https://example.com/foo"), Some("example.com".into()));

0 commit comments

Comments
 (0)