solar/docker-chrome/cdp_proxy.py

147 lines
5.0 KiB
Python

#!/usr/bin/env python3
"""
Minimal CDP proxy: listen on 0.0.0.0:9222, forward to 127.0.0.1:9223.
- Rewrites Host header (client->upstream) to 127.0.0.1:9223 so Chrome's CDP accepts connections.
- Rewrites the first upstream response: 127.0.0.1:9223 -> degelas-chrome:9222 so Playwright uses ws://degelas-chrome:9222/...
  and the WebSocket connection goes through this proxy instead of the backend's own localhost.
"""
import socket
import re
import threading
import sys
# Address this proxy accepts client connections on (all interfaces, port 9222).
LISTEN = ("0.0.0.0", 9222)
# Address every accepted connection is forwarded to.
TARGET = ("127.0.0.1", 9223)

# Complete header line substituted for the client's Host header before the
# request is sent upstream.
HOST_REPLACEMENT = b"Host: 127.0.0.1:9223"
# host:port substituted for the upstream address in response bodies, so that
# Playwright opens its WebSocket against degelas-chrome:9222 (this proxy)
# rather than 127.0.0.1:9223.
WS_HOST_REPLACEMENT = b"degelas-chrome:9222"
def rewrite_host(data: bytes) -> bytes:
    """Replace the request's Host header line with HOST_REPLACEMENT.

    Only the header section (everything before the first blank line) is
    searched, and the match is anchored to the start of a line, so neither
    body bytes nor headers that merely end in "Host:" (for example
    ``X-Forwarded-Host:``) are touched. Header names are matched
    case-insensitively, so ``host:`` is rewritten as well.

    Args:
        data: Raw bytes of an HTTP request (headers, optionally followed by
            the beginning of a body).

    Returns:
        ``data`` with at most one Host header line replaced; unchanged when
        no Host header is present.
    """
    head, separator, remainder = data.partition(b"\r\n\r\n")
    # [ \t]* rather than \s* so an empty Host value cannot make the match run
    # across the CRLF and swallow the following header line.
    new_head = re.sub(
        rb"^Host:[ \t]*[^\r\n]*",
        lambda _match: HOST_REPLACEMENT,
        head,
        count=1,
        flags=re.IGNORECASE | re.MULTILINE,
    )
    return new_head + separator + remainder
def rewrite_ws_url(data: bytes) -> bytes:
    """Point every upstream address found in ``data`` at this proxy.

    Each occurrence of the literal ``127.0.0.1:9223`` is replaced by
    WS_HOST_REPLACEMENT; all other bytes are returned untouched.
    """
    upstream_authority = b"127.0.0.1:9223"
    rewritten = data.replace(upstream_authority, WS_HOST_REPLACEMENT)
    return rewritten
def parse_content_length(headers: bytes) -> int | None:
"""Extract Content-Length from HTTP headers. Returns None if not found."""
m = re.search(rb"Content-Length:\s*(\d+)", headers, re.IGNORECASE)
return int(m.group(1)) if m else None
def update_content_length(headers: bytes, new_len: int) -> bytes:
    """Set the Content-Length header in ``headers`` to ``new_len``.

    Only the first header line whose name is exactly Content-Length
    (case-insensitive, anchored at the start of a line) is rewritten, so
    headers such as ``X-Content-Length`` are left alone. The rewritten line
    is always spelled ``Content-Length: <new_len>``.

    Args:
        headers: Raw header bytes (status/request line plus header lines).
        new_len: The length value to write.

    Returns:
        The header bytes with the value replaced; unchanged when there is no
        Content-Length header.
    """
    replacement = b"Content-Length: " + str(new_len).encode()
    # A callable replacement is inserted literally, bypassing re's template
    # escape processing.
    return re.sub(
        rb"^Content-Length:[ \t]*\d+",
        lambda _match: replacement,
        headers,
        count=1,
        flags=re.IGNORECASE | re.MULTILINE,
    )
def _read_head(sock: socket.socket, limit: int) -> bytes:
    """Read from ``sock`` until a blank line is buffered, ``limit`` bytes are reached, or EOF."""
    buffered = b""
    while b"\r\n\r\n" not in buffered and len(buffered) < limit:
        chunk = sock.recv(4096)
        if not chunk:
            break
        buffered += chunk
    return buffered


def _relay(src: socket.socket, dst: socket.socket) -> None:
    """Copy bytes from ``src`` to ``dst`` until EOF or a socket error, then half-close both."""
    try:
        while True:
            data = src.recv(4096)
            if not data:
                break
            dst.sendall(data)
    except OSError:
        # Covers BrokenPipeError / ConnectionResetError / timeouts: the peer
        # is gone, so this direction is simply finished.
        pass
    finally:
        # Shut each side down independently so a failure on one socket
        # cannot prevent the EOF from being propagated on the other.
        for sock, how in ((src, socket.SHUT_RD), (dst, socket.SHUT_WR)):
            try:
                sock.shutdown(how)
            except OSError:
                pass


def handle(client: socket.socket):
    """Serve one accepted client connection by proxying it to TARGET.

    Steps:
      1. Connect to TARGET and read the client's first request head
         (up to 8192 bytes); rewrite its Host header and send it upstream.
      2. Read the first upstream response head (up to 16384 bytes). If it
         declares a Content-Length, read the whole body, rewrite the
         upstream address inside it, fix Content-Length and send the result
         to the client. Otherwise pass the bytes through unchanged.
      3. Relay all further bytes in both directions, unmodified, until
         either side closes (same-connection WebSocket or later requests).

    Both sockets are always closed before returning. Network errors end the
    connection silently; any other exception is reported on stderr.

    Args:
        client: The accepted client socket. Ownership passes to this
            function, which closes it.
    """
    upstream = None
    relay_thread = None
    try:
        upstream = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        upstream.settimeout(30)
        upstream.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
        upstream.connect(TARGET)
        client.settimeout(30)
        client.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)

        request = _read_head(client, 8192)
        if b"\r\n\r\n" not in request and len(request) < 8192:
            # The client hung up before sending a complete request head.
            return
        upstream.sendall(rewrite_host(request))

        # Read the first response so its body can be rewritten and its
        # Content-Length corrected.
        response = _read_head(upstream, 16384)
        if response:
            head, separator, rest = response.partition(b"\r\n\r\n")
            declared_len = parse_content_length(head)
            # Without the blank-line separator the head is incomplete, so
            # there is no body boundary to rewrite against: pass through.
            if separator and declared_len is not None:
                body, extra = rest[:declared_len], rest[declared_len:]
                while len(body) < declared_len:
                    more = upstream.recv(min(4096, declared_len - len(body)))
                    if not more:
                        break
                    body += more
                new_body = rewrite_ws_url(body)
                new_head = update_content_length(head, len(new_body))
                # `extra` holds bytes already received past the declared
                # body; forward them instead of dropping them.
                client.sendall(new_head + b"\r\n\r\n" + new_body + extra)
            else:
                client.sendall(response)

        # The 30 s timeout is only meant for the initial exchange. A CDP
        # WebSocket can legitimately stay silent in one direction for much
        # longer, and a recv timeout there would tear the session down.
        # TCP keepalive is enabled so that dead peers are still detected.
        for sock in (client, upstream):
            sock.settimeout(None)
            sock.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)

        relay_thread = threading.Thread(
            target=_relay, args=(client, upstream), daemon=True
        )
        relay_thread.start()
        _relay(upstream, client)
    except OSError:
        # Routine network failure (refused, reset, timed out): drop the
        # connection quietly, as before.
        pass
    except Exception as exc:
        # Anything else is a programming error; surface it rather than
        # swallowing it silently.
        print(f"cdp_proxy: unexpected error: {exc!r}", file=sys.stderr)
    finally:
        open_sockets = [sock for sock in (client, upstream) if sock is not None]
        # Full shutdown first: it unblocks the relay thread if it is still
        # waiting in recv(), so the sockets are not closed underneath it.
        for sock in open_sockets:
            try:
                sock.shutdown(socket.SHUT_RDWR)
            except OSError:
                pass
        if relay_thread is not None:
            relay_thread.join(timeout=5)
        for sock in open_sockets:
            try:
                sock.close()
            except OSError:
                pass
def main():
    """Accept connections on LISTEN forever, serving each in its own daemon thread.

    The listening socket is managed by a ``with`` block so it is closed if
    bind/listen/accept raises or the process is interrupted, instead of
    being left to interpreter teardown.
    """
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as server:
        # Allow an immediate restart on the same port after the proxy exits.
        server.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        server.bind(LISTEN)
        server.listen(50)
        while True:
            client, _ = server.accept()
            threading.Thread(target=handle, args=(client,), daemon=True).start()
# Script entry point: run the proxy, and exit with status 0 should main()
# ever return (it returns None, so `or 0` yields the explicit status).
if __name__ == "__main__":
    sys.exit(main() or 0)