From 7942e83d1dd569b857ca9049b41a9447cabf0f04 Mon Sep 17 00:00:00 2001 From: jab Date: Tue, 12 Feb 2019 00:32:47 +0000 Subject: [PATCH] update to latest trio, use contextvars for logging --- .pylintrc | 3 + README.rst | 110 +++++++++++++++++++++++++++++ __init__.py | 7 ++ trio_http_proxy.py | 169 ++++++++++++++++++++++----------------------- 4 files changed, 202 insertions(+), 87 deletions(-) create mode 100644 .pylintrc create mode 100644 README.rst create mode 100644 __init__.py diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..6b7bafa --- /dev/null +++ b/.pylintrc @@ -0,0 +1,3 @@ +# https://docs.pylint.org/en/latest/technical_reference/features.html +[MESSAGES CONTROL] +disable=line-too-long,missing-docstring diff --git a/README.rst b/README.rst new file mode 100644 index 0000000..ebba2ad --- /dev/null +++ b/README.rst @@ -0,0 +1,110 @@ +trio_http_proxy.py +================== + +Simple HTTP CONNECT proxy implemented with +`Trio <https://trio.readthedocs.io>`__. + +Tested with Python 3.7 and Trio 0.11.0 +(but other versions probably work too). + + +Why +--- + +- An HTTP CONNECT proxy is one of the simplest + async I/O things you can build + that does something real. + Namely, you can load HTTPS sites + (including streaming YouTube and Netflix) + through it. + +- If you're trying to access content that's restricted by an IP-based geofence, + you could run this from a machine inside the geofence to get access! + + Note: Please consult the relevant terms and conditions first + to make sure you wouldn't be breaking the rules. + 😇 + + Also note: Many popular streaming services + blacklist IPs of major cloud hosting providers + to thwart unauthorized geofence hopping. + So you'd need to run this from + `some other hosting provider `__. + +- I was sold on Trio *before* I saw + `@njsmith `__ + `live code happy eyeballs in 40 lines of Python + `__. 
+ 🙀 + + If you haven't yet read his post, + `Notes on structured concurrency, or: Go statement considered harmful + <https://vorpus.org/blog/notes-on-structured-concurrency-or-go-statement-considered-harmful/>`__ + definitely check it out. + + +Instructions +------------ + +#. Install Trio if you haven't already. + + .. code-block:: + + pip install trio + +#. In one shell session, run this script to start the proxy on port 8080: + + .. code-block:: + + ./trio_http_proxy.py + * Starting http_proxy on port 8080... + + (You can set the PORT env var to use a different port if you prefer.) + +#. In another session, make an HTTPS request through the proxy, e.g. + + .. code-block:: + + curl -x http://127.0.0.1:8080 https://canhazip.com + + You should get the response you were expecting from the destination server, + and should see output from the proxy in the first shell session + about the forwarded data, e.g. + + .. code-block:: + + [conn1] Reading... + [conn1] Got CONNECT request for canhazip.com:443, connecting... + [conn1 <> canhazip.com] Connected to canhazip.com, sending 200 response... + [conn1 <> canhazip.com] Sent 200 to client, tunnel established. + [conn1 -> canhazip.com] Forwarded 196 bytes + [conn1 <- canhazip.com] Forwarded 2954 bytes + ... + +#. You can even configure your OS or browser to use the proxy, + and then try visiting some HTTPS websites as you would normally. + It works! 💪 + + HTTP sites won't work because the proxy only handles HTTP CONNECT requests. + But HTTP is weak sauce anyways. 🤓 + + *A YouTube video streaming through the proxy:* + + .. image:: https://user-images.githubusercontent.com/64992/38785817-c03acd0a-414d-11e8-8f4a-2c5aa27e79e6.png + :alt: screenshot of a YouTube video streaming through the proxy + + *Changing system proxy settings on macOS:* + + .. image:: https://user-images.githubusercontent.com/64992/38785931-b657d804-414e-11e8-8cfa-e05a11364f7d.png + :alt: screenshot of changing system proxy settings on macOS + +#. When you're done, just hit Ctrl+C to kill the server. 
+ Don't forget to restore any proxy settings you changed + to how they were set before. + + +For a one-liner test that only requires one shell session, run: + +.. code-block:: + + ./trio_http_proxy.py & sleep 1; curl -x http://127.0.0.1:8080 https://canhazip.com ; kill %1 diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..489d44d --- /dev/null +++ b/__init__.py @@ -0,0 +1,7 @@ +# Copyright 2018-2019 Joshua Bronson. All Rights Reserved. +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +from .trio_http_proxy import * diff --git a/trio_http_proxy.py b/trio_http_proxy.py index 362f409..38017d6 100755 --- a/trio_http_proxy.py +++ b/trio_http_proxy.py @@ -1,132 +1,127 @@ #!/usr/bin/env python3 -# Copyright 2018 Joshua Bronson. All Rights Reserved. +# Copyright 2018-2019 Joshua Bronson. All Rights Reserved. # # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this # file, You can obtain one at http://mozilla.org/MPL/2.0/. -""" -Simple HTTP CONNECT proxy implemented with trio: https://trio.readthedocs.io - -Tested with Python 3.6 and Trio 0.3.0. - -Instructions: - -#. In one terminal, run this script to start the proxy on port 8080: - - $ ./trio_http_proxy.py - * Starting HTTP proxy on port 8080... - - (You can set the PORT env var to use a different port if you prefer.) - -#. In another terminal, make an HTTPS request through the proxy, e.g. - - $ curl -x http://127.0.0.1:8080 https://canhazip.com - - You should get the response you were expecting from the destination server, - and should see output in the first terminal about the forwarded data, e.g. - - [conn1] Got CONNECT request for canhazip.com - [conn1] Connected to canhazip.com, sending 200 response... 
- [conn1] Sent "200 Connection established" to client - [conn1 -> canhazip.com] Forwarded 196 bytes - [conn1 <- canhazip.com] Forwarded 2954 bytes - ... - -#. For even moar proxy amaze, - configure your OS or web browser to use the proxy, - and then try browsing to some HTTPS websites. - It works! 💪 - - HTTP sites won't work because the proxy only handles HTTP CONNECT requests. - But http is weak sauce anyways. 🤓 - -#. When you're done, just hit Ctrl+C to kill the server. - Don't forget to restore any proxy settings you changed - to how they were set before. - -""" - -from itertools import count from functools import partial +from itertools import count from os import getenv from textwrap import indent from traceback import format_exc -import trio +from contextvars import ContextVar +from trio import open_nursery, open_tcp_stream, run, serve_tcp -PORT = int(getenv('PORT', 8080)) -DEFAULT_BUFLEN = 16384 -indented = partial(indent, prefix=' ') -decoded_and_indented = lambda some_bytes: indented(some_bytes.decode()) +DEFAULT_PORT = 8080 +PORT = int(getenv('PORT', DEFAULT_PORT)) # pylint: disable=invalid-envvar-default +BUFMAXLEN = 16384 +OK_CONNECT_PORTS = {443, 8443} + +prn = partial(print, end='') # pylint: disable=C0103 +indented = partial(indent, prefix=' ') # pylint: disable=C0103 +decoded_and_indented = lambda some_bytes: indented(some_bytes.decode()) # pylint: disable=C0103 + +CV_CLIENT_STREAM = ContextVar('client_stream', default=None) +CV_DEST_STREAM = ContextVar('dest_stream', default=None) +CV_PIPE_FROM = ContextVar('pipe_from', default=None) -async def start_server(port=PORT): - print(f'* Starting HTTP proxy on port {port or "(OS-chosen available port)"}...') +async def http_proxy(client_stream, _connidgen=count(1)): + client_stream.id = next(_connidgen) + CV_CLIENT_STREAM.set(client_stream) + async with client_stream: + try: + dest_stream = await tunnel(client_stream) + async with dest_stream, open_nursery() as nursery: + nursery.start_soon(pipe, 
client_stream, dest_stream) + nursery.start_soon(pipe, dest_stream, client_stream) + except Exception: # pylint: disable=broad-except + log(f'\n{indented(format_exc())}') + + +async def start_server(server=http_proxy, port=PORT): + print(f'* Starting {server.__name__} on port {port or "(OS-selected port)"}...') try: - await trio.serve_tcp(http_proxy, port) + await serve_tcp(server, port) except KeyboardInterrupt: print('\nGoodbye for now.') -async def http_proxy(client_stream, _identgen=count(1)): - ident = next(_identgen) - async with client_stream: - try: - dest_stream, dest = await tunnel(client_stream, log=mklog(f'conn{ident}')) - async with dest_stream, trio.open_nursery() as nursery: - nursery.start_soon(pipe, client_stream, dest_stream, mklog(f'conn{ident} -> {dest}')) - nursery.start_soon(pipe, dest_stream, client_stream, mklog(f'conn{ident} <- {dest}')) - except Exception: - print(f'[conn{ident}]:\n{indented(format_exc())}') - - -async def tunnel(client_stream, log=print): +async def tunnel(client_stream): """Given a stream from a client containing an HTTP CONNECT request, open a connection to the destination server specified in the CONNECT request, and notify the client when the end-to-end connection has been established. Return the destination stream and the corresponding host. 
""" - dest = await read_and_get_dest_from_http_connect_request(client_stream, log=log) - log(f'Got CONNECT request for {dest}, connecting...') - dest_stream = await trio.open_tcp_stream(dest, 443) - log(f'Connected to {dest}, sending 200 response...') + desthost, destport = await process_as_http_connect_request(client_stream) + log(f'Got CONNECT request for {desthost}:{destport}, connecting...') + dest_stream = await open_tcp_stream(desthost, destport) + dest_stream.host = desthost + dest_stream.port = destport + CV_DEST_STREAM.set(dest_stream) + log(f'Connected to {desthost}, sending 200 response...') await client_stream.send_all(b'HTTP/1.1 200 Connection established\r\n\r\n') - log('Sent "200 Connection established" to client, tunnel established.') - return dest_stream, dest + log('Sent 200 to client, tunnel established.') + return dest_stream -async def read_and_get_dest_from_http_connect_request(stream, maxlen=256, log=print): - """Read a stream expected to contain a valid HTTP CONNECT request to desthost:443. +async def process_as_http_connect_request(stream, bufmaxlen=BUFMAXLEN): + """Read a stream expected to contain a valid HTTP CONNECT request to desthost:destport. Parse and return the destination host. Validate (lightly) and raise if request invalid. + See https://tools.ietf.org/html/rfc7231#section-4.3.6 for the CONNECT spec. """ log(f'Reading...') - bytes_read = await stream.receive_some(maxlen) - assert bytes_read.endswith(b'\r\n\r\n'), f'CONNECT request did not fit in {maxlen} bytes?\n{decoded_and_indented(bytes_read)}' + bytes_read = await stream.receive_some(bufmaxlen) + assert bytes_read.endswith(b'\r\n\r\n'), f'CONNECT request did not fit in {bufmaxlen} bytes?\n{decoded_and_indented(bytes_read)}' + # Only examine the first two tokens (e.g. "CONNECT example.com:443 [ignored...]"). + # The Host header should duplicate the CONNECT request's authority and should therefore be safe + # to ignore. 
Plus apparently some clients (iOS, Facebook) don't even send a Host header in + # CONNECT requests according to https://go-review.googlesource.com/c/go/+/44004. split = bytes_read.split(maxsplit=2) - assert len(split) == 3, f'No "CONNECT foo:443 HTTP/1.1"?\n{decoded_and_indented(bytes_read)}' - connect, dest, _ = split - assert connect == b'CONNECT', f'{connect}\n{decoded_and_indented(bytes_read)}' - assert dest.endswith(b':443'), f'{dest}\n{decoded_and_indented(bytes_read)}' - return dest[:-4].decode() + assert len(split) == 3, f'Expected " ..."\n{decoded_and_indented(bytes_read)}' + method, authority, _ = split + assert method == b'CONNECT', f'Expected "CONNECT", "{method}" unsupported\n{decoded_and_indented(bytes_read)}' + desthost, colon, destport = authority.partition(b':') + assert colon and destport, f'Expected ":" in {authority}\n{decoded_and_indented(bytes_read)}' + destport = int(destport.decode()) + assert destport in OK_CONNECT_PORTS, f'Forbidden destination port: {destport}' + return desthost.decode(), destport -async def pipe(from_stream, to_stream, log=print, buflen=DEFAULT_BUFLEN): +async def read_all(stream, bufmaxlen=BUFMAXLEN): while True: - chunk = await from_stream.receive_some(buflen) + chunk = await stream.receive_some(bufmaxlen) if not chunk: break + yield chunk + + +async def pipe(from_stream, to_stream, bufmaxlen=BUFMAXLEN): + CV_PIPE_FROM.set(from_stream) + async for chunk in read_all(from_stream, bufmaxlen=bufmaxlen): # pylint: disable=E1133; https://github.com/PyCQA/pylint/issues/2311 await to_stream.send_all(chunk) log(f'Forwarded {len(chunk)} bytes') + log(f'Pipe finished') -def mklog(tag): - def log(*args, **kw): - print(f'[{tag}]', *args, **kw) - return log +def log(*args, **kw): + client_stream = CV_CLIENT_STREAM.get() + if client_stream: + prn(f'[conn{client_stream.id}') + dest_stream = CV_DEST_STREAM.get() + if dest_stream: + direction = '<>' + pipe_from = CV_PIPE_FROM.get() + if pipe_from: + direction = '->' if pipe_from is 
client_stream else '<-' + prn(f' {direction} {dest_stream.host}') + prn('] ') + print(*args, **kw) if __name__ == '__main__': - trio.run(start_server) + run(start_server)