update to latest trio, use contextvars for logging

This commit is contained in:
jab 2019-02-12 00:32:47 +00:00
parent 23a8055a2e
commit 7942e83d1d
4 changed files with 202 additions and 87 deletions

3
.pylintrc Normal file
View File

@ -0,0 +1,3 @@
# https://docs.pylint.org/en/latest/technical_reference/features.html
[MESSAGES CONTROL]
disable=line-too-long,missing-docstring

110
README.rst Normal file
View File

@ -0,0 +1,110 @@
trio_http_proxy.py
==================
Simple HTTP CONNECT proxy implemented with
`Trio <https://trio.readthedocs.io>`__.
Tested with Python 3.7 and Trio 0.11.0
(but other versions probably work too).
Why
---
- An HTTP CONNECT proxy is one of the simplest
async I/O things you can build
that does something real.
Namely, you can load HTTPS sites
(including streaming YouTube and Netflix)
through it.
- If you're trying to access content that's restricted by an IP-based geofence,
you could run this from a machine inside the geofence to get access!
Note: Please consult the relevant terms and conditions first
to make sure you wouldn't be breaking the rules.
😇
Also note: Many popular streaming services
blacklist IPs of major cloud hosting providers
to thwart unauthorized geofence hopping.
So you'd need to run this from
`some other hosting provider <http://lowendbox.com>`__.
- I was sold on Trio *before* I saw
`@njsmith <https://github.com/njsmith>`__
`live code happy eyeballs in 40 lines of Python
<https://www.youtube.com/watch?v=i-R704I8ySE>`__.
🙀
If you haven't yet read his post,
`Notes on structured concurrency, or: Go statement considered harmful
<https://vorpus.org/blog/notes-on-structured-concurrency-or-go-statement-considered-harmful/>`__
definitely check it out.
Instructions
------------
#. Install Trio if you haven't already.
.. code-block::
pip install trio
#. In one shell session, run this script to start the proxy on port 8080:
.. code-block::
./trio_http_proxy.py
* Starting http_proxy on port 8080...
(You can set the PORT env var to use a different port if you prefer.)
#. In another session, make an HTTPS request through the proxy, e.g.
.. code-block::
curl -x http://127.0.0.1:8080 https://canhazip.com
You should get the response you were expecting from the destination server,
and should see output from the proxy in the first shell session
about the forwarded data, e.g.
.. code-block::
[conn1] Reading...
[conn1] Got CONNECT request for canhazip.com:443, connecting...
[conn1 <> canhazip.com] Connected to canhazip.com, sending 200 response...
[conn1 <> canhazip.com] Sent 200 to client, tunnel established.
[conn1 -> canhazip.com] Forwarded 196 bytes
[conn1 <- canhazip.com] Forwarded 2954 bytes
...
#. You can even configure your OS or browser to use the proxy,
and then try visiting some HTTPS websites as you would normally.
It works! 💪
HTTP sites won't work because the proxy only handles HTTP CONNECT requests.
But HTTP is weak sauce anyways. 🤓
*A YouTube video streaming through the proxy:*
.. image:: https://user-images.githubusercontent.com/64992/38785817-c03acd0a-414d-11e8-8f4a-2c5aa27e79e6.png
:alt: screenshot of a YouTube video streaming through the proxy
*Changing system proxy settings on macOS:*
.. image:: https://user-images.githubusercontent.com/64992/38785931-b657d804-414e-11e8-8cfa-e05a11364f7d.png
:alt: screenshot of changing system proxy settings on macOS
#. When you're done, just hit Ctrl+C to kill the server.
Don't forget to restore any proxy settings you changed
to how they were set before.
For a one-liner test that only requires one shell session, run:
.. code-block::
./trio_http_proxy.py & sleep 1; curl -x http://127.0.0.1:8080 https://canhazip.com ; kill %1

7
__init__.py Normal file
View File

@ -0,0 +1,7 @@
# Copyright 2018-2019 Joshua Bronson. All Rights Reserved.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from .trio_http_proxy import *

View File

@ -1,132 +1,127 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# Copyright 2018 Joshua Bronson. All Rights Reserved. # Copyright 2018-2019 Joshua Bronson. All Rights Reserved.
# #
# This Source Code Form is subject to the terms of the Mozilla Public # This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this # License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/. # file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""
Simple HTTP CONNECT proxy implemented with trio: https://trio.readthedocs.io
Tested with Python 3.6 and Trio 0.3.0.
Instructions:
#. In one terminal, run this script to start the proxy on port 8080:
$ ./trio_http_proxy.py
* Starting HTTP proxy on port 8080...
(You can set the PORT env var to use a different port if you prefer.)
#. In another terminal, make an HTTPS request through the proxy, e.g.
$ curl -x http://127.0.0.1:8080 https://canhazip.com
You should get the response you were expecting from the destination server,
and should see output in the first terminal about the forwarded data, e.g.
[conn1] Got CONNECT request for canhazip.com
[conn1] Connected to canhazip.com, sending 200 response...
[conn1] Sent "200 Connection established" to client
[conn1 -> canhazip.com] Forwarded 196 bytes
[conn1 <- canhazip.com] Forwarded 2954 bytes
...
#. For even moar proxy amaze,
configure your OS or web browser to use the proxy,
and then try browsing to some HTTPS websites.
It works! 💪
HTTP sites won't work because the proxy only handles HTTP CONNECT requests.
But http is weak sauce anyways. 🤓
#. When you're done, just hit Ctrl+C to kill the server.
Don't forget to restore any proxy settings you changed
to how they were set before.
"""
from itertools import count
from functools import partial from functools import partial
from itertools import count
from os import getenv from os import getenv
from textwrap import indent from textwrap import indent
from traceback import format_exc from traceback import format_exc
import trio from contextvars import ContextVar
from trio import open_nursery, open_tcp_stream, run, serve_tcp
# Port used when the PORT environment variable is not set.
DEFAULT_PORT = 8080
# Listening port. os.getenv returns strings, so the fallback is passed as a
# string too; a value of 0 is announced by start_server as an OS-selected port.
PORT = int(getenv('PORT', str(DEFAULT_PORT)))
# Upper bound on the number of bytes requested per receive_some() call.
BUFMAXLEN = 16384
# Destination ports a CONNECT request is allowed to target.
OK_CONNECT_PORTS = {443, 8443}

# print() without the trailing newline; used to emit log-line prefixes.
prn = partial(print, end='')  # pylint: disable=C0103
# Prefix every line of a string so multi-line text nests under a log line.
indented = partial(indent, prefix=' ')  # pylint: disable=C0103


def decoded_and_indented(some_bytes):
    """Decode *some_bytes* for display and indent every resulting line.

    This is used to quote raw request bytes inside error messages, so bytes
    that are not valid UTF-8 are replaced with U+FFFD instead of raising:
    building the error report must not fail with a second, unrelated error.
    """
    return indented(some_bytes.decode(errors='replace'))


# Logging context read by log(). Each defaults to None until a task sets it.
CV_CLIENT_STREAM = ContextVar('client_stream', default=None)
CV_DEST_STREAM = ContextVar('dest_stream', default=None)
CV_PIPE_FROM = ContextVar('pipe_from', default=None)
async def http_proxy(client_stream, _connidgen=count(1)):
    """Serve one client connection: set up a CONNECT tunnel, then relay both ways.

    The connection takes the next number from the shared ``_connidgen`` counter
    (stored as ``client_stream.id``) and is published through
    ``CV_CLIENT_STREAM`` so that ``log`` can tag its output. Any ``Exception``
    raised while tunnelling or relaying is logged with its traceback instead of
    propagating. The client stream is closed on exit.
    """
    conn_id = next(_connidgen)
    client_stream.id = conn_id
    CV_CLIENT_STREAM.set(client_stream)
    async with client_stream:
        try:
            dest_stream = await tunnel(client_stream)
            async with dest_stream:
                async with open_nursery() as relay:
                    # One task per direction of the tunnel.
                    relay.start_soon(pipe, client_stream, dest_stream)
                    relay.start_soon(pipe, dest_stream, client_stream)
        except Exception:  # pylint: disable=broad-except
            traceback_text = indented(format_exc())
            log(f'\n{traceback_text}')
async def start_server(server=http_proxy, port=PORT):
    """Announce the listener, then serve *server* over TCP on *port*.

    A falsy *port* is announced as "(OS-selected port)". ``KeyboardInterrupt``
    is caught so the function ends with a goodbye message instead of a
    traceback.
    """
    shown_port = port or "(OS-selected port)"
    print(f'* Starting {server.__name__} on port {shown_port}...')
    try:
        await serve_tcp(server, port)
    except KeyboardInterrupt:
        print('\nGoodbye for now.')
async def tunnel(client_stream):
    """Given a stream from a client containing an HTTP CONNECT request,
    open a connection to the destination server specified in the CONNECT request,
    and notify the client when the end-to-end connection has been established.

    Return the destination stream. The requested host and port are recorded on
    it as ``host`` and ``port``, and it is published through ``CV_DEST_STREAM``
    so that ``log`` can include the destination in its prefix.

    If anything fails after the destination connection has been opened
    (including cancellation), that connection is closed before the error
    propagates; the caller owns the stream only on a normal return.
    """
    desthost, destport = await process_as_http_connect_request(client_stream)
    log(f'Got CONNECT request for {desthost}:{destport}, connecting...')
    dest_stream = await open_tcp_stream(desthost, destport)
    try:
        dest_stream.host = desthost
        dest_stream.port = destport
        CV_DEST_STREAM.set(dest_stream)
        log(f'Connected to {desthost}, sending 200 response...')
        await client_stream.send_all(b'HTTP/1.1 200 Connection established\r\n\r\n')
        log('Sent 200 to client, tunnel established.')
    except BaseException:
        # BaseException so that cancellation is covered too: without this the
        # freshly opened destination connection would never be closed.
        await dest_stream.aclose()
        raise
    return dest_stream
async def process_as_http_connect_request(stream, bufmaxlen=BUFMAXLEN):
    """Read an HTTP CONNECT request from *stream* and return its destination.

    Reads until the blank line ending the request head has arrived, the peer
    closes the stream, or *bufmaxlen* bytes have been read, then validates the
    request (lightly).

    Returns a ``(desthost, destport)`` tuple: the host as ``str`` and the port
    as ``int``.

    Raises ``ValueError`` if the request is incomplete or too long, is not a
    CONNECT request, has no ``:<port>`` in its authority, or names a port that
    is not in ``OK_CONNECT_PORTS``. Explicit raises are used rather than
    ``assert`` because assertions are stripped under ``python -O``, which would
    silently disable the port allowlist.

    See https://tools.ietf.org/html/rfc7231#section-4.3.6 for the CONNECT spec.
    """
    terminator = b'\r\n\r\n'

    log('Reading...')
    bytes_read = b''
    # A single receive_some() call may return only part of the request, so keep
    # reading until the terminator shows up or the size budget is used up.
    while terminator not in bytes_read and len(bytes_read) < bufmaxlen:
        chunk = await stream.receive_some(bufmaxlen - len(bytes_read))
        if not chunk:  # peer closed the stream before finishing the request
            break
        bytes_read += chunk

    def rejected(reason):
        # Build the error with the offending request quoted underneath.
        # Undecodable bytes are replaced so that reporting cannot itself fail.
        shown_request = indented(bytes_read.decode(errors='replace'))
        return ValueError(f'{reason}\n{shown_request}')

    if not bytes_read.endswith(terminator):
        raise rejected(f'CONNECT request did not fit in {bufmaxlen} bytes?')

    # Only examine the first two tokens (e.g. "CONNECT example.com:443 [ignored...]").
    # The Host header should duplicate the CONNECT request's authority and should therefore be safe
    # to ignore. Plus apparently some clients (iOS, Facebook) don't even send a Host header in
    # CONNECT requests according to https://go-review.googlesource.com/c/go/+/44004.
    split = bytes_read.split(maxsplit=2)
    if len(split) != 3:
        raise rejected('Expected "<method> <authority> ..."')
    method, authority, _ = split
    if method != b'CONNECT':
        raise rejected(f'Expected "CONNECT", "{method}" unsupported')
    desthost, colon, destport = authority.partition(b':')
    if not (colon and destport):
        raise rejected(f'Expected ":<port>" in {authority}')
    # int() and decode() raise ValueError (or its subclass UnicodeDecodeError)
    # on a malformed host or port, matching the errors raised above.
    destport = int(destport.decode())
    if destport not in OK_CONNECT_PORTS:
        raise ValueError(f'Forbidden destination port: {destport}')
    return desthost.decode(), destport
async def read_all(stream, bufmaxlen=BUFMAXLEN):
    """Asynchronously yield chunks received from *stream* until it is exhausted.

    Each chunk is the result of one ``receive_some(bufmaxlen)`` call; iteration
    stops at the first empty chunk, which is not yielded.
    """
    chunk = await stream.receive_some(bufmaxlen)
    while chunk:
        yield chunk
        chunk = await stream.receive_some(bufmaxlen)
async def pipe(from_stream, to_stream, bufmaxlen=BUFMAXLEN):
    """Forward everything read from *from_stream* to *to_stream*.

    Records *from_stream* in ``CV_PIPE_FROM`` so that ``log`` can show the
    direction of this pipe, logs the size of every forwarded chunk, and logs
    once more when *from_stream* has been exhausted.
    """
    CV_PIPE_FROM.set(from_stream)
    incoming = read_all(from_stream, bufmaxlen=bufmaxlen)
    async for data in incoming:  # pylint: disable=E1133; https://github.com/PyCQA/pylint/issues/2311
        await to_stream.send_all(data)
        nbytes = len(data)
        log(f'Forwarded {nbytes} bytes')
    log('Pipe finished')
def log(*args, **kw):
    """Print a log line, prefixed with the current connection's context.

    Accepts the same arguments as ``print``. The prefix is built from the
    context variables:

    - no client stream set: no prefix
    - client stream only: ``[connN] ``
    - destination stream set too: ``[connN <> host] ``
    - inside a pipe: ``[connN -> host] `` when piping from the client,
      ``[connN <- host] `` when piping from the destination

    The prefix is written to the same ``file`` as the message, so a caller that
    redirects the message does not get a log line split across two streams.
    """
    prefix = ''
    client_stream = CV_CLIENT_STREAM.get()
    if client_stream is not None:
        prefix = f'[conn{client_stream.id}'
        dest_stream = CV_DEST_STREAM.get()
        if dest_stream is not None:
            pipe_from = CV_PIPE_FROM.get()
            if pipe_from is None:
                direction = '<>'
            elif pipe_from is client_stream:
                direction = '->'
            else:
                direction = '<-'
            prefix += f' {direction} {dest_stream.host}'
        prefix += '] '
    if prefix:
        # file=None makes print() fall back to sys.stdout.
        print(prefix, end='', file=kw.get('file'))
    print(*args, **kw)
if __name__ == '__main__':
    # Script entry point: hand the server coroutine to Trio's run().
    run(start_server)