update to latest trio, use contextvars for logging

This commit is contained in:
jab 2019-02-12 00:32:47 +00:00
parent 23a8055a2e
commit 7942e83d1d
4 changed files with 202 additions and 87 deletions

3
.pylintrc Normal file
View File

@ -0,0 +1,3 @@
# https://docs.pylint.org/en/latest/technical_reference/features.html
[MESSAGES CONTROL]
disable=line-too-long,missing-docstring

110
README.rst Normal file
View File

@ -0,0 +1,110 @@
trio_http_proxy.py
==================
Simple HTTP CONNECT proxy implemented with
`Trio <https://trio.readthedocs.io>`__.
Tested with Python 3.7 and Trio 0.11.0
(but other versions probably work too).
Why
---
- An HTTP CONNECT proxy is one of the simplest
async I/O things you can build
that does something real.
Namely, you can load HTTPS sites
(including streaming YouTube and Netflix)
through it.
- If you're trying to access content that's restricted by an IP-based geofence,
you could run this from a machine inside the geofence to get access!
Note: Please consult the relevant terms and conditions first
to make sure you wouldn't be breaking the rules.
😇
Also note: Many popular streaming services
blacklist IPs of major cloud hosting providers
to thwart unauthorized geofence hopping.
So you'd need to run this from
`some other hosting provider <http://lowendbox.com>`__.
- I was sold on Trio *before* I saw
`@njsmith <https://github.com/njsmith>`__
`live code happy eyeballs in 40 lines of Python
<https://www.youtube.com/watch?v=i-R704I8ySE>`__.
🙀
If you haven't yet read his post,
`Notes on structured concurrency, or: Go statement considered harmful
<https://vorpus.org/blog/notes-on-structured-concurrency-or-go-statement-considered-harmful/>`__,
definitely check it out.
Instructions
------------
#. Install Trio if you haven't already.
.. code-block::
pip install trio
#. In one shell session, run this script to start the proxy on port 8080:
.. code-block::
./trio_http_proxy.py
* Starting HTTP proxy on port 8080...
(You can set the PORT env var to use a different port if you prefer.)
#. In another session, make an HTTPS request through the proxy, e.g.
.. code-block::
curl -x http://127.0.0.1:8080 https://canhazip.com
You should get the response you were expecting from the destination server,
and should see output from the proxy in the first shell session
about the forwarded data, e.g.
.. code-block::
[conn1] Reading...
[conn1] Got CONNECT request for canhazip.com, connecting...
[conn1 <> canhazip.com] Connected to canhazip.com, sending 200 response...
[conn1 <> canhazip.com] Sent "200 Connection established" to client, tunnel established.
[conn1 -> canhazip.com] Forwarded 196 bytes
[conn1 <- canhazip.com] Forwarded 2954 bytes
...
#. You can even configure your OS or browser to use the proxy,
and then try visiting some HTTPS websites as you would normally.
It works! 💪
Plain HTTP sites won't work because the proxy only handles HTTP CONNECT requests.
But HTTP is weak sauce anyways. 🤓
*A YouTube video streaming through the proxy:*
.. image:: https://user-images.githubusercontent.com/64992/38785817-c03acd0a-414d-11e8-8f4a-2c5aa27e79e6.png
:alt: screenshot of a YouTube video streaming through the proxy
*Changing system proxy settings on macOS:*
.. image:: https://user-images.githubusercontent.com/64992/38785931-b657d804-414e-11e8-8cfa-e05a11364f7d.png
:alt: screenshot of changing system proxy settings on macOS
#. When you're done, just hit Ctrl+C to kill the server.
Don't forget to restore any proxy settings you changed
to how they were set before.
For a one-liner test that only requires one shell session, run:
.. code-block::
./trio_http_proxy.py & sleep 1; curl -x http://127.0.0.1:8080 https://canhazip.com ; kill %1

7
__init__.py Normal file
View File

@ -0,0 +1,7 @@
# Copyright 2018-2019 Joshua Bronson. All Rights Reserved.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from .trio_http_proxy import *

View File

@ -1,132 +1,127 @@
#!/usr/bin/env python3
# Copyright 2018 Joshua Bronson. All Rights Reserved.
# Copyright 2018-2019 Joshua Bronson. All Rights Reserved.
#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
"""
Simple HTTP CONNECT proxy implemented with trio: https://trio.readthedocs.io
Tested with Python 3.6 and Trio 0.3.0.
Instructions:
#. In one terminal, run this script to start the proxy on port 8080:
$ ./trio_http_proxy.py
* Starting HTTP proxy on port 8080...
(You can set the PORT env var to use a different port if you prefer.)
#. In another terminal, make an HTTPS request through the proxy, e.g.
$ curl -x http://127.0.0.1:8080 https://canhazip.com
You should get the response you were expecting from the destination server,
and should see output in the first terminal about the forwarded data, e.g.
[conn1] Got CONNECT request for canhazip.com
[conn1] Connected to canhazip.com, sending 200 response...
[conn1] Sent "200 Connection established" to client
[conn1 -> canhazip.com] Forwarded 196 bytes
[conn1 <- canhazip.com] Forwarded 2954 bytes
...
#. For even moar proxy amaze,
configure your OS or web browser to use the proxy,
and then try browsing to some HTTPS websites.
It works! 💪
HTTP sites won't work because the proxy only handles HTTP CONNECT requests.
But http is weak sauce anyways. 🤓
#. When you're done, just hit Ctrl+C to kill the server.
Don't forget to restore any proxy settings you changed
to how they were set before.
"""
from itertools import count
from functools import partial
from itertools import count
from os import getenv
from textwrap import indent
from traceback import format_exc
import trio
from contextvars import ContextVar
from trio import open_nursery, open_tcp_stream, run, serve_tcp
# Module-level configuration and logging state.
# NOTE(review): this span is a diff hunk. The first four assignments (PORT with a literal
# default, DEFAULT_BUFLEN, indented, decoded_and_indented) appear to be the pre-change
# versions of the definitions that follow them.
PORT = int(getenv('PORT', 8080))
DEFAULT_BUFLEN = 16384
indented = partial(indent, prefix=' ')
decoded_and_indented = lambda some_bytes: indented(some_bytes.decode())
# Port used when the PORT environment variable is not set.
DEFAULT_PORT = 8080
PORT = int(getenv('PORT', DEFAULT_PORT)) # pylint: disable=invalid-envvar-default
# Default upper bound, in bytes, handed to each receive_some() call.
BUFMAXLEN = 16384
# Destination ports a CONNECT request is allowed to target.
OK_CONNECT_PORTS = {443, 8443}
# print() without the trailing newline; log() uses it to emit its prefix piece by piece.
prn = partial(print, end='') # pylint: disable=C0103
indented = partial(indent, prefix=' ') # pylint: disable=C0103
decoded_and_indented = lambda some_bytes: indented(some_bytes.decode()) # pylint: disable=C0103
# Context variables read by log(): the client stream, the destination stream, and the
# stream a pipe() call is reading from. Each defaults to None until set.
CV_CLIENT_STREAM = ContextVar('client_stream', default=None)
CV_DEST_STREAM = ContextVar('dest_stream', default=None)
CV_PIPE_FROM = ContextVar('pipe_from', default=None)
# NOTE(review): diff hunk. This looks like the pre-change header and banner of
# start_server; a second definition that also accepts the connection handler as
# `server` appears further down in the same hunk.
async def start_server(port=PORT):
print(f'* Starting HTTP proxy on port {port or "(OS-chosen available port)"}...')
async def http_proxy(client_stream, _connidgen=count(1)):
    """Handle one client connection end to end.

    Tags the client stream with the next connection id, publishes it through
    CV_CLIENT_STREAM so log() can prefix messages, establishes the tunnel to
    the requested destination, and then relays data in both directions until
    both pipes finish. Any exception is logged with its traceback rather than
    propagated.

    ``_connidgen`` is a counter created once at definition time, so ids keep
    increasing across connections; callers are not expected to pass it.
    """
    client_stream.id = next(_connidgen)
    CV_CLIENT_STREAM.set(client_stream)
    async with client_stream:
        try:
            dest_stream = await tunnel(client_stream)
            async with dest_stream:
                async with open_nursery() as nursery:
                    # One pipe per direction: client -> destination, then destination -> client.
                    directions = (
                        (client_stream, dest_stream),
                        (dest_stream, client_stream),
                    )
                    for source, sink in directions:
                        nursery.start_soon(pipe, source, sink)
        except Exception:  # pylint: disable=broad-except
            log(f'\n{indented(format_exc())}')
# Announce the handler and port, then serve TCP connections with `server`
# (http_proxy by default) on `port`. A falsy `port` is shown in the banner as
# "(OS-selected port)". On KeyboardInterrupt a farewell line is printed.
# NOTE(review): diff hunk. The trio.serve_tcp(http_proxy, port) line looks like the
# pre-change call and serve_tcp(server, port) its replacement; confirm only one remains.
async def start_server(server=http_proxy, port=PORT):
print(f'* Starting {server.__name__} on port {port or "(OS-selected port)"}...')
try:
await trio.serve_tcp(http_proxy, port)
await serve_tcp(server, port)
except KeyboardInterrupt:
print('\nGoodbye for now.')
async def http_proxy(client_stream, _identgen=count(1)):
    """Handle one client connection, passing explicit per-connection loggers.

    Takes the next connection number, opens the tunnel requested by the client,
    and relays data in both directions with a logger tagged per direction.
    Any exception is printed with its traceback under the connection's tag
    rather than propagated.

    ``_identgen`` is a counter created once at definition time, so numbers
    keep increasing across connections; callers are not expected to pass it.
    """
    ident = next(_identgen)
    async with client_stream:
        try:
            tunnel_log = mklog(f'conn{ident}')
            dest_stream, dest = await tunnel(client_stream, log=tunnel_log)
            async with dest_stream:
                async with trio.open_nursery() as nursery:
                    outbound_log = mklog(f'conn{ident} -> {dest}')
                    nursery.start_soon(pipe, client_stream, dest_stream, outbound_log)
                    inbound_log = mklog(f'conn{ident} <- {dest}')
                    nursery.start_soon(pipe, dest_stream, client_stream, inbound_log)
        except Exception:
            print(f'[conn{ident}]:\n{indented(format_exc())}')
# NOTE(review): diff hunk. Lines from the pre-change tunnel (explicit `log` parameter,
# destination port fixed at 443, returns (dest_stream, dest)) are interleaved with the
# replacement, which takes only the client stream, connects to the requested host and
# port, and returns just the destination stream.
async def tunnel(client_stream, log=print):
async def tunnel(client_stream):
"""Given a stream from a client containing an HTTP CONNECT request,
open a connection to the destination server specified in the CONNECT request,
and notify the client when the end-to-end connection has been established.
Return the destination stream; the replacement code attaches the destination
host and port to it as ``host`` and ``port`` attributes (the pre-change code
returned the host alongside the stream instead).
"""
dest = await read_and_get_dest_from_http_connect_request(client_stream, log=log)
log(f'Got CONNECT request for {dest}, connecting...')
dest_stream = await trio.open_tcp_stream(dest, 443)
log(f'Connected to {dest}, sending 200 response...')
desthost, destport = await process_as_http_connect_request(client_stream)
log(f'Got CONNECT request for {desthost}:{destport}, connecting...')
dest_stream = await open_tcp_stream(desthost, destport)
# Attach the destination to the stream and publish the stream through the context
# variable; log() reads both to build its "[connN <> host]" prefix.
dest_stream.host = desthost
dest_stream.port = destport
CV_DEST_STREAM.set(dest_stream)
log(f'Connected to {desthost}, sending 200 response...')
await client_stream.send_all(b'HTTP/1.1 200 Connection established\r\n\r\n')
log('Sent "200 Connection established" to client, tunnel established.')
return dest_stream, dest
log('Sent 200 to client, tunnel established.')
return dest_stream
# NOTE(review): diff hunk. read_and_get_dest_from_http_connect_request (one read of at most
# `maxlen` bytes, destination port required to be 443, returns the host) is interleaved
# with process_as_http_connect_request, which reads up to `bufmaxlen` bytes, splits the
# CONNECT authority into host and port, requires the port to be in OK_CONNECT_PORTS, and
# returns (host, port).
# NOTE(review): all validation here uses `assert`, which Python removes when run with -O;
# the port allow-list would then not be enforced. Consider raising an explicit exception.
# NOTE(review): a single receive_some() call is assumed to deliver the whole request; a
# request that arrives split across reads would fail the trailing CRLFCRLF assertion.
async def read_and_get_dest_from_http_connect_request(stream, maxlen=256, log=print):
"""Read a stream expected to contain a valid HTTP CONNECT request to desthost:443.
async def process_as_http_connect_request(stream, bufmaxlen=BUFMAXLEN):
"""Read a stream expected to contain a valid HTTP CONNECT request to desthost:destport.
Parse and return the destination host. Validate (lightly) and raise if request invalid.
See https://tools.ietf.org/html/rfc7231#section-4.3.6 for the CONNECT spec.
"""
log(f'Reading...')
bytes_read = await stream.receive_some(maxlen)
assert bytes_read.endswith(b'\r\n\r\n'), f'CONNECT request did not fit in {maxlen} bytes?\n{decoded_and_indented(bytes_read)}'
bytes_read = await stream.receive_some(bufmaxlen)
assert bytes_read.endswith(b'\r\n\r\n'), f'CONNECT request did not fit in {bufmaxlen} bytes?\n{decoded_and_indented(bytes_read)}'
# Only examine the first two tokens (e.g. "CONNECT example.com:443 [ignored...]").
# The Host header should duplicate the CONNECT request's authority and should therefore be safe
# to ignore. Plus apparently some clients (iOS, Facebook) don't even send a Host header in
# CONNECT requests according to https://go-review.googlesource.com/c/go/+/44004.
split = bytes_read.split(maxsplit=2)
assert len(split) == 3, f'No "CONNECT foo:443 HTTP/1.1"?\n{decoded_and_indented(bytes_read)}'
connect, dest, _ = split
assert connect == b'CONNECT', f'{connect}\n{decoded_and_indented(bytes_read)}'
assert dest.endswith(b':443'), f'{dest}\n{decoded_and_indented(bytes_read)}'
return dest[:-4].decode()
assert len(split) == 3, f'Expected "<method> <authority> ..."\n{decoded_and_indented(bytes_read)}'
method, authority, _ = split
assert method == b'CONNECT', f'Expected "CONNECT", "{method}" unsupported\n{decoded_and_indented(bytes_read)}'
desthost, colon, destport = authority.partition(b':')
assert colon and destport, f'Expected ":<port>" in {authority}\n{decoded_and_indented(bytes_read)}'
destport = int(destport.decode())
assert destport in OK_CONNECT_PORTS, f'Forbidden destination port: {destport}'
return desthost.decode(), destport
# NOTE(review): diff hunk. The pre-change pipe() header and its receive_some(buflen) line
# are interleaved with the replacement, which splits the work in two:
#   read_all(stream, bufmaxlen) - async generator yielding chunks of at most `bufmaxlen`
#       bytes until receive_some() returns an empty chunk;
#   pipe(from_stream, to_stream, bufmaxlen) - sends each chunk from read_all() to
#       `to_stream` and logs the number of bytes forwarded.
async def pipe(from_stream, to_stream, log=print, buflen=DEFAULT_BUFLEN):
async def read_all(stream, bufmaxlen=BUFMAXLEN):
while True:
chunk = await from_stream.receive_some(buflen)
chunk = await stream.receive_some(bufmaxlen)
if not chunk:
break
yield chunk
async def pipe(from_stream, to_stream, bufmaxlen=BUFMAXLEN):
# Record which stream this call reads from; log() compares it with the client stream
# to choose between the "->" and "<-" direction markers.
CV_PIPE_FROM.set(from_stream)
async for chunk in read_all(from_stream, bufmaxlen=bufmaxlen): # pylint: disable=E1133; https://github.com/PyCQA/pylint/issues/2311
await to_stream.send_all(chunk)
log(f'Forwarded {len(chunk)} bytes')
log(f'Pipe finished')
def mklog(tag):
    """Return a print-like callable that prefixes every message with ``[tag]``.

    The returned callable forwards its positional and keyword arguments to
    ``print``, placing the bracketed tag first.
    """
    return partial(print, f'[{tag}]')
def log(*args, **kw):
    """Print a message prefixed with the current connection context.

    The prefix is built from the module's context variables:

    - no client stream set: no prefix at all;
    - client stream only: ``[conn<id>] ``;
    - client and destination streams: ``[conn<id> <> <host>] ``;
    - additionally a pipe source set: ``->`` when the source is the client
      stream, ``<-`` otherwise, in place of ``<>``.

    ``args`` and ``kw`` are passed to ``print`` unchanged. The prefix is
    assembled first and written in a single call to the same ``file`` as the
    message, so redirected output keeps prefix and message together.
    """
    prefix = ''
    client_stream = CV_CLIENT_STREAM.get()
    if client_stream:
        prefix = f'[conn{client_stream.id}'
        dest_stream = CV_DEST_STREAM.get()
        if dest_stream:
            pipe_from = CV_PIPE_FROM.get()
            if not pipe_from:
                direction = '<>'
            elif pipe_from is client_stream:
                direction = '->'
            else:
                direction = '<-'
            prefix += f' {direction} {dest_stream.host}'
        prefix += '] '
    if prefix:
        # file=None makes print() fall back to sys.stdout, matching the default case.
        print(prefix, end='', file=kw.get('file'))
    print(*args, **kw)
# Script entry point: start the proxy server under Trio's run loop.
# NOTE(review): diff hunk. trio.run(start_server) looks like the pre-change spelling of the
# run(start_server) call that follows it; confirm only one remains.
if __name__ == '__main__':
trio.run(start_server)
run(start_server)