build my http-proxy-cache-server

This commit is contained in:
JamesonHuang 2015-06-18 15:57:43 +08:00
parent 9a1a025dec
commit 4e65b8a551
3 changed files with 424 additions and 0 deletions

View File

@ -0,0 +1,69 @@
#!/usr/bin/env python
# coding=utf-8
import urllib
import urllib2
import json
class CacheUtils:
    """Helpers for downloading URLs and recording requests in data.json."""

    @staticmethod
    def cbk(a, b, c):
        """Download progress callback: print the percentage fetched so far.

        @a: number of data blocks downloaded so far
        @b: size of one data block
        @c: size of the remote file; zero or negative when it is empty
            or was not reported
        """
        if c > 0:
            per = min(100.0, 100.0 * a * b / c)
        else:
            # No usable total size: dividing by it would raise
            # ZeroDivisionError or yield a negative percentage.
            per = 0.0
        print('%.2f%%' % per)

    def download(self, url, local):
        """Download ``url`` to the path ``local``, reporting progress via cbk.

        @url: address of the remote file
        @local: destination path on disk
        """
        urllib.urlretrieve(url, local, self.cbk)

    def cache(self, url, range):
        """Fetch one byte range of ``url`` and append it to a cache file.

        @url: address of the remote file
        @range: byte range as placed in the Range header, e.g. "0-7000"

        The data is appended to ./cache/<last path segment of url><range>.
        """
        import os  # local import keeps this block self-contained

        fileName = url.split('/')[-1]
        req = urllib2.Request(url)
        req.add_header('Range', 'bytes=' + range)
        response = urllib2.urlopen(req)
        try:
            body = response.read()
        finally:
            # Release the connection even when the read fails.
            response.close()
        if not os.path.isdir("./cache"):
            os.makedirs("./cache")
        # Binary append: the payload is raw bytes, so a text-mode handle
        # could corrupt it on platforms that translate line endings.
        with open("./cache/" + fileName + range, "ab") as fp:
            fp.write(body)

    def saveReq(self, url):
        """Record ``url`` in data.json with the value 4000.

        @url: key to store

        Existing entries are preserved; a missing data.json is treated as
        an empty mapping so the first call can create the file.
        """
        import os  # local import keeps this block self-contained

        data = {}
        if os.path.isfile('data.json'):
            with open('data.json', 'r') as fp:
                data = json.load(fp)
        data[url] = 4000
        with open('data.json', 'w') as fp:
            json.dump(data, fp)

    def checkReq(self):
        """Print what data.json holds for "www.baidu.com" and for "key".

        Prints "error" when "key" is missing or has a falsy value.
        """
        with open('data.json', 'r') as fp:
            data = json.load(fp)
        # .get() so that a file without this entry prints None instead of
        # raising KeyError.
        print(data.get("www.baidu.com"))
        if data.get("key"):
            print(data["key"])
        else:
            print("error")
# Disabled manual driver: everything below is held in a bare string literal,
# so none of it runs. NOTE(review): it mentions cacheUtils.loadReq(), which
# CacheUtils does not define.
"""
if __name__ == '__main__':
cacheUtils = CacheUtils()
#url = "http://www.sina.com.cn"
#fileName = url.split('/')[-1]
#cacheUtils.download(url, "./cache/" + fileName)
#cacheUtils.cache("http://www.baidu.com")
#cacheUtils.cache("https://ss0.bdstatic.com/5aV1bjqh_Q23odCf/static/superplus/img/logo_white_ee663702.png", "0-7000")
#cacheUtils.cache("https://ss0.bdstatic.com/5aV1bjqh_Q23odCf/static/superplus/img/logo_white_ee663702.png", "7001-14175")
cacheUtils.saveReq("http://www.sina.com.cn")
#cacheUtils.loadReq()
"""

View File

@ -0,0 +1,192 @@
#!/usr/bin/env python
# coding=UTF-8
# Copyright (c) 2014, Peter Ruibal. All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
#
from twisted.internet import protocol, reactor
from twisted.internet.error import CannotListenError, ConnectError
from twisted.internet.interfaces import IReactorTCP, IReactorSSL
from twisted.protocols import tls
from twisted.python import log
from twisted.web import http
from zope.interface import implements
class ProxyConnectError(ConnectError):
    """Raised when the proxy answers CONNECT with a status other than 200."""
class HTTPProxyConnector(object):
    """Helper to wrap reactor connection API (TCP, SSL) via a CONNECT proxy."""

    implements(IReactorTCP, IReactorSSL)

    def __init__(self, proxy_host, proxy_port,
                 reactor=reactor):
        """Remember the proxy address and the reactor used to reach it.

        @proxy_host: host name or address of the HTTP proxy
        @proxy_port: TCP port of the HTTP proxy
        @reactor: reactor whose connectTCP performs the real connection
        """
        self.proxy_host = proxy_host
        self.proxy_port = proxy_port
        self.reactor = reactor

    def listenTCP(self, port, factory, backlog=50, interface=''):
        """Always fails: a CONNECT proxy cannot accept inbound connections.

        Raises CannotListenError.
        """
        # CannotListenError takes (interface, port, socketError); the text
        # goes in the last slot so the raise does not end in TypeError.
        raise CannotListenError(interface, port,
                                "Cannot BIND via HTTP proxies")

    def connectTCP(self, host, port, factory, timeout=30, bindAddress=None):
        """Connect ``factory`` to host:port through the proxy.

        The real TCP connection goes to the proxy; ``factory`` is wrapped so
        that a CONNECT handshake happens before its protocol is built.

        Returns whatever the underlying reactor's connectTCP returns.
        """
        f = HTTPProxiedClientFactory(factory, host, port)
        return self.reactor.connectTCP(self.proxy_host,
                                       self.proxy_port,
                                       f, timeout, bindAddress)

    def listenSSL(self, port, factory, contextFactory, backlog=50,
                  interface=''):
        """Always fails: a CONNECT proxy cannot accept inbound connections.

        Raises CannotListenError.
        """
        raise CannotListenError(interface, port,
                                "Cannot BIND via HTTP proxies")

    def connectSSL(self, host, port, factory, contextFactory, timeout=30,
                   bindAddress=None):
        """Connect ``factory`` to host:port over TLS through the proxy.

        ``factory`` is wrapped in a client-side TLSMemoryBIOFactory and the
        result is handed to connectTCP, whose return value is passed on.
        """
        tlsFactory = tls.TLSMemoryBIOFactory(contextFactory, True, factory)
        return self.connectTCP(host, port, tlsFactory, timeout, bindAddress)
class HTTPProxiedClientFactory(protocol.ClientFactory):
    """ClientFactory wrapper that triggers an HTTP proxy CONNECT on connect"""

    def __init__(self, delegate, dst_host, dst_port):
        """Store the wrapped factory and the destination behind the proxy.

        @delegate: the factory being wrapped; connection events are
                   forwarded to it
        @dst_host: host named in the CONNECT request
        @dst_port: port named in the CONNECT request
        """
        self.dst_port = dst_port
        self.dst_host = dst_host
        self.delegate = delegate

    def buildProtocol(self, addr):
        """Return an HTTPConnectTunneler bound to this factory."""
        tunnel = HTTPConnectTunneler(self.dst_host, self.dst_port, addr)
        tunnel.factory = self
        return tunnel

    # The three notifications below are passed straight to the delegate.

    def startedConnecting(self, connector):
        """Forward the notification to the wrapped factory."""
        return self.delegate.startedConnecting(connector)

    def clientConnectionFailed(self, connector, reason):
        """Forward the notification to the wrapped factory."""
        return self.delegate.clientConnectionFailed(connector, reason)

    def clientConnectionLost(self, connector, reason):
        """Forward the notification to the wrapped factory."""
        return self.delegate.clientConnectionLost(connector, reason)
class HTTPConnectTunneler(protocol.Protocol):
    """Protocol that wraps transport with CONNECT proxy handshake on connect

    `factory` MUST be assigned in order to use this Protocol, and the value
    *must* have a `delegate` attribute to trigger wrapped, post-connect,
    factory (creation) methods.
    """
    # HTTPConnectSetup performing the handshake; created in connectionMade.
    http = None
    # Protocol built by the delegate factory once the handshake is done.
    otherConn = None
    # When true, traffic and connection events are logged.
    noisy = True

    def __init__(self, host, port, orig_addr):
        """Remember the CONNECT destination and the address being built for.

        @host: host named in the CONNECT request
        @port: port named in the CONNECT request
        @orig_addr: address passed to the factory's buildProtocol; handed
                    on to the delegate's buildProtocol later
        """
        self.host = host
        self.port = port
        self.orig_addr = orig_addr

    def connectionMade(self):
        """Start the CONNECT handshake on the freshly opened transport."""
        self.http = HTTPConnectSetup(self.host, self.port)
        self.http.parent = self
        self.http.makeConnection(self.transport)

    def connectionLost(self, reason):
        """Tell the tunnelled protocol and the handshake helper, if present."""
        if self.noisy:
            log.msg("HTTPConnectTunneler connectionLost", reason)
        if self.otherConn is not None:
            self.otherConn.connectionLost(reason)
        if self.http is not None:
            self.http.connectionLost(reason)

    def proxyConnected(self):
        """Build the delegate's protocol and attach it to the transport.

        Called by the handshake helper once the response headers have ended.
        """
        # TODO: Bail if `self.factory` is unassigned or
        # does not have a `delegate`
        self.otherConn = self.factory.delegate.buildProtocol(self.orig_addr)
        self.otherConn.makeConnection(self.transport)
        # Get any pending data from the http buf and forward it to otherConn
        buf = self.http.clearLineBuffer()
        if buf:
            self.otherConn.dataReceived(buf)

    def dataReceived(self, data):
        """Route bytes to the tunnelled protocol, else to the handshake helper.

        Raises Exception when neither handler exists.
        """
        if self.otherConn is not None:
            if self.noisy:
                log.msg("%d bytes for otherConn %s" %
                        (len(data), self.otherConn))
            return self.otherConn.dataReceived(data)
        elif self.http is not None:
            if self.noisy:
                # Name the handshake helper: otherConn is always None in
                # this branch, so logging it said nothing.
                log.msg("%d bytes for proxy %s" %
                        (len(data), self.http))
            return self.http.dataReceived(data)
        else:
            raise Exception("No handler for received data... :(")
class HTTPConnectSetup(http.HTTPClient):
    """HTTPClient protocol to send a CONNECT message for proxies.

    `parent` MUST be assigned to an HTTPConnectTunneler instance, or have a
    `proxyConnected` method that will be invoked post-CONNECT (http request)
    """
    # When true, every stage of the response is logged.
    noisy = True

    def __init__(self, host, port):
        """Remember the destination named in the CONNECT request.

        @host: destination host
        @port: destination port (formatted with %d)
        """
        self.port = port
        self.host = host

    def connectionMade(self):
        """Send the CONNECT request line followed by an empty header block."""
        target = '%s:%d' % (self.host, self.port)
        self.sendCommand('CONNECT', target)
        self.endHeaders()

    def handleStatus(self, version, status, message):
        """Accept only a 200 status; raise ProxyConnectError otherwise."""
        if self.noisy:
            log.msg("Got Status :: %s %s %s" % (status, message, version))
        if str(status) == "200":
            return
        raise ProxyConnectError("Unexpected status on CONNECT: %s" % status)

    def handleHeader(self, key, val):
        """Log the header when noisy; headers are otherwise ignored."""
        if self.noisy:
            log.msg("Got Header :: %s: %s" % (key, val))

    def handleEndHeaders(self):
        """Signal the parent that the proxy response headers have ended."""
        if self.noisy:
            log.msg("End Headers")
        # TODO: Make sure parent is assigned, and has a proxyConnected callback
        self.parent.proxyConnected()

    def handleResponse(self, body):
        """Log the response body when noisy."""
        if self.noisy:
            log.msg("Got Response :: %s" % (body))
if __name__ == '__main__':
    # Manual smoke test: issue one request through the proxy and log the
    # result of the Deferred.
    import sys
    import argparse
    log.startLogging(sys.stderr)

    parser = argparse.ArgumentParser()
    parser.add_argument('--proxy-host', default='localhost')
    parser.add_argument('--proxy-port', default=8080, type=int)
    options = parser.parse_args()

    proxy = HTTPProxyConnector(proxy_host=options.proxy_host,
                               proxy_port=options.proxy_port)

    def cb(*args, **kwargs):
        """Log whatever the request Deferred fires with."""
        log.msg("Got callback: args=%s, kwargs=%s" %
                (args, kwargs))

    import twisted.web.client
    # The proxy connector stands in for the reactor handed to Agent.
    agent = twisted.web.client.Agent(reactor=proxy)
    # Alternative requests kept for manual testing:
    #d = agent.request('GET', 'https://www.google.com/robots.txt')
    #d = agent.request('GET', 'http://www.baidu.com')
    pending = agent.request('CONNECT', 'https://www.baidu.com')
    pending.addCallback(cb)
    reactor.run()

View File

@ -0,0 +1,163 @@
#!/usr/bin/env python
# coding=UTF-8
# Copyright (c) 2014, Peter Ruibal. All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
#
from twisted.internet.protocol import Protocol, ClientFactory
from twisted.web.proxy import Proxy, ProxyRequest
from twisted.python import log
import urlparse
class ConnectProxyRequest(ProxyRequest):
    """HTTP ProxyRequest handler (factory) that supports CONNECT"""

    connectedProtocol = None

    def process(self):
        """Dispatch CONNECT to processConnectRequest, the rest to the base."""
        # CONNECT is implemented separately in processConnectRequest.
        if self.method != 'CONNECT':
            ProxyRequest.process(self)
            return
        self.processConnectRequest()

    def fail(self, message, body):
        """Finish the request with a 501 response.

        @message: reason phrase sent with the 501 status
        @body: response body, sent as text/html
        """
        self.setResponseCode(501, message)
        self.responseHeaders.addRawHeader("Content-Type", "text/html")
        self.write(body)
        self.finish()

    def splitHostPort(self, hostport, default_port):
        """Split "host:port" at the first colon into (host, port).

        @hostport: string to split
        @default_port: port returned when there is no colon or the text
                       after it is not an integer
        """
        host, colon, tail = hostport.partition(':')
        if not colon:
            return host, default_port
        try:
            return host, int(tail)
        except ValueError:
            # Unparseable port text: fall back to the default.
            return host, default_port

    def processConnectRequest(self):
        """Open a TCP connection to the target named in the request URI.

        Fails the request when no port can be determined.
        """
        uri_parts = urlparse.urlparse(self.uri)
        fallback_port = self.ports.get(uri_parts.scheme)
        target = uri_parts.netloc or uri_parts.path
        host, port = self.splitHostPort(target, fallback_port)
        if port is None:
            self.fail("Bad CONNECT Request",
                      "Unable to parse port from URI: %s" % repr(self.uri))
            return

        # TODO provide an API to set proxy connect timeouts
        self.reactor.connectTCP(
            host, port, ConnectProxyClientFactory(host, port, self))
# A protocol-like object that plays the client role here
class ConnectProxy(Proxy):
    """HTTP Server Protocol that supports CONNECT"""

    requestFactory = ConnectProxyRequest
    # Protocol connected to the remote server; None until a CONNECT
    # request has established one.
    connectedRemote = None

    def requestDone(self, request):
        """Finish a request.

        For a CONNECT request with an established remote connection, this
        protocol is registered as the remote side's connectedClient; every
        other request is handled by Proxy.requestDone.
        """
        if request.method != 'CONNECT' or self.connectedRemote is None:
            Proxy.requestDone(self, request)
            return
        self.connectedRemote.connectedClient = self

    def connectionLost(self, reason):
        """Close the remote connection, if any, then run the base handler."""
        remote = self.connectedRemote
        if remote is not None:
            remote.transport.loseConnection()
        Proxy.connectionLost(self, reason)

    def dataReceived(self, data):
        """Parse data as HTTP until a tunnel exists, then relay it verbatim."""
        remote = self.connectedRemote
        if remote is not None:
            # Once proxy is connected, forward all bytes received
            # from the original client to the remote server.
            remote.transport.write(data)
            return
        # No tunnel yet: let the base class handle the bytes.
        Proxy.dataReceived(self, data)
# Plays the role of an ordinary server
class ConnectProxyClient(Protocol):
    """Protocol for the proxy's outgoing connection to the CONNECT target."""

    # Protocol to which received bytes are forwarded; None until assigned.
    connectedClient = None

    def connectionMade(self):
        """Register with the request's channel and answer the CONNECT."""
        request = self.factory.request
        request.channel.connectedRemote = self
        request.setResponseCode(200, "CONNECT OK")
        request.setHeader('X-Connected-IP',
                          self.transport.realAddress[0])
        request.setHeader('Content-Length', '0')
        request.finish()

    def connectionLost(self, reason):
        """Close the connected client's transport, if there is one."""
        client = self.connectedClient
        if client is not None:
            client.transport.loseConnection()

    def dataReceived(self, data):
        """Relay bytes to the connected client, or log them if there is none."""
        client = self.connectedClient
        if client is None:
            log.msg("UNEXPECTED DATA RECEIVED:", data)
            return
        # Forward all bytes from the remote server back to the
        # original connected client
        client.transport.write(data)
# This object is activated once data has been received
class ConnectProxyClientFactory(ClientFactory):
    """Factory for ConnectProxyClient, tied to one CONNECT request."""

    protocol = ConnectProxyClient

    def __init__(self, host, port, request):
        """Store the target address and the request being served.

        @host: target host of the CONNECT
        @port: target port of the CONNECT
        @request: the request object; its fail() is called when the
                  connection attempt fails
        """
        self.host = host
        self.port = port
        self.request = request

    def clientConnectionFailed(self, connector, reason):
        """Fail the request, using the failure text as the body."""
        failure_text = str(reason)
        self.request.fail("Gateway Error", failure_text)
if __name__ == '__main__':
    # Run the CONNECT-capable proxy on the given port, over TLS when a
    # certificate is supplied and over plain TCP otherwise.
    import sys
    log.startLogging(sys.stderr)

    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('port', default=8080, nargs='?', type=int)
    parser.add_argument('--ssl-cert', type=str)
    parser.add_argument('--ssl-key', type=str)
    options = parser.parse_args()

    import twisted.web.http
    factory = twisted.web.http.HTTPFactory()
    factory.protocol = ConnectProxy

    import twisted.internet

    # A key on its own is not usable.
    if options.ssl_key and not options.ssl_cert:
        log.msg("--ssl-key must be used with --ssl-cert")
        sys.exit(1)

    if not options.ssl_cert:
        twisted.internet.reactor.listenTCP(options.port, factory)
    else:
        from twisted.internet import ssl

        with open(options.ssl_cert, 'rb') as cert_file:
            ssl_cert = cert_file.read()

        if not options.ssl_key:
            # No separate key: load the certificate file with loadPEM.
            certificate = ssl.PrivateCertificate.loadPEM(ssl_cert)
        else:
            from OpenSSL import crypto
            with open(options.ssl_key, 'rb') as key_file:
                ssl_key = key_file.read()
            certificate = ssl.PrivateCertificate.load(
                ssl_cert,
                ssl.KeyPair.load(ssl_key, crypto.FILETYPE_PEM),
                crypto.FILETYPE_PEM)

        twisted.internet.reactor.listenSSL(options.port, factory,
                                           certificate.options())

    twisted.internet.reactor.run()