From 4e65b8a5512df48304cc9a28d0c33bd5990af03c Mon Sep 17 00:00:00 2001
From: JamesonHuang <462430725@qq.com>
Date: Thu, 18 Jun 2015 15:57:43 +0800
Subject: [PATCH] build my http-proxy-cache-server

---
 .../my-twisted-connect-proxy/CacheUtils.py    |  69 +++++++
 .../python/my-twisted-connect-proxy/client.py | 192 ++++++++++++++++++
 .../python/my-twisted-connect-proxy/server.py | 163 +++++++++++++++
 3 files changed, 424 insertions(+)
 create mode 100644 1_7.http_proxy_server/python/my-twisted-connect-proxy/CacheUtils.py
 create mode 100644 1_7.http_proxy_server/python/my-twisted-connect-proxy/client.py
 create mode 100644 1_7.http_proxy_server/python/my-twisted-connect-proxy/server.py

diff --git a/1_7.http_proxy_server/python/my-twisted-connect-proxy/CacheUtils.py b/1_7.http_proxy_server/python/my-twisted-connect-proxy/CacheUtils.py
new file mode 100644
index 0000000..e705ba2
--- /dev/null
+++ b/1_7.http_proxy_server/python/my-twisted-connect-proxy/CacheUtils.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+# coding=utf-8
+import urllib
+import urllib2
+import json
+class CacheUtils:
+    @staticmethod
+    def cbk(a, b, c):
+        """Print download progress as a percentage (urlretrieve reporthook).
+        @a: number of blocks transferred so far
+        @b: size of one block in bytes
+        @c: total size of the remote file; <= 0 (empty/unknown) prints 0.00%
+        """
+        per = min(100.0, 100.0 * a * b / c) if c > 0 else 0.0
+        print('%.2f%%' % per)
+
+    def download(self, url, local):
+        """Fetch url into the file path local, reporting progress via cbk."""
+        urllib.urlretrieve(url, local, self.cbk)
+
+    def cache(self, url, range):
+        """Write byte range `range` of url to ./cache/ (binary, overwriting)."""
+        fileName = url.split('/')[-1]
+        req = urllib2.Request(url)
+        req.add_header('Range', 'bytes=' + range)
+        response = urllib2.urlopen(req)
+        buffer = response.read()
+        with open("./cache/" + fileName + range, "wb") as fp:
+            fp.write(buffer)
+
+    def saveReq(self, url):
+        """Store url -> 4000 in data.json, starting empty if it is missing."""
+        try:
+            with open('data.json', 'r') as fp:
+                data = json.load(fp)
+        except IOError:
+            data = {}
+        data[url] = 4000
+        with open('data.json', 'w') as fp:
+            json.dump(data, fp)
+
+    def checkReq(self):
+        """Debug helper: print two hard-coded entries read from data.json."""
+        with open('data.json', 'r') as fp:
+            data = json.load(fp)
+        #print(data)
+        #print(data.keys())
+        print(data["www.baidu.com"])
+        if data.get("key"):
+            print(data["key"])
+        else:
+            print("error")
+
+"""
+if __name__ == '__main__':
+    cacheUtils = CacheUtils()
+
+    #url = "http://www.sina.com.cn"
+    #fileName = url.split('/')[-1]
+    #cacheUtils.download(url, "./cache/" + fileName)
+
+    #cacheUtils.cache("http://www.baidu.com")
+    #cacheUtils.cache("https://ss0.bdstatic.com/5aV1bjqh_Q23odCf/static/superplus/img/logo_white_ee663702.png", "0-7000")
+    #cacheUtils.cache("https://ss0.bdstatic.com/5aV1bjqh_Q23odCf/static/superplus/img/logo_white_ee663702.png", "7001-14175")
+
+    cacheUtils.saveReq("http://www.sina.com.cn")
+
+    #cacheUtils.loadReq()
+"""
diff --git a/1_7.http_proxy_server/python/my-twisted-connect-proxy/client.py b/1_7.http_proxy_server/python/my-twisted-connect-proxy/client.py
new file mode 100644
index 0000000..665e80e
--- /dev/null
+++ b/1_7.http_proxy_server/python/my-twisted-connect-proxy/client.py
@@ -0,0 +1,192 @@
+#!/usr/bin/env python
+# coding=UTF-8
+# Copyright (c) 2014, Peter Ruibal. All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+#
+from twisted.internet import protocol, reactor
+from twisted.internet.error import CannotListenError, ConnectError
+from twisted.internet.interfaces import IReactorTCP, IReactorSSL
+
+from twisted.protocols import tls
+from twisted.python import log
+
+from twisted.web import http
+
+from zope.interface import implements
+
+
+class ProxyConnectError(ConnectError):
+    pass
+
+
+class HTTPProxyConnector(object):
+    """Helper to wrap reactor connection API (TCP, SSL) via a CONNECT proxy."""
+    implements(IReactorTCP, IReactorSSL)
+
+    def __init__(self, proxy_host, proxy_port, reactor=reactor):
+        self.proxy_host = proxy_host
+        self.proxy_port = proxy_port
+        self.reactor = reactor
+
+    def listenTCP(self, port, factory, backlog=50, interface=''):
+        raise CannotListenError("Cannot BIND via HTTP proxies")
+
+    def connectTCP(self, host, port, factory, timeout=30, bindAddress=None):
+        """Open a proxied TCP connection; return the reactor's connector."""
+        f = HTTPProxiedClientFactory(factory, host, port)
+        return self.reactor.connectTCP(self.proxy_host, self.proxy_port,
+                                       f, timeout, bindAddress)
+
+    def listenSSL(self, port, factory, contextFactory, backlog=50, interface=''):
+        raise CannotListenError("Cannot BIND via HTTP proxies")
+
+    def connectSSL(self, host, port, factory, contextFactory, timeout=30,
+                   bindAddress=None):
+        """Like connectTCP, but wrap factory in a TLS layer first."""
+        tlsFactory = tls.TLSMemoryBIOFactory(contextFactory, True, factory)
+        return self.connectTCP(host, port, tlsFactory, timeout, bindAddress)
+
+
+class HTTPProxiedClientFactory(protocol.ClientFactory):
+    """ClientFactory wrapper that triggers an HTTP proxy CONNECT on connect"""
+    def __init__(self, delegate, dst_host, dst_port):
+        self.delegate = delegate
+        self.dst_host = dst_host
+        self.dst_port = dst_port
+
+    def startedConnecting(self, connector):
+        return self.delegate.startedConnecting(connector)
+
+    def buildProtocol(self, addr):
+        p = HTTPConnectTunneler(self.dst_host, self.dst_port, addr)
+        p.factory = self
+        return p
+
+    def clientConnectionFailed(self, connector, reason):
+        return self.delegate.clientConnectionFailed(connector, reason)
+
+    def clientConnectionLost(self, connector, reason):
+        return self.delegate.clientConnectionLost(connector, reason)
+
+
+class HTTPConnectTunneler(protocol.Protocol):
+    """Protocol that wraps transport with CONNECT proxy handshake on connect
+
+    `factory` MUST be assigned in order to use this Protocol, and the value
+    *must* have a `delegate` attribute to trigger wrapped, post-connect,
+    factory (creation) methods.
+    """
+    http = None
+    otherConn = None
+    noisy = True
+
+    def __init__(self, host, port, orig_addr):
+        self.host = host
+        self.port = port
+        self.orig_addr = orig_addr
+
+    def connectionMade(self):
+        self.http = HTTPConnectSetup(self.host, self.port)
+        self.http.parent = self
+        self.http.makeConnection(self.transport)
+
+    def connectionLost(self, reason):
+        if self.noisy:
+            log.msg("HTTPConnectTunneler connectionLost", reason)
+
+        if self.otherConn is not None:
+            self.otherConn.connectionLost(reason)
+        if self.http is not None:
+            self.http.connectionLost(reason)
+
+    def proxyConnected(self):
+        # TODO: Bail if `self.factory` is unassigned or
+        # does not have a `delegate`
+        self.otherConn = self.factory.delegate.buildProtocol(self.orig_addr)
+        self.otherConn.makeConnection(self.transport)
+
+        # Get any pending data from the http buf and forward it to otherConn
+        buf = self.http.clearLineBuffer()
+        if buf:
+            self.otherConn.dataReceived(buf)
+
+    def dataReceived(self, data):
+        """Dispatch data to otherConn if tunnelled, else to self.http."""
+        if self.otherConn is not None:
+            if self.noisy:
+                log.msg("%d bytes for otherConn %s" %
+                        (len(data), self.otherConn))
+            return self.otherConn.dataReceived(data)
+        elif self.http is not None:
+            if self.noisy:
+                log.msg("%d bytes for proxy %s" %
+                        (len(data), self.http))
+            return self.http.dataReceived(data)
+        else:
+            raise Exception("No handler for received data... :(")
+
+
+class HTTPConnectSetup(http.HTTPClient):
+    """HTTPClient protocol to send a CONNECT message for proxies.
+
+    `parent` MUST be assigned to an HTTPConnectTunneler instance, or have a
+    `proxyConnected` method that will be invoked post-CONNECT (http request)
+    """
+    noisy = True
+
+    def __init__(self, host, port):
+        self.host = host
+        self.port = port
+
+    def connectionMade(self):
+        self.sendCommand('CONNECT', '%s:%d' % (self.host, self.port))
+        self.endHeaders()
+
+    def handleStatus(self, version, status, message):
+        if self.noisy:
+            log.msg("Got Status :: %s %s %s" % (status, message, version))
+        if str(status) != "200":
+            raise ProxyConnectError("Unexpected status on CONNECT: %s" % status)
+
+    def handleHeader(self, key, val):
+        if self.noisy:
+            log.msg("Got Header :: %s: %s" % (key, val))
+
+    def handleEndHeaders(self):
+        if self.noisy:
+            log.msg("End Headers")
+        # TODO: Make sure parent is assigned, and has a proxyConnected callback
+        self.parent.proxyConnected()
+
+    def handleResponse(self, body):
+        if self.noisy:
+            log.msg("Got Response :: %s" % (body))
+
+
+if __name__ == '__main__':
+    import sys
+    import argparse
+
+    log.startLogging(sys.stderr)
+
+    ap = argparse.ArgumentParser()
+    ap.add_argument('--proxy-host', default='localhost')
+    ap.add_argument('--proxy-port', default=8080, type=int)
+    ns = ap.parse_args()
+
+    proxy = HTTPProxyConnector(proxy_host=ns.proxy_host,
+                               proxy_port=ns.proxy_port)
+
+    def cb(*args, **kwargs):
+        log.msg("Got callback: args=%s, kwargs=%s" % (args, kwargs))
+
+    import twisted.web.client
+    agent = twisted.web.client.Agent(reactor=proxy)
+    #d = agent.request('GET', 'https://www.google.com/robots.txt')
+    #d = agent.request('GET', 'http://www.baidu.com')
+    d = agent.request('CONNECT', 'https://www.baidu.com')
+    d.addCallback(cb)
+
+    reactor.run()
diff --git a/1_7.http_proxy_server/python/my-twisted-connect-proxy/server.py b/1_7.http_proxy_server/python/my-twisted-connect-proxy/server.py
new file mode 100644
index 0000000..babea09
--- /dev/null
+++ b/1_7.http_proxy_server/python/my-twisted-connect-proxy/server.py
@@ -0,0 +1,163 @@
+#!/usr/bin/env python
+# coding=UTF-8
+# Copyright (c) 2014, Peter Ruibal. All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+#
+from twisted.internet.protocol import Protocol, ClientFactory
+from twisted.web.proxy import Proxy, ProxyRequest
+from twisted.python import log
+
+import urlparse
+
+
+class ConnectProxyRequest(ProxyRequest):
+    """HTTP ProxyRequest handler (factory) that supports CONNECT"""
+
+    connectedProtocol = None
+
+    def process(self):
+        # CONNECT is handled separately, by processConnectRequest
+        if self.method == 'CONNECT':
+            self.processConnectRequest()
+        else:
+            ProxyRequest.process(self)
+
+    def fail(self, message, body):
+        self.setResponseCode(501, message)
+        self.responseHeaders.addRawHeader("Content-Type", "text/html")
+        self.write(body)
+        self.finish()
+
+    def splitHostPort(self, hostport, default_port):
+        port = default_port
+        parts = hostport.split(':', 1)
+        if len(parts) == 2:
+            try:
+                port = int(parts[1])
+            except ValueError:
+                pass
+        return parts[0], port
+
+    def processConnectRequest(self):
+        parsed = urlparse.urlparse(self.uri)
+        default_port = self.ports.get(parsed.scheme)
+
+        host, port = self.splitHostPort(parsed.netloc or parsed.path,
+                                        default_port)
+        if port is None:
+            self.fail("Bad CONNECT Request",
+                      "Unable to parse port from URI: %s" % repr(self.uri))
+            return
+
+        clientFactory = ConnectProxyClientFactory(host, port, self)
+
+        # TODO provide an API to set proxy connect timeouts
+        self.reactor.connectTCP(host, port, clientFactory)
+
+# Protocol for the connection coming from the proxy's original client.
+class ConnectProxy(Proxy):
+    """HTTP Server Protocol that supports CONNECT"""
+    requestFactory = ConnectProxyRequest
+    connectedRemote = None
+
+    def requestDone(self, request):
+        """Link self to the remote after CONNECT; else defer to Proxy."""
+        if request.method == 'CONNECT' and self.connectedRemote is not None:
+            self.connectedRemote.connectedClient = self
+        else:
+            Proxy.requestDone(self, request)
+
+    def connectionLost(self, reason):
+        """Drop the connection to the remote (if any) when this
+        connection is lost, then run Proxy's own handling."""
+        if self.connectedRemote is not None:
+            self.connectedRemote.transport.loseConnection()
+        Proxy.connectionLost(self, reason)
+
+    def dataReceived(self, data):
+        # With no remote attached yet, Proxy handles the data itself.
+        if self.connectedRemote is None:
+            Proxy.dataReceived(self, data)
+        else:
+            # Once proxy is connected, forward all bytes received
+            # from the original client to the remote server.
+            # (i.e. write the data to the attached remote's transport)
+            self.connectedRemote.transport.write(data)
+
+# Protocol for the proxy's outgoing connection to the CONNECT target.
+class ConnectProxyClient(Protocol):
+    connectedClient = None
+
+    def connectionMade(self):
+        self.factory.request.channel.connectedRemote = self
+        self.factory.request.setResponseCode(200, "CONNECT OK")
+        self.factory.request.setHeader('X-Connected-IP',
+                                       self.transport.realAddress[0])
+        self.factory.request.setHeader('Content-Length', '0')
+        self.factory.request.finish()
+
+    def connectionLost(self, reason):
+        if self.connectedClient is not None:
+            self.connectedClient.transport.loseConnection()
+
+    def dataReceived(self, data):
+        if self.connectedClient is not None:
+            # Forward all bytes from the remote server back to the
+            # original connected client
+            self.connectedClient.transport.write(data)
+        else:
+            log.msg("UNEXPECTED DATA RECEIVED:", data)
+
+# Factory passed to reactor.connectTCP by processConnectRequest.
+class ConnectProxyClientFactory(ClientFactory):
+    protocol = ConnectProxyClient
+
+    def __init__(self, host, port, request):
+        self.request = request
+        self.host = host
+        self.port = port
+
+    def clientConnectionFailed(self, connector, reason):
+        self.request.fail("Gateway Error", str(reason))
+
+
+if __name__ == '__main__':
+    import sys
+    log.startLogging(sys.stderr)
+
+    import argparse
+    ap = argparse.ArgumentParser()
+    ap.add_argument('port', default=8080, nargs='?', type=int)
+    ap.add_argument('--ssl-cert', type=str)
+    ap.add_argument('--ssl-key', type=str)
+    ns = ap.parse_args()
+
+    import twisted.web.http
+    factory = twisted.web.http.HTTPFactory()
+    factory.protocol = ConnectProxy
+
+    import twisted.internet
+    if ns.ssl_key and not ns.ssl_cert:
+        log.msg("--ssl-key must be used with --ssl-cert")
+        sys.exit(1)
+    if ns.ssl_cert:
+        from twisted.internet import ssl
+        with open(ns.ssl_cert, 'rb') as fp:
+            ssl_cert = fp.read()
+        if ns.ssl_key:
+            from OpenSSL import crypto
+            with open(ns.ssl_key, 'rb') as fp:
+                ssl_key = fp.read()
+            certificate = ssl.PrivateCertificate.load(
+                ssl_cert,
+                ssl.KeyPair.load(ssl_key, crypto.FILETYPE_PEM),
+                crypto.FILETYPE_PEM)
+        else:
+            certificate = ssl.PrivateCertificate.loadPEM(ssl_cert)
+        twisted.internet.reactor.listenSSL(ns.port, factory,
+                                           certificate.options())
+    else:
+        twisted.internet.reactor.listenTCP(ns.port, factory)
+    twisted.internet.reactor.run()