Twisted Python getPage
- by David Dixon II
I tried to get support on this but I am TOTALLY confused.
Here's my code:
from twisted.internet import reactor
from twisted.web.client import getPage
from twisted.web.error import Error
from twisted.internet.defer import DeferredList
from sys import argv
class GrabPage:
def __init__(self, page):
self.page = page
def start(self, *args):
if args == ():
# We apparently don't need authentication for this
d1 = getPage(self.page)
else:
if len(args) == 2:
# We have our login information
d1 = getPage(self.page, headers={"Authorization": " ".join(args)})
else:
raise Exception('Missing parameters')
d1.addCallback(self.pageCallback)
dl = DeferredList([d1])
d1.addErrback(self.errorHandler)
dl.addCallback(self.listCallback)
def errorHandler(self,result):
# Bad thingy!
pass
def pageCallback(self, result):
return result
def listCallback(self, result):
print result
a = GrabPage('http://www.google.com')
data = a.start() # Not the HTML
I wish to get the HTML out which is given to pageCallback when start() is called. This has been a pita for me. Ty! And sorry for my sucky coding.