I am trying to build a bot that clicks the "skip-ad" button on a page. So far, I have managed to use Mechanize to load a web-driver browser and connect to a page, but the Mechanize module does not support JavaScript directly, so, if I understand correctly, I now need something like Selenium. I am also a beginner in programming, so please be specific. How can I use Selenium for this? If there is any other solution, please explain it in detail.
This is the inner HTML code for the button:
<a id="skip-ad" class="btn btn-inverse" onclick="open_url('http://imgur.com/gallery/tDK9V68', 'go'); return false;" style="font-weight: bold; " target="_blank" href="http://imgur.com/gallery/tDK9V68"> … </a>
And this is my source so far:
#!/usr/bin/python
# FILENAME: test.py
import mechanize
import os, time
from random import choice, randrange
# Proxy addresses available to the bot; filled in at runtime by
# load_proxy_list().
prox_list = []

# Pool of common browser User-Agent strings. One is picked at random for each
# connection attempt so that requests look like they come from a browser.
user_agent_strings = [
    'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1468.0 Safari/537.36',
    'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.1) Gecko/2008071615 Fedora/3.0.1-1.fc9 Firefox/3.0.1',
    'Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14',
    'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52',
    'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:23.0) Gecko/20131011 Firefox/23.0',
    'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; Media Center PC 6.0; InfoPath.3; MS-RTC LM 8; Zune 4.7',
    'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0; .NET CLR 2.0.50727; SLCC2; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; Zune 4.0; Tablet PC 2.0; InfoPath.3; .NET4.0C; .NET4.0E)',
    'Mozilla/5.0 (compatible; MSIE 10.6; Windows NT 6.1; Trident/5.0; InfoPath.2; SLCC1; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 2.0.50727) 3gpp-gba UNTRUSTED/1.0',
    'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0; chromeframe/11.0.696.57)',
    'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0; InfoPath.1; SV1; .NET CLR 3.8.36217; WOW64; en-US)',
    'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; GTB7.4; InfoPath.2; SV1; .NET CLR 3.3.69573; WOW64; en-US)',
]
def load_proxy_list(target):
    """Load proxy addresses from a text file into the module-level prox_list.

    The file is read line by line. Surrounding whitespace (including the
    trailing newline) is stripped from every line and blank lines are
    skipped, so only usable entries are appended. The number of entries
    loaded is printed when done.

    Args:
        target: Path of the text file to read, one proxy per line.

    Raises:
        IOError / OSError: Propagated from open() if the file cannot be read.
    """
    count = 0
    # "with" guarantees the file is closed even if reading fails part-way.
    with open(target, 'r') as proxy_file:
        for line in proxy_file:
            entry = line.strip()
            if not entry:
                # Blank line: nothing that could be used as a proxy address.
                continue
            prox_list.append(entry)
            count += 1
    print("Loaded " + str(count) + " proxies!")
load_proxy_list('proxies.txt')

# range(1, 30) yields 29 connection attempts. An earlier version looped over
# the whole proxy list instead; per the original note that was dropped because
# it caused overloading.
for i in range(1, 30):
    # choice() raises IndexError on an empty list, so stop cleanly once every
    # proxy has been blacklisted (or if none were loaded in the first place).
    if not prox_list:
        print("No proxies left, stopping.")
        break

    br = mechanize.Browser()
    try:
        # Pick a random User-Agent string for this connection attempt.
        br.addheaders = [('User-agent', choice(user_agent_strings))]
        print("----------------------------------------------------")
        # NOTE: this makes mechanize ignore robots.txt, which the original
        # author already flagged as bad internet ethics.
        br.set_handle_robots(False)
        # Choose a proxy for this attempt.
        proxy = choice(prox_list)
        br.set_proxies({"http": proxy})
        br.set_debug_http(True)
        try:
            print("Trying connection with: " + str(proxy))
            # Currently using: BTC CoinURL - Grooveshark Broadcast
            br.open("http://cur.lv/4czwj")
            print("Opened successfully!")
            # Stay on the page for a while. randrange() needs integer
            # arguments: float arguments are deprecated and raise TypeError
            # on Python 3.12+.
            sleep_time_on_link = randrange(17, 34)
            time.sleep(sleep_time_on_link)
        except mechanize.HTTPError as e:
            # Must be caught before URLError, which is its base class.
            print("Oops Request threw " + str(e.code))
            # TODO: handle status codes properly. A 404 most likely means
            # the link was removed or the proxy in use is unreliable.
        except mechanize.URLError as e:
            print("Oops! Request was refused, blacklisting proxy!" + str(e))
            prox_list.remove(proxy)
    finally:
        # "del br" only drops the name; close() actually releases the
        # browser's resources, and "finally" runs it on every path.
        br.close()

    # Wait a few seconds between attempts (integer seconds, 5 to 29).
    sleep_time = randrange(5, 30)
    print("Waiting for %.1f seconds like a good bot." % (sleep_time))
    time.sleep(sleep_time)