Just enabled multiple-page support for the poor man's web-page delta tracker:
#!/Users/hdiwan/.virtualenvs/globetrekker/bin/python
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
import argparse
import hashlib
import json
import logging
import pprint
import smtplib
def get_globetrekker_page(site):
    """Open ``site`` in a new Chrome WebDriver session and return the driver.

    The caller owns the returned browser and is responsible for calling
    ``quit()`` on it once done.

    Args:
        site: URL to navigate to.

    Returns:
        The ``webdriver.Chrome`` instance after ``get(site)`` has returned.

    Raises:
        Any exception raised while starting Chrome or navigating is
        propagated unchanged.
    """
    browser = webdriver.Chrome()
    try:
        browser.get(site)
    except Exception:
        # On failure the caller never receives the driver, so nobody else
        # could shut this session down; do it here before re-raising.
        browser.quit()
        raise
    return browser
def send_mail(msg, user, password):
    """Send ``msg`` from ``user`` to ``user`` via smtp.gmail.com:587.

    The session is upgraded with STARTTLS before logging in.

    Args:
        msg: Complete message text (headers and body) handed to
            ``SMTP.sendmail``.
        user: Login name; also used as both sender and recipient address.
        password: Password for ``user``.

    Raises:
        Any exception raised by ``smtplib`` while connecting, logging in or
        sending is propagated; the connection is closed first.
    """
    server = smtplib.SMTP('smtp.gmail.com', 587)
    try:
        server.ehlo()
        server.starttls()
        # EHLO must be repeated after STARTTLS, as in the original sequence.
        server.ehlo()
        server.login(user, password)
        server.sendmail(user, user, msg)
    finally:
        # Always release the socket, whether or not sending succeeded.
        try:
            server.quit()
        except smtplib.SMTPServerDisconnected:
            # The server already dropped the connection; just free the socket
            # so this does not mask the original error.
            server.close()
# JSON state file: a list of single-entry ``{url: last_seen_hash}`` objects.
STATE_FILE = '/var/tmp/.globetrekker.txt'


def _load_watchlist(path):
    """Return the watch list stored at ``path``, or ``[]`` if it cannot be opened."""
    try:
        with open(path, 'r') as fin:
            return json.load(fin)
    except IOError:
        return []


def _save_watchlist(path, entries):
    """Write ``entries`` to ``path`` as JSON, replacing any previous content."""
    with open(path, 'w') as fout:
        json.dump(entries, fout)


def _page_digest(browser):
    """Return the SHA-1 hex digest of the visible text of the loaded page."""
    encoding = 'ascii'
    text = browser.find_element_by_tag_name('html').text
    # Non-ASCII characters are replaced so the digest is computed over plain
    # ASCII bytes only.
    encoded = text.encode(encoding, errors='replace')
    logging.debug(encoded)
    logging.debug(encoded.decode(encoding, errors='replace'))
    # Hash the bytes, not the decoded text: hashlib requires bytes on
    # Python 3, and for pure-ASCII content the digest is the same.
    return hashlib.sha1(encoded).hexdigest()


def _refresh_entry(entry, url, user, password):
    """Re-hash ``url``; on change, mail ``user`` and update ``entry[url]``."""
    logging.debug('{} is our URL'.format(url))
    try:
        browser = get_globetrekker_page(url)
    except WebDriverException as e:
        # Best effort: an unreachable page must not stop the remaining checks,
        # but the failure should at least be visible in the log.
        logging.warning('Could not load {}: {}'.format(url, e))
        return
    try:
        new_hash = _page_digest(browser)
    finally:
        # Release the browser even if reading the page fails.
        browser.quit()
    logging.debug('Calculated hash code: {}'.format(new_hash))
    logging.debug('Stored hash: {}'.format(entry[url]))
    if new_hash != entry[url]:
        logging.debug('{} changed'.format(url))
        send_mail(u'Subject: {} Change detected\r\n\r\n--H'.format(url), user, password)
        entry[url] = new_hash


if __name__ == '__main__':
    argparser = argparse.ArgumentParser(description='Check a website for changes')
    argparser.add_argument('-n', '--url', type=str, default=None, help='Add URL to watcher', action='store')
    argparser.add_argument('-l', '--list', action='store_true')
    argparser.add_argument('-u', '--user', type=str, default='hd1@jsc.d8u.us', help='Your username', action='store')
    argparser.add_argument('-p', '--password', type=str, help='Your password', action='store')
    argparser.add_argument('-v', '--verbose', action='store_true')
    parsed = argparser.parse_args()

    # -v turns on debug output; otherwise only fatal messages are shown.
    logging.basicConfig(level=logging.DEBUG if parsed.verbose else logging.FATAL)

    if parsed.url:
        # Register a new URL. The placeholder hash 0 never equals a real
        # digest, so the first check always reports a change.
        new_entry = {parsed.url: 0}
        logging.debug(json.dumps(new_entry))
        watchlist = _load_watchlist(STATE_FILE)
        watchlist.append(new_entry)
        # Persist in both cases (file present or absent); previously the
        # appended entry was dropped when the file already existed.
        _save_watchlist(STATE_FILE, watchlist)
    else:
        watchlist = _load_watchlist(STATE_FILE)
        logging.debug(watchlist)
        if parsed.list:
            for entry in watchlist:
                print(entry)
        elif watchlist:
            for entry in watchlist:
                # Copy the keys: the entry's value is reassigned during the loop.
                for url in list(entry):
                    _refresh_entry(entry, url, parsed.user, parsed.password)
            _save_watchlist(STATE_FILE, watchlist)
No comments:
Post a Comment