September 1, 2014

How to Record Data

I know it's Labor Day and whatnot, and I'm supposed to be celebrating the end of Burning Man at a barbecue, but I'm not. Instead, I'm tweaking things, which brings me to what I just accomplished -- a Flask-based REST API to my data, in Python, naturally:

from backports import lzma
import csv
import datetime
from flask import Flask, request, Response
import json

# TODO force SSL for post -- http://flask.pocoo.org/snippets/111/

DATA_FILE = 'sanguine.csv.xz'
app = Flask(__name__)

@app.route('/', methods=['GET'])
def index():
    # Decompress the CSV and return every row as a JSON array of objects.
    with lzma.LZMAFile(DATA_FILE, 'r') as data:
        reader = csv.DictReader(data, fieldnames=['Timestamp', 'User', 'Latitude', 'Longitude'], quoting=csv.QUOTE_MINIMAL, lineterminator='\r\n')
        reader.next()  # fieldnames are given explicitly, so skip the header line
        return Response(json.dumps(list(reader)), mimetype='application/json')

@app.route('/', methods=['POST'])
def newdatapiece():
    # Append one row; LZMAFile in 'a' mode adds a new compressed block to the file.
    with lzma.LZMAFile(DATA_FILE, 'a') as data:
        writer = csv.DictWriter(data, fieldnames=['Timestamp', 'User', 'Latitude', 'Longitude'], quoting=csv.QUOTE_MINIMAL, lineterminator='\r\n')
        row = {}
        row['Timestamp'] = datetime.datetime.now().strftime('%s')  # seconds since the epoch; '%s' relies on the platform's strftime(3)
        row['User'] = request.form['user_id']
        row['Latitude'] = request.form['lat']
        row['Longitude'] = request.form['lon']
        writer.writerow(row)
    return '', 201

@app.route('/analyze', methods=['GET'])
def analysis():
    # Serve the decompressed CSV as-is, header included.
    with lzma.LZMAFile(DATA_FILE, 'r') as data:
        lines = data.readlines()
    return Response(lines, mimetype='text/csv')

if __name__ == '__main__':
    # (Re)initialise the data file with just the header row -- note 'w' truncates any existing data.
    with lzma.LZMAFile(DATA_FILE, 'w') as data:
        writer = csv.DictWriter(data, fieldnames=['Timestamp', 'User', 'Latitude', 'Longitude'], quoting=csv.QUOTE_MINIMAL, lineterminator='\r\n')
        writer.writeheader()
    app.run(host='0.0.0.0', port=8080, debug=True)
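
Not part of the app, but as a quick sanity check -- a sketch assuming the server is running locally on port 8080 -- the requests library can exercise both endpoints:

import requests

# POST a data point; the field names match what newdatapiece() reads from request.form
requests.post('http://localhost:8080/', data={'user_id': 'hd1', 'lat': '37.77', 'lon': '-122.42'})

# GET everything back as JSON
print(requests.get('http://localhost:8080/').json())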

August 26, 2014

How to Watch Webpages for Changes

Today, I encountered a webpage with no RSS or Atom feed, so I whipped up something in Python to watch the page myself and report any changes. Hey, Guido, if you integrate requests, I won't have any non-stdlib requirements for this script. What do you say? Please? Not that the BDFL reads my blog, but anyway, here's the code:


#!/Users/hdiwan/.virtualenvs/globetrekker/bin/python
import argparse
import hashlib
import json
import logging
import pprint
import requests
import smtplib


def send_mail(msg, user, password):
    # Mail the notification to ourselves via Gmail's SMTP submission port.
    server = smtplib.SMTP('smtp.gmail.com', 587)
    server.ehlo()
    server.starttls()
    server.ehlo()
    server.login(user, password)
    server.sendmail(user, user, msg)

if __name__ == '__main__':
    argparser = argparse.ArgumentParser(description='Check a website for changes')
    argparser.add_argument('-n', '--url', type=str, default=None, help='Add URL to watcher',  action='store')
    argparser.add_argument('-l', '--list', action='store_true')
    argparser.add_argument('-u', '--user', type=str, default='hd1@jsc.d8u.us', help='Your username',  action='store')
    argparser.add_argument('-p', '--password', type=str, help='Your password', action='store')
    argparser.add_argument('-v', '--verbose', action='store_true')
    parsed = argparser.parse_args()

    if parsed.verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.FATAL)

    if parsed.url:
        new_hash = {parsed.url: 0}
        logging.debug(json.dumps(new_hash))
        try:
            # Merge the new URL into the existing state file, if there is one.
            with open('/var/tmp/.globetrekker.txt', 'r') as fin:
                data = json.load(fin)
            data[parsed.url] = 0
            logging.debug(data)
            with open('/var/tmp/.globetrekker.txt', 'w') as fout:
                json.dump(data, fout)
        except (IOError, ValueError):
            # No state file yet, or it's unreadable; start fresh.
            with open('/var/tmp/.globetrekker.txt', 'w') as fout:
                json.dump(new_hash, fout)
        exit()

    with open('/var/tmp/.globetrekker.txt', 'r') as fin:
        stored_hash_json = json.load(fin)
        logging.debug(stored_hash_json)
        if parsed.list:
            for k in stored_hash_json:
                print(k)
            exit()
    stored_hash = stored_hash_json
    logging.debug(stored_hash)
    for url in stored_hash:
        logging.debug('{} is our URL'.format(url))
        browser = requests.get(url)
        logging.debug('page retrieved -- {}'.format(url))
        # Hash the raw response body; round-tripping it through an encoding only invites errors.
        new_hash = hashlib.sha1(browser.content).hexdigest()
        logging.debug('Calculated hash code: {}'.format(new_hash))
        logging.debug('Stored hash: {}'.format(stored_hash[url]))
        if new_hash != stored_hash[url]:
            logging.debug('{} changed'.format(url))
            if stored_hash[url] != 0:  # 0 means we've never hashed this URL, so don't mail on the first run
                send_mail(u'Subject: {} Change detected\r\n\r\n--H'.format(url), parsed.user, parsed.password)
            stored_hash[url] = new_hash
    with open('/var/tmp/.globetrekker.txt', 'w') as fout:
        json.dump(stored_hash, fout)
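
The intended flow, in case it isn't obvious: register a page once with -n, then run the script periodically -- cron is the obvious choice -- with your Gmail credentials so it can mail you when something changes. Assuming you've saved it as watcher.py:

% python watcher.py -n http://example.com/some/page
% python watcher.py -u you@gmail.com -p yourpassword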

August 23, 2014

How to Sign Text Using Python

An important tenet of any asymmetric encryption system is that a public key must be distributed far and wide -- it is used to encrypt information and verify signatures -- while the corresponding private key must be kept, well, private. The code below shows how to sign some text with your private key in Python:

import gnupg
import os

def sign(message):
    gpg = gnupg.GPG(gnupghome='{}/.gnupg'.format(os.path.expanduser('~')))
    gpg.encoding = 'utf-8'
    gpg.secret_keyring = ['secring.gpg']
    gpg.keyring = ['pubring.gpg']  # python-gnupg calls the public keyring plain 'keyring'
    signed = gpg.sign(message)
    return str(signed)
Since python-gnupg can take a list of filenames for both the public and secret keyrings, both go in as lists. The message argument is a plaintext string, and calling str() on the return value yields the clear-signed output: the message itself followed by its signature.
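
The same library will check a signature from the other side; a minimal sketch, assuming the signing key is already in your keyring:

import gnupg
import os

gpg = gnupg.GPG(gnupghome='{}/.gnupg'.format(os.path.expanduser('~')))
verified = gpg.verify(sign('hello, world'))
print(verified.valid)  # True if the clear-signed text checks out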

How to Set Your User-Agent Using PyCurl

An unnamed CDN was blocking my sharing script because its user agent wasn't in the CDN's whitelist of approved user agents, and it's a common one. What to do? Fake it till you make it, to borrow a turn of phrase, like so:


curlObj.setopt(pycurl.USERAGENT, 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36')
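
For context, a minimal complete fetch -- the URL and buffer handling here are made up for illustration:

import cStringIO as StringIO
import pycurl

buf = StringIO.StringIO()
curlObj = pycurl.Curl()
curlObj.setopt(pycurl.URL, 'http://example.com/')
curlObj.setopt(pycurl.USERAGENT, 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2049.0 Safari/537.36')
curlObj.setopt(pycurl.WRITEFUNCTION, buf.write)  # collect the body rather than dumping it to stdout
curlObj.perform()
curlObj.close()
print(buf.getvalue())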

How to Search for Ports on BSD

BSD systems, well, at least FreeBSD and OpenBSD, distribute third-party packages through a ports system. The Python script below lets you search the ports INDEX for ports by substring:


#!/home/hdiwan/.virtualenvs/ports/bin/python
import argparse
import csv

INDEX = '/usr/ports/INDEX'
if __name__ == '__main__':
    args_ = argparse.ArgumentParser(description='Ports tool for OpenBSD')
    args_.add_argument('query', help='Query', type=unicode, action='store')
    args = args_.parse_args()
    with open(INDEX, 'r') as index:
        # INDEX is pipe-delimited; the first field is the package name
        reader = csv.reader(index, delimiter='|')
        print(args.query)
        for line in reader:
            if line[0].find(args.query) != -1:
                print(line[0])

You need Python and an INDEX file (modify the path at the top if necessary). The output looks like this:

% python ./ports.py ruby
ruby
jruby-jdbc-h2-1.3.170.1
jruby-jdbc-mysql-5.1.22.1
jruby-jdbc-postgres-9.2.1002.1
jruby-jdbc-sqlite3-3.7.2.1
vim-7.4.135p0-gtk2-perl-python-ruby
vim-7.4.135p0-gtk2-perl-python3-ruby
vim-7.4.135p0-no_x11-perl-python-ruby
vim-7.4.135p0-no_x11-perl-python3-ruby
jruby-1.7.9
weechat-ruby-0.4.2
eruby-1.0.5p14
mod_ruby-1.2.6p7
jruby-profligacy-1.0

August 19, 2014

How to Synchronise a Syndication Feed to Reddit

The Python code below lets you automatically post the newest entry from your blog's syndication feed to Reddit's programming subreddit:


#!/Users/hdiwan/.virtualenvs/blogger2reddit/bin/python
import argparse
import feedparser
import logging
import operator
import praw

if __name__ == '__main__':
    parse = argparse.ArgumentParser(description="Submit a feed's newest entry to reddit")
    parse.add_argument('-f', '--feed', action='store', help='Feed URL', default='http://www.prolificprogrammer.com/atom.xml')
    parse.add_argument('-p', '--password', action='store', help='Reddit password')
    parse.add_argument('-u', '--user', action='store', help='Reddit Username')
    parse.add_argument('-v', '--verbose', action='store_true', help='Verbose debugging')
    args = parse.parse_args()

    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.FATAL)

    feed = feedparser.parse(args.feed)
    # Sort newest-first; 'published_parsed' is a time struct, so it sorts
    # chronologically, unlike the raw 'published' date string.
    entries = sorted(feed.entries, key=operator.itemgetter('published_parsed'), reverse=True)
    logging.debug(entries)

    entry = entries[0]
    submission_title = entry.title
    submission_link = entry.link

    r = praw.Reddit(user_agent='example')  # reddit asks for a descriptive, unique user agent in practice

    r.login(args.user, args.password)
    logging.debug('logged in to reddit as {}'.format(args.user))
    sr = r.get_subreddit('programming')
    sr.submit(submission_title, url=submission_link)
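
Wire this into cron or a post-publish hook and each new post submits itself. Assuming you've saved the script as blogger2reddit.py:

% python blogger2reddit.py -u your_reddit_user -p your_reddit_password -f http://www.prolificprogrammer.com/atom.xml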

August 18, 2014

How to Be Your Own CNBC Analyst

We were having a cheeky discussion on lily today about how you could probably pass as a market analyst if you just reported "Dow <rises/falls> on <top headline from news.google.com>" at the end of each day. I decided to put this to the test and automate the talking heads out of a job by pairing a random business headline from the New York Times with the latest NASDAQ quote:

#!/Users/hdiwan/.virtualenvs/marketAnalyst/bin/python
import cStringIO as StringIO
import csv
import feedparser
import logging
import random
import requests

if __name__ == '__main__':
    logging.basicConfig(level=logging.FATAL)
    # Yahoo's ichart endpoint returns daily quotes as CSV, newest row first.
    quotes_ = requests.get('http://ichart.yahoo.com/table.csv?s=QQQ')
    quotes = StringIO.StringIO(quotes_.content)
    reader = list(csv.reader(quotes))
    # Columns are Date,Open,High,Low,Close,Volume,Adj Close; row 0 is the header.
    todays_close = float(reader[1][4])
    yesterdays_close = float(reader[2][4])
    
    # Grab the business headlines and pick one at random.
    news = feedparser.parse('http://www.nytimes.com/roomfordebate/index.rss?category=business')
    entries = news.entries
    random.shuffle(entries)
    logging.info(news)
    story = entries[0]

    difference = todays_close - yesterdays_close
    reason = story.title
    print('NASDAQ change {} because of {}'.format(difference, reason))

Running it looks something like this:

% python ~/.virtualenvs/marketAnalyst/main.py
NASDAQ change 0.05 because of Can the U.S. Still Be a Leader in the Middle East?