User:Nzsteak/graticuletest

From Geohashing

#!/usr/bin/python

import urllib2
import re
from xml.dom import minidom 

# set up some different sort routines 
def sort_ns_we(first, second):
    """cmp-style comparator: higher latitude first, then lower longitude.

    Each argument is a 4-item sequence (name, lat, long, city) whose lat and
    long entries support subtraction.  Returns a negative number if `first`
    sorts before `second`, a positive number if it sorts after, and 0 when
    both latitude and longitude are equal.

    The records are unpacked in the body rather than in the signature:
    tuple parameters are not valid syntax in Python 3, and the body-level
    unpacking behaves the same under Python 2.
    """
    _, lat1, long1, _ = first
    _, lat2, long2, _ = second
    lat_diff = lat2 - lat1
    if lat_diff == 0:
        # same latitude: fall back to longitude, smaller value first
        return long1 - long2
    return lat_diff

def sort_we_ns(first, second):
    """cmp-style comparator: lower longitude first, then higher latitude.

    Each argument is a 4-item sequence (name, lat, long, city) whose lat and
    long entries support subtraction.  Returns a negative number if `first`
    sorts before `second`, a positive number if it sorts after, and 0 when
    both longitude and latitude are equal.

    The records are unpacked in the body rather than in the signature:
    tuple parameters are not valid syntax in Python 3, and the body-level
    unpacking behaves the same under Python 2.
    """
    _, lat1, long1, _ = first
    _, lat2, long2, _ = second
    if (long2 - long1) == 0:
        # same longitude: fall back to latitude, larger value first
        return lat2 - lat1
    return long1 - long2

# deal with -0/+0 by shifting +0 and positives one to the right during process
def encode_num(num):
    """Map a coordinate string onto an integer that keeps "-0" and "0" apart.

    Negative values are returned unchanged, "-0" becomes 0, and "0" and all
    positive values are moved up by one (so "0" becomes 1).
    """
    value = int(num)
    if value < 0:
        return value
    if value > 0:
        return value + 1
    # value is zero: only the text can tell "-0" from "0"
    if num.find("-") == -1:
        return 1
    return 0

# shift all the positives back one and return "-0" for the zero
def decode_num(num):
    """Turn an encoded coordinate back into its display string.

    0 is rendered as "-0", negative values are rendered unchanged, and
    positive values are moved down by one before rendering (so 1 -> "0").
    """
    if num == 0:
        return "-0"
    restored = num if num < 0 else num - 1
    return str(restored)

# puts a category and all its graticules in sorted order into the output buffer
def output_category(name, graticules):
    """Append a category heading and its graticule links to `output`.

    `name` is written first, surrounded by newlines.  `graticules` is then
    sorted in place with sort_ns_we, and one
    "[[graticuleName|lat, long (cityName)]]" line is appended per entry,
    with lat and long passed through decode_num.
    """
    output.append("\n" + name + "\n")
    graticules.sort(sort_ns_we)
    for entry in graticules:
        graticuleName, lat, lon, cityName = entry
        pieces = [
            "[[",
            graticuleName, "|",
            decode_num(lat), ", ", decode_num(lon), " (",
            cityName,
            ")]]\n",
        ]
        output.append("".join(pieces))

# MAINLINE

curCategory = ""
curGraticules = []
output = []

# category regex matches the === [[:Category:name|name]] === lines
category  = re.compile("===.*\[\[:Category:(.*)\|(.*)\]\].*===")
# graticule regex maches the [[graticuleName|lat, long (cityName)]] lines
graticule = re.compile("\[\[(.*)\|([- 0-9]*),([- 0-9]*)\((.*)\)\]\]")

# get the current data from the Active Graticules page
text = urllib2.urlopen("http://wiki.xkcd.com/wgh/api.php?action=query&titles=Active_Graticules&prop=revisions&rvprop=content&format=xml")
xml = minidom.parse(text)
# hack, hack, hack, nasty hack 
text = unicode(xml.firstChild.firstChild.lastChild.firstChild.firstChild.firstChild.firstChild.data).split("\n")

# go through all the current text
for line in text:
    if curCategory == "":
        # we haven't seen a category yet 
        match = category.match(line)
        if match:
            # this is a category line so set the current category
            curCategory = line
        else:
           # this is text before any categories that we want to keep
            output.append(line)
    else:
        # we are in the middle of a category
        match = category.match(line)
        if match and (line != curCategory):
            # we're at the next category, so we output the last one and reset
            output_category(curCategory, curGraticules)
            curCategory = line
            curGraticules = []

        match = graticule.match(line)
        if match:
            # we've got a graticule line so record the data
            curGraticules.append(
                (match.group(1), 
                 encode_num(match.group(2)), 
                 encode_num(match.group(3)),
                 match.group(4))
                )

# we're at the end of the data, we may need to output the last one
if curCategory != "":
    output_category(curCategory, curGraticules)

# print out all the output buffer
for line in output:
    print line.encode('utf-8')

(( Sample output removed - no longer matches All Graticules page --Thomcat 14:19, 3 July 2008 (UTC) ))