User:Nzsteak/graticuletest
From Geo Hashing
#!/usr/bin/python
import urllib2
import re
from xml.dom import minidom
# set up some different sort routines
def sort_ns_we(first, second):
    """Compare two graticule records north-to-south, then west-to-east.

    Both arguments are 4-tuples ``(name, lat, long, city)``; only the
    latitude and longitude fields take part in the comparison.

    Returns a cmp-style number: negative when ``first`` sorts before
    ``second``, positive when it sorts after, zero when both fields tie.
    Higher latitudes sort first; on equal latitude, lower longitudes
    sort first.
    """
    # Unpack in the body instead of in the signature: tuple parameters
    # are not available in newer Python versions (PEP 3113).
    _, lat1, lon1, _ = first
    _, lat2, lon2, _ = second
    lat_diff = lat2 - lat1
    if lat_diff == 0:
        # Same latitude: fall back to ascending longitude.
        return lon1 - lon2
    return lat_diff
def sort_we_ns(first, second):
    """Compare two graticule records west-to-east, then north-to-south.

    Both arguments are 4-tuples ``(name, lat, long, city)``; only the
    latitude and longitude fields take part in the comparison.

    Returns a cmp-style number: negative when ``first`` sorts before
    ``second``, positive when it sorts after, zero when both fields tie.
    Lower longitudes sort first; on equal longitude, higher latitudes
    sort first.
    """
    # Unpack in the body instead of in the signature: tuple parameters
    # are not available in newer Python versions (PEP 3113).
    _, lat1, lon1, _ = first
    _, lat2, lon2, _ = second
    if (lon2 - lon1) == 0:
        # Same longitude: fall back to descending latitude.
        return lat2 - lat1
    return lon1 - lon2
# deal with -0/+0 by shifting +0 and positives one to the right during process
def encode_num(num):
    """Turn a coordinate string into an int that keeps -0 and +0 apart.

    ``num`` is the textual coordinate (it is parsed with ``int`` and
    searched for a minus sign). Negative values and "-0" map to their
    plain integer value; "0" without a minus sign and every positive
    value are shifted up by one so that +0 becomes 1 and -0 stays 0.
    """
    value = int(num)
    if value < 0:
        return value
    if value > 0:
        return value + 1
    # Zero: only the presence of a minus sign tells -0 from +0.
    if num.find("-") != -1:
        return 0
    return 1
# shift all the positives back one and return "-0" for the zero
def decode_num(num):
    """Render an encoded coordinate back into its textual form.

    Inverse of the +0/-0 shift: negatives are printed unchanged, the
    value 0 is printed as "-0", and every positive value is shifted
    back down by one before being converted to a string.
    """
    if num < 0:
        return str(num)
    if num == 0:
        return "-0"
    shifted_back = num - 1
    return str(shifted_back)
# puts a category and all its graticules in sorted order into the output buffer
def output_category(name, graticules):
    """Append a category heading and its sorted graticules to ``output``.

    ``name`` is the heading text; it is appended wrapped in newlines.
    ``graticules`` is a list of ``(graticuleName, lat, long, cityName)``
    tuples whose lat/long fields are passed through ``decode_num`` when
    the wiki link line is built. The list is sorted in place.

    Writes to the module-level ``output`` list; returns nothing.
    """
    output.append("\n" + name + "\n")
    # North-to-south (descending latitude), then west-to-east (ascending
    # longitude). A key function gives the same ordering as a cmp-style
    # comparison on these two fields without relying on cmp sorting.
    graticules.sort(key=lambda entry: (-entry[1], entry[2]))
    for (graticuleName, lat, lon, cityName) in graticules:
        output.append(
            "[[" + graticuleName + "|" +
            decode_num(lat) + ", " + decode_num(lon) +
            " (" + cityName + ")]]\n"
        )
# MAINLINE
# Parser state: the heading line of the category being collected, the
# graticules seen under it so far, and the buffer of lines to print.
curCategory = ""
curGraticules = []
output = []

# category regex matches the === [[:Category:name|name]] === lines
category = re.compile(r"===.*\[\[:Category:(.*)\|(.*)\]\].*===")
# graticule regex matches the [[graticuleName|lat, long (cityName)]] lines
graticule = re.compile(r"\[\[(.*)\|([- 0-9]*),([- 0-9]*)\((.*)\)\]\]")

# get the current data from the Active Graticules page
text = urllib2.urlopen("http://wiki.xkcd.com/wgh/api.php?action=query&titles=Active_Graticules&prop=revisions&rvprop=content&format=xml")
try:
    xml = minidom.parse(text)
finally:
    # release the HTTP response once it has been read, even if parsing fails
    text.close()

# hack: walk a fixed path of first/last children down to the text node
# holding the page content, then split that content into lines
text = unicode(xml.firstChild.firstChild.lastChild.firstChild.firstChild.firstChild.firstChild.data).split("\n")

# go through all the current text
for line in text:
    if curCategory == "":
        # we haven't seen a category yet
        if category.match(line):
            # this is a category line so set the current category
            curCategory = line
        else:
            # this is text before any categories that we want to keep
            output.append(line)
    else:
        # we are in the middle of a category
        if category.match(line) and (line != curCategory):
            # we're at the next category, so we output the last one and reset
            output_category(curCategory, curGraticules)
            curCategory = line
            curGraticules = []
        match = graticule.match(line)
        if match:
            # we've got a graticule line so record the data
            curGraticules.append(
                (match.group(1),
                 encode_num(match.group(2)),
                 encode_num(match.group(3)),
                 match.group(4))
            )

# we're at the end of the data, we may need to output the last one
if curCategory != "":
    output_category(curCategory, curGraticules)

# print out all the output buffer (one parenthesised expression, so the
# print statement emits exactly the encoded line)
for line in output:
    print(line.encode('utf-8'))
(( Sample output removed - no longer matches All Graticules page --Thomcat 14:19, 3 July 2008 (UTC) ))