#!/usr/bin/env python

#
# THIS IS NO LONGER SUPPORTED
#
import sys

# The notice is emitted and the script stops here on every run; everything
# below is kept only so that the scraper can be revived later.
sys.stdout.write("""
This script no longer works after the geocaching.com website update see:
 http://blog.geocaching.com/2011/05/preview-of-geocaching-com-may-4th-website-release/

It is now recommended to use the geo-* tools instead:
 http://geo.rkkda.com/
""" + "\n")
sys.exit(1)

#
# One day this script could be resurrected, as the geo-* tools are perhaps
# not the fastest...
#

#
# gcget -- screen scrape Geocaching.com's annoying web interface
# aka SHOW ME THE CACHE!!!
#
# Copyright 2007, Evan Battaglia
# Distributed under the terms of the GPL v2.
#
#
# requires module mechanize
#

# DEFAULT USERNAME AND PASSWORD: THESE ARE OVERWRITTEN BY COMMAND-LINE OPTIONS

USER = "username"
PASS = "password"

# Command line:
#   gcget [-u username] [-p password] [-d] lat,lon maxnumgcs [maxdist] [threshold]
# threshold -- if more than this number of geocaches is found, don't get ANY,
#              instead give a warning and quit
# -d        -- debug: on a failed login, dump the last HTML page to
#              gcget.badlogin.html

import sys
import getopt


def help():
    """Write the usage text to standard output."""
    sys.stdout.write("""gcget v0.1
This program is free software, distributed under the terms of the GNU GPL v2.

Usage: gcget [-u username] [-p password] lat,lon maxnumberofgcs [maxdistance] [threshold]
Downloads up to maxnumberofgcs at a distance of up to maxdistance from lat,lon.
If we number of geocaches within maxdistance is above threshold, don't download any
 geocaches, just give a warning and quit.

If username and password are not given, will use default values hard-coded in script.

Happy caching!!!
""" + "\n")


#
# PARSE OPTIONS: USERNAME AND PASSWORD
#
try:
    # "h" must be declared here, otherwise -h is rejected by getopt and the
    # help branch below can never be reached.
    opts, args = getopt.gnu_getopt(sys.argv[1:], "u:p:dh", ["help"])
except getopt.GetoptError:
    # print help information and exit:
    help()
    sys.exit(2)

DEBUG = False

for o, a in opts:
    if o == "-p":
        PASS = a
    elif o == "-u":
        USER = a
    elif o in ("--help", "-h"):
        help()
        sys.exit()
    elif o == "-d":
        DEBUG = True

if len(args) < 2:
    help()
    sys.exit()

#
# PARSE POSITIONAL ARGUMENTS
#
# Malformed input (no comma in lat,lon; non-integer counts) is reported with
# the usage text instead of an uncaught ValueError traceback.
try:
    lat, lon = args[0].split(",")
    maxgcs = int(args[1])
    if len(args) >= 4:
        threshold = int(args[3])
    else:
        threshold = 1000000
except ValueError:
    help()
    sys.exit(2)

if len(args) >= 3:
    maxdist = args[2]
else:
    maxdist = "999"

# Number of result pages to fetch: the requested count rounded up to a
# multiple of 20 (one page per 20 geocaches).
n = (maxgcs + 19) // 20

import re
from mechanize import Browser
import ClientForm


def getmagicnumber(b):
    """Return the control number of the "Next" pager link, or None.

    This is normally 16 (the hidden link is $ctl16), unless there are less
    than 10 pages of results, in which case it will be less (e.g. 09 for
    3 pages of results).

    b -- browser whose current response is a search results page.
    """
    # Fetch the page once; only the number being searched for changes.
    data = b.response().get_data()
    for i in range(16, 0, -1):
        if re.search("pgrBottom.ctl%02d" % i, data):
            return i
    return None


b = Browser()
b.open("http://geocaching.com/seek/")
b.follow_link(text="Log in")
b.select_form(nr=0)
# The Username and Password fields on the Login form changed
b["ctl00$ContentBody$myUsername"] = USER
b["ctl00$ContentBody$myPassword"] = PASS
b.submit()

magicnumber = 0  # the ctl number of Next. get only once

# "form4" is the form that takes the search origin and distance below; if it
# cannot be selected after logging in, the credentials are taken to be wrong.
try:
    b.select_form("form4")
except Exception:
    sys.stderr.write("Invalid username/password\n")
    if DEBUG:
        f = open("gcget.badlogin.html", "w")
        try:
            f.write(b.response().get_data())
        finally:
            f.close()
        sys.stderr.write("Dumping last HTML page recieved into gcget.badlogin.html\n")
    sys.exit(1)

b["origin_lat"] = lat
b["origin_long"] = lon
b["dist"] = maxdist
b.submit()

m = re.search("Total Records: ([0-9]*)", b.response().get_data())
if not m:
    # Diagnostics go to stderr like the other error paths, so that they never
    # end up in the data written to stdout.
    sys.stderr.write("can't find total records\n")
    sys.exit(1)

records = int(m.group(1))
if records > threshold:
    sys.stderr.write("THRESHOLD %d > %d\n" % (records, threshold))
    sys.exit(4)
sys.stderr.write("ok found %d, getting min(%d,%d) gcs\n" % (records, records, maxgcs))

pages = 0
# (records+19)//20 is the max pages
for ii in range(min(n, (records + 19) // 20)):
    try:
        b.select_form(nr=0)
        # select every CID item on this results page, then submit the form
        b['CID'] = [i.name for i in b.find_control('CID').items]
        b.submit()
    except Exception:
        # the page could not be selected/submitted: stop with what we have
        break

    # only print one header, start of xml file
    lines = b.response().get_data().split("\n")
    if ii == 0:
        sys.stdout.write("\n".join(lines[0:2]) + "\n")

    # core
    sys.stdout.write("\n".join(lines[2:-1]) + "\n")

    sys.stdout.write("\n")

    pages += 1
    sys.stderr.write("i")
    sys.stderr.flush()

    b.back()

    if not magicnumber:
        magicnumber = getmagicnumber(b)
        if not magicnumber:
            # couldn't find magic number! (why does this happen?)
            break

    # Add the hidden pager control for "Next" to the first form and submit it
    # to move on to the following results page.
    b.select_form(nr=0)
    list(b.forms())[0].new_control("hidden", "ctl00$ContentBody$pgrBottom$ctl%02d" % magicnumber, {})
    b.submit()

sys.stderr.write("\n")

if pages:
    # NOTE(review): this writes an empty line; confirm whether a closing
    # element for the output file was intended here.
    sys.stdout.write("\n")