]>
Commit | Line | Data |
---|---|---|
28c82d8b EB |
1 | #!/usr/bin/env python |
2 | ||
3 | # | |
4 | # gcget -- screen scrape Geocaching.com's annoying web interface | |
5 | # aka SHOW ME THE CACHE!!! | |
6 | # | |
7 | # Copyright 2007, Evan Battaglia | |
a5c8699d | 8 | # Distributed under the terms of the GPL v2. |
28c82d8b | 9 | # |
05225ccd | 10 | # |
28c82d8b | 11 | # requires module mechanize |
05225ccd | 12 | # |
05225ccd | 13 | |
a5c8699d EB |
14 | # DEFAULT USERNAME AND PASSWORD: THESE ARE OVERRIDDEN BY COMMAND-LINE OPTIONS |
15 | ||
28c82d8b EB |
16 | USER="username" |
17 | PASS="password" | |
18 | ||
19 | # docs needed! | |
20 | # this has some extra args in: | |
21 | # gcget lat,lon maxnumgcs [maxdist] [threshold] | |
22 | # threshold -- if find more than this # of geocaches, don't get ANY, | |
23 | # instead give warning and quit | |
24 | ||
25 | import sys | |
a5c8699d EB |
26 | import getopt |
27 | ||
def help():
    """Print the gcget version banner and usage summary to standard output.

    Takes no arguments and returns None.  The name shadows the ``help``
    builtin; it is kept because the option-parsing code in this script calls
    it under this name.
    """
    # The text ends with an empty line so the output is identical in shape to
    # what a ``print`` of a newline-terminated string produces.
    sys.stdout.write("""gcget v0.1
This program is free software, distributed under the terms of the GNU GPL v2.

Usage: gcget [-u username] [-p password] [-d] lat,lon maxnumberofgcs [maxdistance] [threshold]
Downloads up to maxnumberofgcs at a distance of up to maxdistance from lat,lon.
If the number of geocaches within maxdistance is above threshold, don't download any
geocaches, just give a warning and quit.

If username and password are not given, will use default values hard-coded in script.
With -d, a failed login dumps the last HTML page received into gcget.badlogin.html.
Use --help to show this message.

Happy caching!!!

""")
41 | ||
42 | # | |
43 | # PARSE OPTIONS: USERNAME AND PASSWORD | |
44 | # | |
45 | # | |
46 | # | |
# gnu_getopt allows options and positional arguments to be interleaved.
# NOTE(review): a first coordinate that starts with "-" (negative latitude)
# will be read as an option unless the user puts "--" before it -- confirm.
try:
    # "h" is listed so that the "-h" branch below is actually reachable;
    # without it "-h" raised GetoptError and exited with status 2.
    opts, args = getopt.gnu_getopt(sys.argv[1:], "u:p:dh", ["help"])
except getopt.GetoptError:
    # Unknown option or missing option argument: show usage and fail.
    help()
    sys.exit(2)

# Set by -d; when true, a failed login dumps the returned page to a file.
DEBUG = False

# Command-line values replace the USER / PASS defaults defined at the top
# of the script.
for o, a in opts:
    if o == "-p":
        PASS = a
    elif o == "-u":
        USER = a
    elif o in ("-h", "--help"):
        help()
        sys.exit()
    elif o == "-d":
        DEBUG = True

# lat,lon and maxnumberofgcs are mandatory positional arguments.
if len(args) < 2:
    help()
    sys.exit()
70 | ||
71 | ######################### | |
72 | ||
# First positional argument is "lat,lon".  Anything after a second comma is
# ignored, exactly as before; a missing comma is now reported instead of
# surfacing as an IndexError traceback.
ll = args[0].split(",")
if len(ll) < 2:
    sys.stderr.write("gcget: expected lat,lon but got %r\n" % args[0])
    sys.exit(2)
lat = ll[0]
lon = ll[1]

# Optional third argument: maximum distance, passed to the search form as a
# string without conversion.
if len(args) >= 3:
    maxdist = args[2]
else:
    maxdist = "999"

try:
    # Optional fourth argument: give up without downloading when the search
    # reports more caches than this.
    if len(args) >= 4:
        threshold = int(args[3])
    else:
        threshold = 1000000

    # Number of result pages to fetch: the requested cache count rounded up
    # to a whole number of 20-entry pages.
    n = (int(args[1]) + 19) // 20
except ValueError:
    sys.stderr.write("gcget: maxnumberofgcs and threshold must be integers\n")
    sys.exit(2)
28c82d8b EB |
89 | |
90 | import re | |
91 | from mechanize import Browser | |
92 | import ClientForm | |
93 | ||
def getmagicnumber(b):
    """Return the highest N in 1..16 whose "pgrBottom??ctlN" name is on the page.

    b -- browser-like object; ``b.response().get_data()`` must return the
         text of the current page.

    The "??" stands for any two characters between "pgrBottom" and "ctl".
    The script uses the result as the control number of the pager's "Next"
    link.  Returns 0 when no such name is found.
    """
    # Fetch the page once; it does not change while we scan it.
    page = b.response().get_data()
    # Count downwards so a two-digit control (e.g. ctl12) wins over the
    # "ctl1" prefix it contains.
    for i in range(16, 0, -1):
        if re.search("pgrBottom..ctl%d" % i, page):
            return i
    return 0
99 | ||
# Open the site, follow the "log in" link and submit the credentials through
# the first form on the login page.
b = Browser()
b.open("http://geocaching.com/seek/")
b.follow_link(text="log in")
b.select_form(nr=0)
b["myUsername"] = USER
b["myPassword"] = PASS
b.submit()

magicnumber = 0 # the ctl number of Next. get only once

# After a successful login the page is expected to contain the form named
# "form4" (it is filled in with the search origin further down).  If it
# cannot be selected, the login is reported as failed.
# NOTE(review): the first, error-ignoring attempt looks like a leftover from
# before the reporting version was added; it is kept in case the first call
# is needed to prime mechanize's form parsing -- confirm and remove.
try:
    b.select_form("form4")
except Exception:
    pass
try:
    b.select_form("form4")
except Exception:
    sys.stderr.write("Invalid username/password\n")
    if DEBUG:
        # Keep the page we got back so the failure can be diagnosed.
        f = open("gcget.badlogin.html", "w")
        try:
            f.write(b.response().get_data())
        finally:
            f.close()
        sys.stderr.write("Dumping last HTML page received into gcget.badlogin.html\n")
    # NOTE(review): exits with status 0 although the login failed; left
    # unchanged because callers may rely on it.
    sys.exit()
c5f63dfe | 122 | |
28c82d8b EB |
# Fill in the search form selected earlier and run the search.
b["origin_lat"] = lat
b["origin_long"] = lon
b["dist"] = maxdist
b.submit()

# "+" rather than "*": an empty count must be treated as "not found" instead
# of crashing in int("").
thresholdre = re.compile("Total Records: <b>([0-9]+)</b>")
m = thresholdre.search(b.response().get_data())
if not m:
    # Diagnostics go to stderr like every other message in this script;
    # stdout is reserved for the downloaded XML.
    sys.stderr.write("can't find total records\n")
    sys.exit(0)

# Total number of caches the search reported.
records = int(m.group(1))
if records > threshold:
    # Too many hits: download nothing, just warn and quit.
    sys.stderr.write("THRESHOLD %d > %d\n" % (records, threshold))
    sys.exit(4)
sys.stderr.write("ok found %d, getting min(%d,%d) gcs\n" % (records, records, int(args[1])))
140 | ||
# Number of result pages successfully downloaded and written to stdout.
pages = 0
# (records+19)//20 is the max pages
for ii in range(min(n, (records + 19) // 20)):
    try:
        b.select_form(nr=0)
        # Select every item of the CID control, then submit to download
        # the whole page of results.
        b['CID'] = [i.name for i in b.find_control('CID').items]
        b.submit()
    except Exception:
        # Best effort: stop at the first page that cannot be fetched and
        # keep what was already written.  Ctrl-C and sys.exit are no longer
        # swallowed here.
        break

    # only print one header, start of xml file
    lines = b.response().get_data().split("\n")
    if ii == 0:
        sys.stdout.write("\n".join(lines[0:2]) + "\n")

    # core: everything between the two header lines and the last line
    sys.stdout.write("\n".join(lines[2:-1]) + "\n")

    # The dropped last line is replaced by a bare closing tag so that the
    # per-page documents can be concatenated; </loc> is written once at the
    # very end.
    sys.stdout.write("</waypoint>\n")

    pages += 1
    # Progress indicator: one "i" per downloaded page.
    sys.stderr.write("i")
    sys.stderr.flush()

    # Return to the result list we downloaded from.
    b.back()

    if not magicnumber:
        magicnumber = getmagicnumber(b)

    # Move to the next result page by submitting the first form with the
    # pager's "Next" control added as a hidden field.
    b.select_form(nr=0)
    list(b.forms())[0].new_control("hidden", "pgrBottom$_ctl%d" % magicnumber, {})
    b.submit()

# Terminate the line of progress marks.
sys.stderr.write("\n")

# Close the document only if at least one page opened it.
if pages:
    sys.stdout.write("</loc>\n")
178 | ||
179 | # f=open("delmeNOW","w") | |
180 | # f.write(b.response().get_data()) | |
181 | # f.close() |