]>
Commit | Line | Data |
---|---|---|
28c82d8b EB |
1 | #!/usr/bin/env python |
2 | ||
3 | # | |
4 | # gcget -- screen scrape Geocaching.com's annoying web interface | |
5 | # aka SHOW ME THE CACHE!!! | |
6 | # | |
7 | # Copyright 2007, Evan Battaglia | |
a5c8699d | 8 | # Distributed under the terms of the GPL v2. |
28c82d8b | 9 | # |
05225ccd | 10 | # |
28c82d8b | 11 | # requires module mechanize |
05225ccd | 12 | # |
05225ccd | 13 | |
a5c8699d EB |
14 | # DEFAULT USERNAME AND PASSWORD: THESE ARE OVERWRITTEN BY COMMAND-LINE OPTIONS |
15 | ||
28c82d8b EB |
# Fallback Geocaching.com credentials; the -u and -p command-line options
# replace these values when given.
USER = "username"
PASS = "password"
18 | ||
19 | # docs needed! | |
20 | # this has some extra args in: | |
21 | # gcget lat,lon maxnumgcs [maxdist] [threshold] | |
22 | # threshold -- if more than this number of geocaches are found, don't get ANY, | |
23 | # instead give warning and quit | |
24 | ||
25 | import sys | |
a5c8699d EB |
26 | import getopt |
27 | ||
def help():
    """Write the gcget usage message to standard output.

    Takes no arguments and returns None.  The message ends with one blank
    line, matching what the original print statement emitted.
    """
    # sys.stdout.write (rather than the print statement) keeps this function
    # valid under both Python 2 and Python 3 and matches the sys.stderr.write
    # calls used elsewhere in the script.
    sys.stdout.write("""gcget v0.1
This program is free software, distributed under the terms of the GNU GPL v2.

Usage: gcget [-u username] [-p password] [-d] lat,lon maxnumberofgcs [maxdistance] [threshold]
Downloads up to maxnumberofgcs at a distance of up to maxdistance from lat,lon.
If the number of geocaches within maxdistance is above threshold, don't download any
geocaches, just give a warning and quit.

If username and password are not given, will use default values hard-coded in script.
With -d, the last HTML page received is dumped to gcget.badlogin.html when login fails.
Use --help to show this message.

Happy caching!!!

""")
41 | ||
42 | # | |
43 | # PARSE OPTIONS: USERNAME AND PASSWORD | |
44 | # | |
45 | # | |
46 | # | |
# Parse the command line.  Options: -u USER, -p PASS, -d (debug), -h/--help.
# Positional arguments: lat,lon maxnumgcs [maxdist] [threshold].
try:
    # "h" is listed so that the "-h" test in the loop below is reachable;
    # without it getopt rejects -h before the loop ever sees it.
    opts, args = getopt.gnu_getopt(sys.argv[1:], "u:p:dh", ["help"])
except getopt.GetoptError:
    # print help information and exit:
    help()
    sys.exit(2)

DEBUG = False

for o, a in opts:
    if o == "-p":
        PASS = a
    elif o == "-u":
        USER = a
    elif o in ("--help", "-h"):
        help()
        sys.exit()
    elif o == "-d":
        DEBUG = True

# lat,lon and maxnumgcs are mandatory.
if len(args) < 2:
    help()
    sys.exit()

#########################
# lat and lon stay strings; they are later written into form fields as-is.
try:
    lat, lon = args[0].split(",")
except ValueError:
    # Anything other than exactly one comma cannot be unpacked into lat, lon.
    sys.stderr.write("Invalid coordinates %r: expected lat,lon\n" % (args[0],))
    sys.exit(2)

# Optional third argument: maximum distance (kept as a string).
if len(args) >= 3:
    maxdist = args[2]
else:
    maxdist = "999"

# Optional fourth argument: give up if more than this many caches are found.
if len(args) >= 4:
    threshold = int(args[3])
else:
    threshold = 1000000

# Number of 20-cache result pages needed to cover the requested count
# (rounded up).  "//" is the explicit form of the Python 2 integer division
# the original expression relied on.
n = (int(args[1]) + 19) // 20
28c82d8b EB |
90 | |
91 | import re | |
92 | from mechanize import Browser | |
93 | import ClientForm | |
94 | ||
baa015c0 EB |
95 | # get magic number for "Next" button. |
96 | # this is normally 16 (the hidden link is $ctl16), unless there are fewer than 10 pages of results, |
97 | # in which case it will be lower (e.g. 09 for 3 pages of results) |
28c82d8b EB |
def getmagicnumber(b):
    """Return the highest pager control number found on the current page.

    b -- browser-like object; b.response().get_data() must return the text
         of the page currently loaded.

    Looks for "pgrBottom<any character>ctlNN" for NN from 16 down to 01 and
    returns the first (i.e. largest) NN that occurs, as an int.  Returns None
    when no such control name is present.
    """
    # Fetch the page once; it does not change while we scan it.
    page = b.response().get_data()
    for i in range(16, 0, -1):
        # The "." is a regex wildcard, so any single separator character
        # between "pgrBottom" and "ctl" matches (presumably "$" or "_" --
        # not verifiable from this file).
        if re.search("pgrBottom.ctl%02d" % i, page):
            return i
    return None
28c82d8b EB |
103 | |
# Log in, run the coordinate search, then download the results page by page,
# writing the combined output to stdout and progress/errors to stderr.
b = Browser()
b.open("http://geocaching.com/seek/")
b.follow_link(text="Log in")
b.select_form(nr=0)
# The Username and Password fields on the Login form changed
b["ctl00$ContentBody$myUsername"] = USER
b["ctl00$ContentBody$myPassword"] = PASS
b.submit()

magicnumber = 0  # the ctl number of Next. get only once

# Failure to select "form4" after submitting the login form is treated as a
# failed login.  The handler must not call select_form again: doing so raises
# a second time and the message below would never be printed.
try:
    b.select_form("form4")
except Exception:
    sys.stderr.write("Invalid username/password\n")
    if DEBUG:
        f = open("gcget.badlogin.html", "w")
        try:
            f.write(b.response().get_data())
        finally:
            # Close the dump file even if writing fails.
            f.close()
        sys.stderr.write("Dumping last HTML page recieved into gcget.badlogin.html\n")
    sys.exit()

# Fill in the search form with the origin and maximum distance and submit it.
b["origin_lat"] = lat
b["origin_long"] = lon
b["dist"] = maxdist
b.submit()

# "+" (not "*") so an empty <b></b> is reported as "can't find total records"
# instead of crashing in int("").
thresholdre = re.compile("Total Records: <b>([0-9]+)</b>")
m = thresholdre.search(b.response().get_data())
if not m:
    sys.stdout.write("can't find total records\n")
    sys.exit(0)

records = int(m.group(1))
if records > threshold:
    # Too many hits: download nothing, warn and quit.
    sys.stderr.write("THRESHOLD %d > %d\n" % (records, threshold))
    sys.exit(4)
sys.stderr.write("ok found %d, getting min(%d,%d) gcs\n" % (records, records, int(args[1])))

pages = 0
# (records+19)//20 is the max pages
for ii in range(min(n, (records + 19) // 20)):
    try:
        # Select every item of the 'CID' control on the results form and
        # submit it; the response is consumed below as the download.
        b.select_form(nr=0)
        b['CID'] = [i.name for i in b.find_control('CID').items]
        b.submit()
    except Exception:
        # Best effort: stop paging as soon as a page cannot be processed.
        break

    # only print one header, start of xml file
    lines = b.response().get_data().split("\n")
    if ii == 0:
        sys.stdout.write("\n".join(lines[0:2]) + "\n")

    # core: everything after the two header lines, minus the final element
    # of the split, followed by an explicit closing waypoint tag.
    sys.stdout.write("\n".join(lines[2:-1]) + "\n")
    sys.stdout.write("</waypoint>\n")

    pages += 1
    sys.stderr.write("i")  # progress indicator, one per downloaded page
    sys.stderr.flush()

    # Return to the results listing so the next page can be requested.
    b.back()

    if not magicnumber:
        magicnumber = getmagicnumber(b)
        if not magicnumber:
            # couldn't find magic number -- cannot request the next page
            break

    # Add the hidden pager control named after the magic number to the first
    # form and submit it to move to the next page of results.
    b.select_form(nr=0)
    list(b.forms())[0].new_control("hidden", "ctl00$ContentBody$pgrBottom$ctl%02d" % magicnumber, {})
    b.submit()

sys.stderr.write("\n")

# Close the document only if at least one page was written.
if pages:
    sys.stdout.write("</loc>\n")
187 | ||
188 | # f=open("delmeNOW","w") | |
189 | # f.write(b.response().get_data()) | |
190 | # f.close() | |
baa015c0 | 191 |