]>
Commit | Line | Data |
---|---|---|
06ee5545 RN |
#!/bin/sh

# Keep the invocation path; usage(), error() and debug() print its basename.
PROGNAME=$0
4 | ||
#
#	Print the manual page on stdout and exit with status 1.
#
#	The here-document is deliberately unquoted: the program name and
#	$USERNAME are expanded when the text is printed, while \$HOME is
#	escaped so that it appears literally.  The program name is quoted
#	inside the command substitution so a path with blanks still works.
#
usage() {
	cat <<EOF
NAME
    $(basename "$PROGNAME") - Convert gc.com *printable* web pages into GPX

SYNOPSIS
    $(basename "$PROGNAME") [options] [gc-com.html]...

DESCRIPTION
    Convert gc.com *printable* web pages into GPX, including
    cache description and all logs.

    The *printable* web pages can be fetched using geo-nearest,
    geo-newest, geo-placed, geo-found, or geo-gid with the -H option.

OPTIONS
    -b              Normalize output by postprocessing with gpsbabel
    -e              Encode hints with rot13 (e.g. NORTH = ABEGU)
    -i              Incremental, no XML and GPX headers
    -l number       Maximum number of log entries to be exported [unlimited]
    -n              No HTML in descriptions (experimental)
    -o FMT          Output FMT instead of GPX by using gpsbabel
    -u username     Indicate found status for username [$USERNAME]
    -w              Do not add "Additional Waypoints" to the GPX output
    -z              Do not output waypoints with "zero" coordinates
    -E var=val      Set environment "var" to "val"
                    i.e. DATEFMT=0|1
    -D lvl          Debug level

DEFAULTS
    Defaults can also be set with variables in file \$HOME/.georc:

        DATEFMT=[0|1];

DATE FORMATS
    Geocaching.com date formats that are compatible:

        GC Format       Example         Compatible
        YYYY-MM-DD      2011-07-13      yes
        YYYY/MM/DD      2011/07/13      yes
        MM/DD/YYYY      07/13/2011      yes
        DD/MM/YYYY      13/07/2011      yes if DATEFMT=1 in \$HOME/.georc
        DD/Mmm/YYYY     13/Jul/2011     no
        Mmm/DD/YYYY     Jul/13/2011     no
        DD Mmm YY       13 Jul 11       yes (english only)

EXAMPLES
    Convert into GPX:

        geo-found -n9999 -H. > /dev/null
        geo-html2gpx *.html > found.gpx
EOF

	exit 1
}
60 | ||
#
#	Report an error and exit
#
#	$1 is the message.  It is written to stderr prefixed with the
#	basename of the program, then the script exits with status 1.
#	$PROGNAME is quoted so an install path containing blanks does not
#	turn into extra arguments for basename.
#
error() {
	echo "$(basename "$PROGNAME"): $1" >&2
	exit 1
}
68 | ||
#
#	Print message $2 on stderr when the debug level $DEBUG is at least $1.
#
#	An unset or empty DEBUG counts as level 0 instead of producing a
#	syntax error from the test command.
#
debug() {
	if [ "${DEBUG:-0}" -ge "$1" ]; then
		echo "$(basename "$PROGNAME"): $2" >&2
	fi
}
74 | ||
# Pick the awk and date commands used below.  On Darwin the g-prefixed
# tools are used (presumably the GNU versions - confirm they are installed).
case `uname` in
Darwin)
	awk=gawk
	date=gdate
	;;
*)
	awk=awk
	date=date
	;;
esac
82 | ||
#
#	Read RC file, if there is one
#
#	USERNAME is cleared first so that it is only set by the RC file or
#	by the -u option.  The path is quoted so a home directory containing
#	blanks is handled.
#
USERNAME=
if [ -f "$HOME/.georc" ]; then
	. "$HOME/.georc"
	# N.B. must switch to read_rc_file if LAT/LON is ever needed here
fi
#
#	Process the options
#
POSTPROC="cat"
DEBUG=0
INCR=0
NOWPTS=0
NOZERO=0
NOHTML=0
DECODE=1
NUMLOGS=1000000
while getopts "beE:iwzl:no:u:D:h?" opt
do
	case $opt in
	b)	POSTPROC="gpsbabel -igpx -f- -ogpx -F-";;
	e)	DECODE=0;;
	E)	eval "$OPTARG";;	# by design: arbitrary var=val from the command line
	i)	INCR=1;;
	l)	NUMLOGS="$OPTARG";;
	o)	POSTPROC="gpsbabel -igpx -f- -o$OPTARG -F-";;
	n)	NOHTML=1;;
	u)	USERNAME="$OPTARG";;
	w)	NOWPTS=1;;
	z)	NOZERO=1;;
	D)	DEBUG="$OPTARG";;
	h|\?)	usage;;
	esac
done
shift `expr $OPTIND - 1`

# NUMLOGS and DEBUG are expanded unquoted on the awk command line and used
# in numeric tests, so anything but a plain non-negative integer is refused.
case "$NUMLOGS" in
''|*[!0-9]*)	error "-l requires a non-negative integer, got '$NUMLOGS'";;
esac
case "$DEBUG" in
''|*[!0-9]*)	error "-D requires a non-negative integer, got '$DEBUG'";;
esac
121 | ||
122 | # | |
123 | # Main Program | |
124 | # | |
# Four digit current year, handed to the awk program as YR.
YR=$(date +%Y)
126 | ||
127 | cat "$@" | tr -d '\001\002\003\004\005\006\007\015\022\026\030' \ | |
128 | | sed 's/<A /\ | |
129 | <A /g' | | |
130 | $awk -vDEBUG=$DEBUG -vINCR=$INCR \ | |
131 | -vNOWPTS=$NOWPTS -vNOZERO=$NOZERO \ | |
132 | -vNOHTML=$NOHTML \ | |
133 | -vDECODE=$DECODE \ | |
134 | -vUSERNAME="$USERNAME" \ | |
135 | -vDATE="$date" \ | |
136 | -vDATEFMT="$DATEFMT" \ | |
137 | -vYR="$YR" -vNUMLOGS=$NUMLOGS \ | |
138 | ' | |
139 | # Copyright (c) 2010 Dan Saar | |
140 | # | |
141 | # Permission is hereby granted, free of charge, to any person obtaining a copy | |
142 | # of this software and associated documentation files (the "Software"), to deal | |
143 | # in the Software without restriction, including without limitation the rights | |
144 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
145 | # copies of the Software, and to permit persons to whom the Software is | |
146 | # furnished to do so, subject to the following conditions: | |
147 | # | |
148 | # The above copyright notice and this permission notice shall be included in | |
149 | # all copies or substantial portions of the Software. | |
150 | # | |
151 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
152 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
153 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
154 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
155 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
156 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
157 | # THE SOFTWARE. | |
158 | ||
# Convert a string of hexadecimal digits to a number.
#
#   s : the digits, in upper or lower case
#
# returns : the numeric value.  Conversion stops at the first character
#           that is not a hex digit and the value collected so far is
#           returned (0 when the string starts with a non-digit or is empty).
function prsJSON_hex2num(s,    value, pos, len, digit)
{
    s = tolower(s)
    len = length(s)
    value = 0
    pos = 1

    while (pos <= len)
    {
        # index() is 1-based, so digit-1 is the value of the hex digit
        digit = index("0123456789abcdef", substr(s, pos, 1))
        if (digit == 0)
            break

        value = value * 16 + (digit - 1)
        pos++
    }

    return value
}
176 | ||
# Encode one Unicode code point as a UTF-8 byte sequence.
#
#   v : the code point as a number
#
# returns : 1 to 4 bytes of UTF-8, or "" when v is 1114112 (0x110000) or more
#
# cs holds the bytes 0x80 through 0xFF in order, so position N+1 in cs is the
# byte 0x80+N.  Continuation bytes (10xxxxxx) therefore sit at payload+1 and
# the lead bytes 0xC0, 0xE0 and 0xF0 sit at positions 65, 97 and 113.
function prsJSON_EncodeAsUTF8(v,    s, p1, p2, p3, p4, cs)
{
    cs = "\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377"

    # plain ASCII
    if (v < 128)
        return sprintf("%c", v)

    # 110xxxxx 10xxxxxx
    if (v < 2048)
    {
        p1 = int(v/64) % 32
        p2 = v % 64
        return substr(cs, 65+p1, 1) substr(cs, p2+1, 1)
    }

    # 1110xxxx 10xxxxxx 10xxxxxx
    if (v < 65536)
    {
        p1 = int(v/4096) % 16
        p2 = int(v/64) % 64
        p3 = v % 64
        return substr(cs, 97+p1, 1) substr(cs, p2+1, 1) substr(cs, p3+1, 1)
    }

    # 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    if (v < 1114112)
    {
        p1 = int(v/262144) % 8
        p2 = int(v/4096) % 64
        p3 = int(v/64) % 64
        p4 = v % 64
        return substr(cs, 113+p1, 1) substr(cs, p2+1, 1) substr(cs, p3+1, 1) substr(cs, p4+1, 1)
    }

    # beyond the Unicode range
    return ""
}
213 | ||
# Decode the escape sequences of a JSON string into UTF-8 text.
#
#   jsonString : the JSON string, with or without its enclosing double quotes
#
# returns : the decoded text, or a message that starts with
#           "ParseJSON Error: Invalid String at " when a backslash is
#           followed by something that is not a known escape
#
# The \uXXXX escapes are decoded in a single left-to-right pass and the
# result is spliced together with substr().  sub() must not be used for the
# splice: a decoded "&" (from \u0026) would be read as the matched-text
# marker, leaving the string unchanged so the loop never ended, and a decoded
# backslash could join the text after it and be decoded a second time.
function prsJSON_UnescapeString(jsonString,    matchedString, matchedValue, done, at, len)
{
    if (jsonString == "\"\"")
        return ""

    # strip the enclosing quotes when present
    if (jsonString ~ /^".+"$/)
        jsonString = substr(jsonString, 2, length(jsonString) - 2)

    # park escaped backslashes as \u005C first, so they cannot be mistaken
    # for the start of one of the escapes handled below
    gsub(/\\\\/, "\\u005C", jsonString)
    gsub(/\\"/, "\"", jsonString)
    gsub(/\\\//, "/", jsonString)
    gsub(/\\b/, "\b", jsonString)
    gsub(/\\f/, "\f", jsonString)
    gsub(/\\n/, "\n", jsonString)
    gsub(/\\r/, "\r", jsonString)
    gsub(/\\t/, "\t", jsonString)

    # only \u escapes may be left at this point
    if (match(jsonString, /\\[^u]/))
        return "ParseJSON Error: Invalid String at " jsonString

    # handle encoded UTF-16 surrogates: a high and a low escape together
    # carry 10 bits each of a code point at or above 65536
    done = ""
    while (match(jsonString, /\\uD[89AaBb][0123456789AaBbCcDdEeFf][0123456789AaBbCcDdEeFf]\\uD[CcDdEeFf][0123456789AaBbCcDdEeFf][0123456789AaBbCcDdEeFf]/))
    {
        at = RSTART
        len = RLENGTH
        matchedValue = (prsJSON_hex2num(substr(jsonString, at+2, 4)) % 1024) * 1024 + prsJSON_hex2num(substr(jsonString, at+8, 4)) % 1024 + 65536
        matchedString = prsJSON_EncodeAsUTF8(matchedValue)
        done = done substr(jsonString, 1, at-1) matchedString
        jsonString = substr(jsonString, at+len)
    }
    jsonString = done jsonString

    # remaining single \uXXXX escapes
    done = ""
    while (match(jsonString, /\\u[0123456789AaBbCcDdEeFf][0123456789AaBbCcDdEeFf][0123456789AaBbCcDdEeFf][0123456789AaBbCcDdEeFf]/))
    {
        at = RSTART
        len = RLENGTH
        matchedValue = prsJSON_hex2num(substr(jsonString, at+2, 4))
        matchedString = prsJSON_EncodeAsUTF8(matchedValue)
        done = done substr(jsonString, 1, at-1) matchedString
        jsonString = substr(jsonString, at+len)
    }

    return done jsonString
}
252 | ||
# Tell whether a value returned by prsJSON_UnescapeString is a real string.
#
# returns : 0 when jsonString starts with the unescape error marker, else 1
function prsJSON_ValidString(jsonString)
{
    if (jsonString ~ /^ParseJSON Error: Invalid String at /)
        return 0

    return 1
}
257 | ||
# Store one parsed value.
#
#   jsonData : array receiving the parsed data
#   prefix   : the key under which the value is stored
#   value    : the value itself
function prsJSON_SetDataValue(jsonData, prefix, value)
{
    jsonData[prefix] = value
}
262 | ||
# Abort the parse with an error.
#
#   jsonStringArr : the comma-split input; emptied by this call
#   cnt           : number of segments in jsonStringArr
#   idx           : segment being parsed when the error was found
#   jsonData      : result array; cleared, then element 1 is set to
#                   "ParseJSON Error: <message> at <rest of segment idx>"
#   message       : short description of the problem
#
# returns : cnt + 1, which every parse routine treats as "stop parsing"
function prsJSON_Error(jsonStringArr, cnt, idx, jsonData, message,    where)
{
    # remember the offending text before the input is thrown away
    where = ""
    if (idx <= cnt)
        where = jsonStringArr[idx]

    split("", jsonData)
    jsonData["1"] = sprintf("ParseJSON Error: %s at ", message) where
    split("", jsonStringArr)

    return cnt + 1
}
270 | ||
# Move an error message from a scratch array into the result array.
#
#   jsonData : result array; cleared, then element 1 receives the message
#   tv       : scratch array whose element 1 holds the message
function prsJSON_CopyError(jsonData, tv)
{
    split("", jsonData)
    jsonData["1"] = tv[1]
}
276 | ||
# Parse a JSON number from the front of segment idx.
#
# On success the matched text is stored in jsonData[prefix] and removed from
# the segment.  When the segment does not start with a well-formed number
# the parse is aborted through prsJSON_Error.
#
# returns : idx, or cnt + 1 after an error
function prsJSON_ParseNumber(jsonStringArr, cnt, idx, jsonData, prefix,    text)
{
    if (idx > cnt)
        return idx

    text = jsonStringArr[idx]

    # the caller only checked the first character, so the text may start
    # like a number and still not match the full pattern
    if (!match(text, /^(\-?)(0|[123456789][0123456789]*)(\.[0123456789]+)?([eE][+-]?[0123456789]+)?/))
        return prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Number not found")

    prsJSON_SetDataValue(jsonData, prefix, substr(text, 1, RLENGTH))
    jsonStringArr[idx] = length(text) >= RLENGTH+1 ? substr(text, RLENGTH+1) : ""

    return idx
}
292 | ||
# Parse a JSON string that starts at the front of segment idx.
#
# The input was split on commas, so a string that contains commas is spread
# over several segments.  Segments are glued back together (with the comma)
# until the closing quote is found.  The decoded string is stored in
# jsonData[prefix] and whatever follows the closing quote becomes the new
# content of the last segment consumed.
#
# returns : index of the segment now being parsed, or cnt + 1 after an error
function prsJSON_ParseString(jsonStringArr, cnt, idx, jsonData, prefix,    text, from, slashAt, quoteAt, tail, piece)
{
    if (!(idx <= cnt && length(jsonStringArr[idx]) > 0 && substr(jsonStringArr[idx], 1, 1) == "\""))
        return prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "String expected")

    from = 2                    # scanning resumes here; 1 is the open quote
    text = jsonStringArr[idx]

    for (;;)
    {
        tail = length(text) >= from ? substr(text, from) : ""
        slashAt = index(tail, "\\")
        quoteAt = index(tail, "\"")

        if (quoteAt == 0)
        {
            # no quote in what is left: pull in the next segment, if any
            if (idx == cnt)
                break

            idx++
            text = text "," jsonStringArr[idx]
        }
        else if (slashAt == 0 || quoteAt < slashAt)
            break               # a quote with no backslash before it closes the string

        if (slashAt != 0 && quoteAt == slashAt + 1)
            from = from + quoteAt       # step over an escaped quote
        else
            from = from + slashAt + 1   # step over the backslash and the character after it
    }

    if (quoteAt <= 0)
        return prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Unterminated string")

    # the string including both of its quotes
    piece = substr(text, 1, from+quoteAt-1)

    if (match(piece, /[\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037]/) != 0)
        return prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Invalid character in string")

    piece = prsJSON_UnescapeString(piece)
    if (!prsJSON_ValidString(piece))
        return prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Invalid string")

    prsJSON_SetDataValue(jsonData, prefix, piece)
    jsonStringArr[idx] = length(text) >= from+quoteAt ? substr(text, from+quoteAt) : ""

    return idx
}
353 | ||
# Parse a JSON object whose open brace is at the front of segment idx.
#
# Each member value is stored by prsJSON_ParseJSONInt under the key
# prefix SUBSEP name (just name when prefix is empty).  Property names are
# parsed into the scratch array tv so that a failure there can be copied
# into jsonData afterwards.
#
# returns : index of the segment now being parsed, or cnt + 1 after an error
function prsJSON_ParseObject(jsonStringArr, cnt, idx, jsonData, prefix,    tv, key)
{
    if (idx > cnt)
        return idx

    sub(/^\{[ \t\r\n\f]*/, "", jsonStringArr[idx])      # skip open brace and white space

    while (idx <= cnt && length(jsonStringArr[idx]) > 0 && substr(jsonStringArr[idx], 1, 1) != "}")
    {
        # property name
        idx = prsJSON_ParseString(jsonStringArr, cnt, idx, tv, "1")

        if (idx <= cnt && length(tv[1]) == 0)
            idx = prsJSON_Error(jsonStringArr, cnt, idx, tv, "Empty string used for property name")

        if (idx > cnt)
        {
            # the name was rejected; its error message is in tv
            prsJSON_CopyError(jsonData, tv)
            continue
        }

        # colon
        sub(/^[ \t\r\n\f]+/, "", jsonStringArr[idx])
        if (length(jsonStringArr[idx]) == 0 || substr(jsonStringArr[idx], 1, 1) != ":")
        {
            idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected colon")
            continue
        }
        sub(/^:[ \t\r\n\f]*/, "", jsonStringArr[idx])

        # value
        if (length(jsonStringArr[idx]) == 0)
        {
            idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected JSON value (1)")
            continue
        }

        key = prefix != "" ? prefix SUBSEP tv[1] : tv[1]
        idx = prsJSON_ParseJSONInt(jsonStringArr, cnt, idx, jsonData, key)
        if (idx > cnt)
            continue

        sub(/^[ \t\r\n\f]+/, "", jsonStringArr[idx])

        if (length(jsonStringArr[idx]) == 0 && idx < cnt)
        {
            # the segment is used up, which means a comma followed the
            # value: another property has to start the next segment
            idx++
            sub(/^[ \t\r\n\f]+/, "", jsonStringArr[idx])
            if (length(jsonStringArr[idx]) == 0 || substr(jsonStringArr[idx], 1, 1) == "}")
                idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected object property")
        }
        else if (length(jsonStringArr[idx]) == 0 || substr(jsonStringArr[idx], 1, 1) != "}")
            idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected object property or closing brace")
    }

    if (idx <= cnt && (length(jsonStringArr[idx]) == 0 || substr(jsonStringArr[idx], 1, 1) != "}"))
        idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected closing brace")

    if (idx <= cnt && length(jsonStringArr[idx]) > 0 && substr(jsonStringArr[idx], 1, 1) == "}")
        sub(/^\}[ \t\r\n\f]*/, "", jsonStringArr[idx])  # skip close brace and white space

    return idx
}
413 | ||
# Parse a JSON array whose open bracket is at the front of segment idx.
#
# Elements are numbered from 1 and stored by prsJSON_ParseJSONInt under the
# key prefix SUBSEP number (just the number when prefix is empty).
#
# returns : index of the segment now being parsed, or cnt + 1 after an error
function prsJSON_ParseArray(jsonStringArr, cnt, idx, jsonData, prefix,    ii)
{
    if (idx > cnt)
        return idx

    sub(/^\[[ \t\r\n\f]*/, "", jsonStringArr[idx])      # skip open bracket and white space

    for (ii = 1; idx <= cnt && length(jsonStringArr[idx]) > 0 && substr(jsonStringArr[idx], 1, 1) != "]"; ii++)
    {
        idx = prsJSON_ParseJSONInt(jsonStringArr, cnt, idx, jsonData, prefix != "" ? prefix SUBSEP ii : ii)
        if (idx > cnt)
            continue

        sub(/^[ \t\r\n\f]+/, "", jsonStringArr[idx])

        if (length(jsonStringArr[idx]) == 0 && idx < cnt)
        {
            # the segment is used up, which means a comma followed the
            # value: another element has to start the next segment
            idx++
            sub(/^[ \t\r\n\f]+/, "", jsonStringArr[idx])
            if (length(jsonStringArr[idx]) == 0 || substr(jsonStringArr[idx], 1, 1) == "]")
                idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected array value")
        }
        else if (length(jsonStringArr[idx]) == 0 || substr(jsonStringArr[idx], 1, 1) != "]")
            idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected array value or closing bracket")
    }

    if (idx <= cnt && (length(jsonStringArr[idx]) == 0 || substr(jsonStringArr[idx], 1, 1) != "]"))
        idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected closing bracket")

    if (idx <= cnt && length(jsonStringArr[idx]) > 0 && substr(jsonStringArr[idx], 1, 1) == "]")
        sub(/^\][ \t\r\n\f]*/, "", jsonStringArr[idx])  # skip close bracket and white space

    return idx
}
452 | ||
# Parse one JSON value from the front of segment idx and store it under prefix.
#
# An empty prefix marks the outermost call.  At that level only an object or
# an array is accepted, and any input left over after the value is an error.
# The literals true, false and null are stored as the marker strings
# <<true>>, <<false>> and <<null>>.
#
# returns : index of the segment now being parsed, or cnt + 1 after an error
function prsJSON_ParseJSONInt(jsonStringArr, cnt, idx, jsonData, prefix,    tk)
{
    if (idx > cnt)
        return idx

    sub(/^[ \t\r\n\f]+/, "", jsonStringArr[idx])        # skip white space

    if (length(jsonStringArr[idx]) > 0)
    {
        # dispatch on the first character of the value
        tk = substr(jsonStringArr[idx], 1, 1)

        if (tk == "\"" && prefix != "")
            idx = prsJSON_ParseString(jsonStringArr, cnt, idx, jsonData, prefix)
        else if (tk ~ /^[0123456789-]/ && prefix != "")
            idx = prsJSON_ParseNumber(jsonStringArr, cnt, idx, jsonData, prefix)
        else if (jsonStringArr[idx] ~ /^true/ && prefix != "")
        {
            prsJSON_SetDataValue(jsonData, prefix, "<<true>>")
            jsonStringArr[idx] = length(jsonStringArr[idx]) <= 4 ? "" : substr(jsonStringArr[idx],5)
        }
        else if (jsonStringArr[idx] ~ /^false/ && prefix != "")
        {
            prsJSON_SetDataValue(jsonData, prefix, "<<false>>")
            jsonStringArr[idx] = length(jsonStringArr[idx]) <= 5 ? "" : substr(jsonStringArr[idx],6)
        }
        else if (jsonStringArr[idx] ~ /^null/ && prefix != "")
        {
            prsJSON_SetDataValue(jsonData, prefix, "<<null>>")
            jsonStringArr[idx] = length(jsonStringArr[idx]) <= 4 ? "" : substr(jsonStringArr[idx],5)
        }
        else if (tk == "{")
            idx = prsJSON_ParseObject(jsonStringArr, cnt, idx, jsonData, prefix)
        else if (tk == "[")
            idx = prsJSON_ParseArray(jsonStringArr, cnt, idx, jsonData, prefix)
        else
            idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected JSON value (2)")

        if (idx <= cnt)
            sub(/^[ \t\r\n\f]+/, "", jsonStringArr[idx])        # skip white space
    }

    # outermost call only: nothing may follow the value
    if (prefix == "")
    {
        if (idx <= cnt && length(jsonStringArr[idx]) != 0)
            idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected end of JSON text")
        else if (idx+1 <= cnt)
            idx = prsJSON_Error(jsonStringArr, cnt, idx+1, jsonData, "Expected end of JSON text (2)")
    }

    return idx
}
504 | ||
505 | # | |
506 | # JSON Formatting Routines | |
507 | # | |
508 | ||
# Decide whether a list of member names looks like the indexes of an array.
#
#   possibleArray : array whose VALUES are the member names
#
# returns : the number of names when every name is an unsigned integer, the
#           smallest is 1 and the largest equals the number of names;
#           -1 otherwise (also for an empty list, or when a name repeats the
#           current smallest or largest one)
function useJSON_ArrayCount(possibleArray,    a, min, max, cnt, v)
{
    cnt = 0

    for (a in possibleArray)
    {
        v = possibleArray[a]

        if (v "" !~ /^[0123456789][0123456789]*$/)
            return -1

        if (cnt > 0 && (min == v || max == v))
            return -1

        if (cnt == 0 || v < min)
            min = v

        if (cnt == 0 || max < v)
            max = v

        cnt++
    }

    return (min == 1 && max == cnt) ? cnt : -1
}
543 | ||
# Look up the member list recorded for prefix in a schema built by
# useJSON_GetSchema.  The outermost level is stored under "<<novalue>>".
#
# returns : the SUBSEP-separated member names, or "" when there are none
function useJSON_GetObjectMembers(jsonSchema, prefix)
{
    if (prefix == "")
        prefix = "<<novalue>>"

    if (prefix in jsonSchema)
        return jsonSchema[prefix]

    return ""
}
549 | ||
# Sort arr[left] .. arr[right] in place into ascending order (quick sort
# with a randomly chosen partition element).
#
#   arr   : array indexed by consecutive integers
#   left  : first index of the range to sort
#   right : last index of the range to sort
function utlJSON_qsortArray(arr, left, right,    scan, edge, tmp)
{
    # fewer than 2 elements: already sorted
    if (left >= right)
        return

    # move a randomly chosen partition element to the front
    scan = left + int((right-left+1)*rand())
    tmp = arr[left]
    arr[left] = arr[scan]
    arr[scan] = tmp

    # collect everything smaller than the partition element right behind it;
    # edge is the last position of that group
    edge = left
    for (scan = left+1; scan <= right; scan++)
    {
        if (arr[scan] < arr[left])
        {
            edge++
            tmp = arr[edge]
            arr[edge] = arr[scan]
            arr[scan] = tmp
        }
    }

    # put the partition element between the two groups
    tmp = arr[left]
    arr[left] = arr[edge]
    arr[edge] = tmp

    utlJSON_qsortArray(arr, left, edge-1)
    utlJSON_qsortArray(arr, edge+1, right)
}
576 | ||
# Tell whether member name tv is already part of the SUBSEP-separated list sv.
# The comparison is a literal string search, never a regular expression, so
# names that contain regex characters are handled like any other name.
function useJSON_SchemaHasMember(sv, tv)
{
    return index(SUBSEP sv SUBSEP, SUBSEP tv SUBSEP) > 0
}

# Build a schema that lists, for every level of the parsed data, the names
# of its members.
#
#   jsonData   : array of parsed JSON data, keyed by names joined with SUBSEP
#   jsonSchema : [out] cleared, then filled with one entry per parent key
#                holding the SUBSEP-separated names found directly below it;
#                the outermost level is stored under "<<novalue>>"
#
# The order of names inside a list follows the order in which awk happens to
# enumerate jsonData.
function useJSON_GetSchema(jsonData, jsonSchema,    a, tidx, tv, sv, idx)
{
    split("", jsonSchema)

    for (a in jsonData)
    {
        # peel the last component off the key until only the top-level
        # name is left, registering each component with its parent
        while (match(a, SUBSEP "[^" SUBSEP "]+$"))
        {
            tidx = substr(a, 1, RSTART-1)
            tv = substr(a, RSTART+length(SUBSEP))
            sv = (tidx in jsonSchema) ? jsonSchema[tidx] : ""
            if (!useJSON_SchemaHasMember(sv, tv))
                jsonSchema[tidx] = sv (sv == "" ? "" : SUBSEP) tv
            a = tidx
        }

        # what remains is a member of the outermost level
        tidx = "<<novalue>>"
        tv = a
        sv = (tidx in jsonSchema) ? jsonSchema[tidx] : ""
        if (!useJSON_SchemaHasMember(sv, tv))
            jsonSchema[tidx] = sv (sv == "" ? "" : SUBSEP) tv
    }
}
601 | ||
# Turn a string of UTF-8 bytes into a quoted JSON string.
#
#   s : the text to encode
#
# returns : s inside double quotes, with backslash written as \u005C, the
#           double quote and the usual control characters backslash-escaped,
#           other control characters written as \uXXXX, and multi-byte
#           UTF-8 sequences written as \uXXXX (code points that need four
#           bytes become a pair of UTF-16 surrogate escapes)
#
# cs holds the bytes 0x80 through 0xFF in order; index(cs, byte) is used to
# classify a byte as continuation (1-64), or as the lead byte of a 2 byte
# (65-96), 3 byte (97-112) or 4 byte (113-120) sequence.
function useJSON_EscapeString(s,    ii, c, t, t2, t3, t4, cs, head, lead)
{
    cs = "\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377"

    gsub(/\\/, "\\u005C", s)
    gsub(/"/, "\\\"", s)
    #gsub(/\//, "\\/", s)       # required to decode, but not to encode
    gsub(/\b/, "\\b", s)
    gsub(/\f/, "\\f", s)
    gsub(/\n/, "\\n", s)
    gsub(/\r/, "\\r", s)
    gsub(/\t/, "\\t", s)

    # s grows while it is scanned; after each replacement ii is moved to the
    # last character of the inserted escape
    for ( ii = 1 ; ii <= length(s) ; ii++ )
    {
        t = substr(s,ii,1)
        head = (ii > 1 ? substr(s, 1, ii-1) : "")

        # control characters; \000 is tested on its own because having it
        # in the list below doesnt work in all awks
        if (t == "\000")
            c = 0
        else
        {
            c = index("\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037", t)
            if (c == 0)
                c = -1
        }

        if ( c >= 0 )
        {
            s = head sprintf("\\u%04X", c) (ii==length(s) ? "" : substr(s, ii+1))
            ii += 5
            continue
        }

        lead = index(cs, t)
        t2 = ii+1 <= length(s) ? index(cs, substr(s,ii+1,1)) : 0
        t3 = ii+2 <= length(s) ? index(cs, substr(s,ii+2,1)) : 0
        t4 = ii+3 <= length(s) ? index(cs, substr(s,ii+3,1)) : 0

        # two character UTF-8 sequence
        if ( lead > 64 && lead <= 96 && ii+1 <= length(s) && t2 > 0 && t2 <= 64)
        {
            c = (lead - 65)*64 + (t2-1)
            s = head sprintf("\\u%04X", c) (ii+1==length(s) ? "" : substr(s, ii+2))
            ii += 5
        }

        # three character UTF-8 sequence
        else if ( lead > 96 && lead <= 112 && ii+2 <= length(s) && t2 > 0 && t2 <= 64 && t3 > 0 && t3 <= 64)
        {
            c = (lead - 97)*4096 + (t2-1)*64 + (t3-1)
            if ( c < 65536 )
            {
                s = head sprintf("\\u%04X", c) (ii+2==length(s) ? "" : substr(s, ii+3))
                ii += 5
            }
            else
            {
                # kept as in the original; with the bounds checked above
                # c is at most 65535, so this branch is not expected to run
                s = head sprintf("\\u%04X\\u%04X", (c/1024)%1024 + 55296, c%1024 + 56320) (ii+3==length(s) ? "" : substr(s, ii+4))
                ii += 11
            }
        }

        # four character UTF-8 sequence, encode in JSON-style with two \u#### UTF-16 surrogates
        else if ( lead > 112 && lead <= 120 && ii+3 <= length(s) && t2 > 0 && t2 <= 64 && t3 > 0 && t3 <= 64 && t4 > 0 && t4 <= 64)
        {
            c = (lead - 113)*262144 + (t2-1)*4096 + (t3-1)*64 + (t4-1)
            c -= 65536
            s = head sprintf("\\u%04X\\u%04X", (c/1024)%1024 + 55296, c%1024 + 56320) (ii+3==length(s) ? "" : substr(s, ii+4))
            ii += 11
        }
    }

    return "\"" s "\""
}
679 | ||
# Fetch the value stored under prefix.
#
# returns : jsonData[prefix], or the marker "<<novalue>>" when there is no
#           such element (the lookup does not create it)
function useJSON_GetDataValue(jsonData, prefix)
{
    if (!(prefix in jsonData))
        return "<<novalue>>"

    return jsonData[prefix]
}
684 | ||
# Indent one piece of output for the pretty format.
#
#   s      : the text to place on a line
#   pretty : nesting level, 1 for the outermost; 0 or less means compact
#
# returns : s unchanged when it is empty or pretty is not positive;
#           otherwise s preceded by 3 blanks per level below the first and
#           followed by a newline, except that a lone closing brace or
#           bracket gets no newline
function useJSON_PrettyFormat(s, pretty,    width)
{
    if (s == "" || pretty <= 0)
        return s

    width = (pretty-1)*3

    # only the indent goes through sprintf: some awks have short buffers
    # for sprintf, so the text itself is appended by concatenation
    if (s == "}" || s == "]")
        return sprintf("%*.*s", width, width, "") s

    return sprintf("%*.*s", width, width, "") s "\n"
}
692 | ||
# Format the value stored under prefix, and everything below it, as JSON text.
#
#   jsonData   : array of parsed JSON data
#   jsonSchema : schema built by useJSON_GetSchema for jsonData
#   prefix     : key of the value to format, "" for the outermost level
#   pretty     : 0 for compact output, otherwise the current nesting level
#
# returns : the JSON text.  A prefix without members is written as a
#           literal, number or string; a prefix whose members are the
#           integers 1..n is written as an array; anything else is written
#           as an object with its member names sorted.
function useJSON_FormatInt(jsonData, jsonSchema, prefix, pretty,    allLines, member, memberArr, memberList, arrCount, a, ii, deeper, joint, item)
{
    memberList = useJSON_GetObjectMembers(jsonSchema, prefix)

    # no members: a plain value
    if ( memberList == "" )
    {
        a = useJSON_GetDataValue(jsonData, prefix)
        if ( a == "<<true>>" ) return "true"
        if ( a == "<<false>>" ) return "false"
        if ( a == "<<null>>" ) return "null"
        if ( a == "<<novalue>>" ) return ""     # <<novalue>> is a help for dealing with empty arrays and objects

        # a parsed string that looks like a number cannot be told apart
        # from a number, so it is written without quotes
        if (a "" ~ /^(\-?)(0|[123456789][0123456789]*)(\.[0123456789]+)?([eE][+-]?[0123456789]+)?$/)
            return a

        return useJSON_EscapeString(a)
    }

    deeper = (pretty != 0 ? pretty+1 : 0)       # level used for the members
    joint = (prefix == "" ? "" : SUBSEP)        # goes between prefix and member name

    split(memberList, memberArr, SUBSEP)
    arrCount = useJSON_ArrayCount( memberArr )

    # members are 1..n: an array
    if ( arrCount >= 0 )
    {
        allLines = "[" (pretty == 0 ? "" : "\n")

        for ( ii = 1 ; ii <= arrCount ; ii++ )
        {
            item = useJSON_FormatInt(jsonData, jsonSchema, prefix joint ii, deeper)
            allLines = allLines useJSON_PrettyFormat(item (ii < arrCount ? "," : ""), deeper)
        }

        return allLines useJSON_PrettyFormat("]", pretty)
    }

    # otherwise: an object, written with its member names in sorted order
    allLines = "{" (pretty == 0 ? "" : "\n")

    arrCount = 0
    for (a in memberArr)
        arrCount++

    utlJSON_qsortArray(memberArr, 1, arrCount)

    for ( ii = 1 ; ii <= arrCount ; ii++ )
    {
        item = useJSON_EscapeString(memberArr[ii]) (pretty == 0 ? ":" : " : ") useJSON_FormatInt(jsonData, jsonSchema, prefix joint memberArr[ii], deeper)
        allLines = allLines useJSON_PrettyFormat(item (ii < arrCount ? "," : ""), deeper)
    }

    return allLines useJSON_PrettyFormat("}", pretty)
}
743 | ||
744 | # | |
745 | # Entry Points | |
746 | # | |
747 | ||
#
# ParseJSON : Parse JSON text into an awk array
#
# jsonString : JSON text; the outermost value has to be an object or an array
# jsonData : [out] array of parsed JSON data.  It is cleared first.  Each
#            value is stored under the names and array indexes (counted
#            from 1) that lead to it, joined with SUBSEP.  The literals
#            true, false and null are stored as <<true>>, <<false>> and
#            <<null>>.  When the text cannot be parsed, element 1 holds a
#            message that starts with "ParseJSON Error: ".
#
# returns : N/A
#
function ParseJSON(jsonString, jsonData,    jsonStringArr, cnt)
{
    split("", jsonData)                 # clear the array jsonData

    # newlines split differently in some awks, so literal newlines are
    # always replaced with formfeeds, which the parser also skips as
    # white space
    gsub(/\n/, "\f", jsonString)

    # the parser works on the comma-separated pieces of the text
    cnt = split(jsonString, jsonStringArr, ",")
    prsJSON_ParseJSONInt(jsonStringArr, cnt, 1, jsonData, "")
}
766 | ||
#
# FormatJSON : Format parsed JSON data back into JSON text
#
# jsonData : array of parsed JSON data, as filled in by ParseJSON
# pretty : 0 = compact format, non-zero = pretty format (one item per
#          line, indented 3 blanks per nesting level)
#
# returns : string with JSON text
#
function FormatJSON(jsonData, pretty,    jsonSchema)
{
    # the schema records which members exist at every level
    useJSON_GetSchema(jsonData, jsonSchema)

    if (pretty)
        return useJSON_FormatInt(jsonData, jsonSchema, "", 1)

    return useJSON_FormatInt(jsonData, jsonSchema, "", 0)
}
780 | ||
#
# JSONArrayLength : Find number of members in a JSON array
#
# jsonData : array of parsed JSON data
# prefix : array name; "" selects the outermost level
#
# returns : the highest numeric index found directly below prefix, which is
#           the number of entries for an array filled in by ParseJSON;
#           0 when prefix exists but has no numeric members;
#           -1 when no key belongs to prefix
#
# A key belongs to prefix only when it is prefix itself or starts with
# prefix followed by SUBSEP.  Testing for the bare prefix is not enough:
# asking for "log" would then also pick up keys such as "log15" or
# "logs" SUBSEP 3 and report a wrong length.
#
function JSONArrayLength(jsonData, prefix,    a, cnt, tv, lead)
{
    cnt = -1
    lead = (prefix == "" ? "" : prefix SUBSEP)

    for (a in jsonData)
    {
        # tv becomes the part of the key below prefix
        if (lead == "")
            tv = a
        else if (index(a, lead) == 1)
            tv = substr(a, length(lead)+1)
        else if (a == prefix)
            tv = ""             # prefix holds a plain value: counts as 0
        else
            continue

        # only the first component below prefix is of interest
        if ( index(tv, SUBSEP) )
            tv = substr(tv, 1, index(tv, SUBSEP)-1)

        tv = tv + 0
        if ( tv > cnt )
            cnt = tv
    }

    return cnt
}
808 | ||
#
# JSONUnescapeString : turn a JSON-escaped string into UTF-8
#
# jsonString : the escaped JSON string to convert, with or without its
#              enclosing double quotes
#
# returns : the string in UTF-8, exactly as produced by
#           prsJSON_UnescapeString (this is its public name)
#
function JSONUnescapeString(jsonString)
{
    return prsJSON_UnescapeString(jsonString)
}
820 | ||
#
# JSONIsTrue : test a parsed value for the JSON literal true
#
# jsonValue : the value to test, as stored by ParseJSON
#
# returns : 1 when the value is the marker <<true>>, else 0
#
function JSONIsTrue(jsonValue)
{
    if (jsonValue == "<<true>>")
        return 1

    return 0
}
832 | ||
#
# JSONIsFalse : test a parsed value for the JSON literal false
#
# jsonValue : the value to test, as stored by ParseJSON
#
# returns : 1 when the value is the marker <<false>>, else 0
#
function JSONIsFalse(jsonValue)
{
    if (jsonValue == "<<false>>")
        return 1

    return 0
}
844 | ||
#
# JSONIsNull : test a parsed value for the JSON literal null
#
# jsonValue : the value to test, as stored by ParseJSON
#
# returns : 1 when the value is the marker <<null>>, else 0
#
function JSONIsNull(jsonValue)
{
    if (jsonValue == "<<null>>")
        return 1

    return 0
}
856 | ||
#
# JSONObjectMembers : get the set of members of an object
#
# jsonData : array of parsed JSON data
# prefix : object name
# memberArr : [out] the names of the members found directly below prefix,
#             indexed from 1; sorted when the target is an object, emptied
#             when the target has no members
#
# returns : the number of elements when the target is an array (its members
#           are the integers 1..n); zero when the target is an object or a
#           plain value
#
function JSONObjectMembers(jsonData, prefix, memberArr,    jsonSchema, memberList, rv, a, total)
{
    useJSON_GetSchema(jsonData, jsonSchema)
    memberList = useJSON_GetObjectMembers(jsonSchema, prefix)

    # a plain value, or nothing at all, below prefix
    if ( memberList == "" )
    {
        split("", memberArr)
        return 0
    }

    total = split(memberList, memberArr, SUBSEP)

    rv = useJSON_ArrayCount( memberArr )
    if ( rv != -1 )
        return rv

    # not an array, sort the object member names
    utlJSON_qsortArray(memberArr, 1, total)
    return 0
}
891 | # End of Copyright (c) 2010 Dan Saar | |
892 | ||
# debug : write text to stderr when lvl does not exceed the global DEBUG
function debug(lvl, text) {
    if (lvl > DEBUG)
        return
    print text > "/dev/stderr"
}
897 | ||
# wpt_init : reset the per-waypoint globals to their defaults
function wpt_init() {
    # status flags and symbol
    available = "True"; archived = "False"
    sym = "Geocache"
    gs_type = ""

    # text collected from the page
    hints = ""
    logs = ""
    wplist = ""
    lat = ""

    # counters and switches
    json_log_bool = 0
    logs_section = 0
    nattr_yes = 0; nattr_no = 0
    yy = 0
}
913 | ||
# umlauts : translate a minimal set of HTML entities in titles to UTF-8
#
#   text    : title text that may contain named HTML entities
#
#   returns : text with the German umlaut entities, &szlig; and &deg;
#             replaced by their UTF-8 byte sequences and &amp; by "&"
function umlauts(text) {
    # The patterns must be the entity names; matching the already
    # decoded characters would make every substitution a no-op.
    gsub("&auml;", "\xc3\xa4", text)
    gsub("&ouml;", "\xc3\xb6", text)
    gsub("&uuml;", "\xc3\xbc", text)
    gsub("&Auml;", "\xc3\x84", text)
    gsub("&Ouml;", "\xc3\x96", text)
    gsub("&Uuml;", "\xc3\x9c", text)
    gsub("&szlig;", "\xc3\x9f", text)
    gsub("&deg;", "\xc2\xb0", text)
    # &amp; goes last so "&amp;auml;" is not decoded twice; "\\&" is a
    # literal ampersand in the replacement
    gsub("&amp;", "\\&", text)
    return text
}
927 | ||
# htmlclean : reduce an HTML fragment to plain text
#
#   text    : HTML fragment
#
#   returns : text with &nbsp; turned into blanks, <p> and <br> tags
#             turned into newlines, all other tags removed and runs of
#             blank lines and of blanks/tabs compressed
function htmlclean(text) {
    # match the entity itself; a plain blank here would be a no-op
    gsub("&nbsp;", " ", text)
    gsub("</?[pP][^>]*>", "\n", text)
    gsub("<[bB][rR][^>]*>", "\n", text)
    gsub("<[^>]*>", "", text)
    # compress whitespace
    gsub("\n\n\n*", "\n\n", text)
    gsub("[ \t][ \t]*", " ", text)
    return text
}
938 | ||
# tableclean : flatten an HTML table into plain text
#
#   text    : HTML fragment containing a table
#
#   returns : text with rows ended by newlines and cells ended by
#             " | ", every remaining tag removed and runs of blanks
#             and tabs compressed
function tableclean(text) {
    gsub("\n", "", text)
    # match the entity itself; a plain blank here would be a no-op
    gsub("&nbsp;", " ", text)
    # translate/remove HTML tags
    gsub("</?[pP][^>]*>", "\n", text)
    gsub("</[bB][rR][^>]*>", "", text)
    gsub("</?font[^>]*>", "", text)
    gsub("</?table[^>]*>", "", text)
    gsub("<t[rdh]>", "", text)
    gsub("</tr>", "\n", text)
    gsub("</t[dh][^>]*>", " | ", text)
    gsub("<[^>]*>", "", text)
    # compress whitespace
    gsub("[ \t][ \t]*", " ", text)
    return text
}
955 | ||
# remdiv : extract the content of a <div> element
#
#   text    : input line holding the opening <div ...> tag
#   tag     : id of the div to look for; empty selects any div
#
#   returns : the text between the opening tag and the first </div>;
#             further input lines are read with getline until the
#             closing tag shows up or input ends
#
# pat and more are local scratch variables.
function remdiv(text, tag,    pat, more) {
    if (tag != "")
        pat = ".*<div id=." tag ".[^>]*>[ \t\n]*"
    else
        pat = ".*<div[^>]*>[ \t\n]*"
    sub(pat, "", text)
    # Wait for the real closing tag.  Stopping on any "div" substring
    # would cut the content short at words such as "dividing".
    while (text !~ "</div>")
    {
        if ((getline more) <= 0)
            break
        text = text "\n" more
    }
    sub("[ \t\n]*</div>.*", "", text)
    debug(3, "Div:\n" text)
    return text
}
972 | ||
# remspan : extract the content of a <span> element
#
#   text    : input line holding the opening <span ...> tag
#   tag     : id of the span to look for; empty selects any span
#
#   returns : the text between the opening tag and the first </span>;
#             further input lines are read with getline until the
#             closing tag shows up or input ends
#
# pat and more are local scratch variables.
function remspan(text, tag,    pat, more) {
    if (tag != "")
        pat = ".*<span id=." tag ".[^>]*>[ \t\n]*"
    else
        pat = ".*<span[^>]*>[ \t\n]*"
    sub(pat, "", text)
    # Wait for the real closing tag.  Stopping on any "span" substring
    # would cut the content short at words such as "lifespan".
    while (text !~ "</span>")
    {
        if ((getline more) <= 0)
            break
        text = text "\n" more
    }
    sub("[ \t\n]*</span>.*", "", text)
    debug(3, "Span:\n" text)
    return text
}
989 | ||
# remspanlong : extract the content of a <span> that may hold nested spans
#
#   text    : input line holding the opening <span ...> tag
#   tag     : id of the span to look for; empty selects any span
#
#   returns : the collected text with every span tag removed and &nbsp;
#             turned into blanks; for tag "CacheLogs" the table tags
#             are removed as well
#
# Lines are read with getline until the nesting level is back at zero,
# input ends, or more than 500,000 bytes have been collected.
# pat, depth, step, more and changed are local scratch variables.
function remspanlong(text, tag,    pat, depth, step, more, changed) {
    if (tag != "")
        pat = ".*<span id=." tag ".[^>]*>[ \t\n]*"
    else
        pat = ".*<span[^>]*>[ \t\n]*"
    sub(pat, "", text)
    # depth = "span level", step = debug line counter
    depth = 1; step = 0
    debug(2, length(text) "\t" depth " " step++ " " text)
    # input is in text
    while (depth != 0)
    {
        # emergency exit
        if (length(text) > 500000)
        {
            debug(0, "Warning: logs exceeded 500,000 bytes!")
            break
        }
        # cleanup: remove </*span...>, adjust "span level"
        while (text ~ "</*span.*>")
        {
            changed = 0
            if (sub("</span>", "", text))
            {
                --depth; changed = 1
            }
            if (sub("<span[^>]*>", "", text))
            {
                ++depth; changed = 1
            }
            # neither form could be removed (e.g. "</span >"): leave
            # instead of looping forever
            if (!changed)
                break
        }
        debug(2, "=" length(text) "\t" depth " " step++ " " text)
        # if "span level" down to zero, closing tag reached
        if (depth == 0) break
        # get more input
        if ((getline more) <= 0)
            break
        text = text "\n" more
        debug(2, "+" length(more) "\t" depth " " step++ " " more)
    }
    debug(1, length(text) "\t" depth " " step++)
    sub("[ \t\n]*</span>.*", "", text)
    # match the entity itself; a plain blank here would be a no-op
    gsub("&nbsp;", " ", text)
    if (tag == "CacheLogs")
        gsub("</?table[^>]*>", "", text)
    debug(3, "SpanLong:\n" text)
    return text
}
1037 | ||
# remwaypoints : collect the Additional Waypoints table from the input
#
#   returns : the following input lines joined with blanks, up to and
#             including the line with </table> or with the text
#             "No additional waypoints to display"; &nbsp; is turned
#             into a blank
#
# The complete table contents are returned; callers split them by </tr>
# instead of <STRONG><img...>.
# text and more are local scratch variables.
function remwaypoints(    text, more) {
    text = ""
    while (text !~ "</table>" && text !~ "No additional waypoints to display")
    {
        if ((getline more) <= 0)
            break
        text = text " " more
    }
    # match the entity itself; a plain blank here would be a no-op
    gsub("&nbsp;", " ", text)
    gsub("\n[ \t]*", "", text)
    debug(3, "Waypoints:\n" text "\nEnd Waypoints")
    return text
}
1053 | ||
# splitwaypoints : turn the Additional Waypoints table into text records
#
#   waypoints : table markup as returned by remwaypoints()
#
#   returns   : one record per waypoint, each starting with "\n":
#               lat="..." lon="..."|prefix|lookup|name|url
#               followed by "|note" when a Note: row comes after it
#
# Row layouts (counted in </td> separated fields):
#   8 fields: main line, old style (pre-2010/07)
#   9 fields: main line, new style (2010/07), one extra leading column
#   4 fields, [1]~"Note:": note line, old style
#   4 fields, [2]~"Note:": note line, new style
#   anything else is dropped
#
# Everything after waypoints in the parameter list is local scratch.
function splitwaypoints(waypoints,
    line, fld, prefix, lookup, wpname, x, y, lat, lon,
    text, wps, nwps, wp, url, nfld, off, coords) {
    text = ""
    # separate lines; element 1 is the header line and is skipped
    nwps = split(waypoints, wps, "</tr>")
    for (wp = 2; wp <= nwps; ++wp)
    {
        # get URL from full table line
        url = wps[wp]
        gsub(".*href=.", "", url)
        gsub("\".*", "", url)
        if (url !~ "^https?:")
            url = ""
        else
            debug(1, "url: " url)

        # individual fields without leading/trailing blanks, remove HTML tags
        nfld = split(wps[wp], line, "</td>")
        for (fld = 1; fld <= nfld; ++fld)
        {
            debug(2, "Before Line[" fld "]: " line[fld])
            gsub("[ \t]*<[^>]*>", "", line[fld])
            gsub("^[ \t]*", "", line[fld])
            gsub("[ \t]*$", "", line[fld])
            debug(2, "after Line[" fld "]: " line[fld])
        }

        if (nfld == 8 || nfld == 9)
        {
            # main information line; new style is shifted by one column
            off = nfld - 8
            if (!line[3 + off]) continue
            prefix = substr(line[3 + off] "00", 1, 2)
            lookup = line[4 + off]
            wpname = line[5 + off]
            coords = toupper(line[6 + off])

            # latitude: everything before the E/W part
            lat = coords
            gsub(" *[EW].*", "", lat)
            split(lat, y)
            lat = y[2] + y[3]/60.0
            if (y[1] == "S")
                lat = -lat

            # longitude: drop the N/S part and stray characters
            lon = coords
            gsub("[NS] *[0-9]*.. *[0-9.]* ", "", lon)
            gsub("[^ 0-9.NESW-]", "", lon)
            split(lon, x)
            lon = x[2] + x[3]/60.0
            if (x[1] == "W")
                lon = -lon

            text = text sprintf("\nlat=\"%.6f\" lon=\"%.6f\"|%s|%s|%s|%s",
                lat, lon, prefix, lookup, wpname, url)
        }
        else if (nfld == 4)
        {
            if (line[1] ~ "Note:")
                text = text "|" line[2]     # continuation line, old style
            else if (line[2] ~ "Note:")
                text = text "|" line[3]     # continuation line, new style
        }
    }
    debug(3, "Split WPs\n" text)
    return text
}
1158 | ||
# wpclean : simplify the Additional Waypoints table for the description
#
#   waypoints : table markup as returned by remwaypoints()
#
#   returns   : for every waypoint
#               prefixedname - name<br />coordfield<br />note<br />
#               where prefixedname is the two character waypoint prefix
#               followed by the global gcid from its third character on
#
# Row layouts are the same as in splitwaypoints: 8 fields (old style)
# or 9 fields (new style, shifted by one column) for the main line,
# 4 fields for a Note: line, anything else is dropped.
# Everything after waypoints in the parameter list is local scratch.
function wpclean(waypoints, line, fld, prefix, lookup, wpname, coords,
    text, wps, nwps, wp, nfld, off) {
    text = ""
    # element 1 is the header line and is skipped
    nwps = split(waypoints, wps, "</tr>")
    for (wp = 2; wp <= nwps; ++wp)
    {
        nfld = split(wps[wp], line, "</td>")
        for (fld = 1; fld <= nfld; ++fld)
        {
            gsub("[ \t]*<[^>]*>", "", line[fld])
            gsub("^[ \t]*", "", line[fld])
            gsub("[ \t]*$", "", line[fld])
        }

        if (nfld == 8 || nfld == 9)
        {
            # main information line; new style is shifted by one column
            off = nfld - 8
            if (!line[3 + off]) continue
            prefix = substr(line[3 + off] "00", 1, 2) substr(gcid, 3)
            wpname = line[5 + off]
            # drop a trailing " (...)" remark from the name
            gsub(" \\(.*\\).*", "", wpname)
            coords = toupper(line[6 + off])
            text = text sprintf("%s - %s<br />%s<br />", prefix, wpname, coords)
        }
        else if (nfld == 4)
        {
            if (line[1] ~ "Note:")
                text = text sprintf("%s<br />", line[2])    # old style
            else if (line[2] ~ "Note:")
                text = text sprintf("%s<br />", line[3])    # new style
        }
    }
    debug(3, "Clean WPs\n" text)
    return text
}
1224 | ||
# hex2dec : convert a string of hexadecimal digits to a number
#
#   x       : hex digits in upper or lower case, without any prefix
#   val     : local accumulator
#
#   returns : the numeric value; 0 for an empty string
function hex2dec(x, val) {
    # the digit table is upper case, so fold the input first; a lower
    # case digit would otherwise count as -1
    x = toupper(x)
    for (val = 0; length(x); x = substr(x, 2))
        val = 16*val + index("0123456789ABCDEF", substr(x, 1, 1)) - 1
    return val
}
1230 | ||
# wp2id : convert a GC waypoint code to its numeric id
#
#   wp      : waypoint code, with or without the leading "GC"
#
#   returns : the id; codes of up to four characters that sort before
#             "G000" are hexadecimal (GCE621 -> 58913), everything else
#             is base 31 with an offset (GCG000 -> 65536)
#
# val, set and pos are local scratch variables.
function wp2id(wp, val, set, pos) {
    # both digit tables are upper case
    wp = toupper(wp)
    sub("^GC", "", wp)
    debug(5, "wp2id: " wp " ...")
    if ((length(wp) <= 4) && (wp < "G000"))
    {
        # old hex style
        val = hex2dec(wp)
        debug(5, "wp2id hex: " val " ...")
        return val
    }
    # new style, base-31, can have 4 or more places!
    set = "0123456789ABCDEFGHJKMNPQRTVWXYZ"
    val = 0
    for (pos = 1; pos <= length(wp); ++pos)
    {
        val *= 31
        val += index(set, substr(wp, pos, 1)) - 1
    }
    # G000 in base 31 is 476656 and has to map to 65536
    val = val - 411120
    debug(5, "wp2id id: " val " ...")
    return val
}
1254 | ||
# rot13 : rotate every ASCII letter by 13 places (used for hints)
# see http://lorance.freeshell.org/rot13/
#
#   string  : text to encode or decode (rot13 is its own inverse)
#
#   returns : the rotated text; characters other than a-z and A-Z are
#             passed through unchanged
#
# All names after string are local scratch variables, so nothing leaks
# into the globals shared with the other functions.
function rot13(string,    from, to, out, pos, len, char, rotpos) {
    from = "nopqrstuvwxyzabcdefghijklmNOPQRSTUVWXYZABCDEFGHIJKLM"
    to   = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    out = ""
    len = length(string)
    for (pos = 1; pos <= len; pos++)
    {
        char = substr(string, pos, 1)
        rotpos = index(from, char)
        if (rotpos > 0)
            char = substr(to, rotpos, 1)
        out = out char
    }
    return out
}
1270 | ||
# tagstart : print an opening XML tag, indented two blanks per level
#
#   lvl   : indentation level
#   tag   : element name
#   parms : attribute text; left out together with its blank when empty
function tagstart(lvl, tag, parms) {
    if (parms != "")
        tag = tag " " parms
    printf "%*s<%s>\n", lvl*2, "", tag
}
1278 | ||
# tagend : print a closing XML tag, indented two blanks per level
function tagend(lvl, tag) {
    printf "%*s</%s>\n", lvl*2, "", tag
}
1283 | ||
# ee : escape the XML special characters in element text
#
#   text    : raw text
#
#   returns : text with "&", "<" and ">" replaced by the entities
#             &amp; &lt; and &gt;
function ee(text) {
    # "&" goes first so the entities added below stay intact; "\\&"
    # stands for a literal ampersand in the replacement
    gsub(/&/, "\\&amp;", text)
    gsub(/</, "\\&lt;", text)
    gsub(/>/, "\\&gt;", text)
    return text
}
1290 | ||
# tagtext : print <tag>text</tag> on one line, indented two blanks per
# level; the text is passed through ee() first
function tagtext(lvl, tag, text) {
    printf "%*s<%s>%s</%s>\n", lvl*2, "", tag, ee(text), tag
}
1296 | ||
# tagptext : print <tag parms>text</tag> on one line, indented two
# blanks per level; the text is passed through ee() first
function tagptext(lvl, tag, parms, text) {
    printf "%*s<%s %s>%s</%s>\n", lvl*2, "", tag, parms, ee(text), tag
}
1302 | ||
# attr_begin1 : register one attribute
#
#   gif  : attribute image name, used as the key
#   id   : numeric attribute id, stored in attr_id[gif]
#   text : attribute description, stored in attr_text[gif]
function attr_begin1(gif, id, text) {
    debug(1, "attr_begin1: " gif " " id " \"" text "\"")
    attr_id[gif] = id; attr_text[gif] = text
    # Only the key just stored is reported.  Reading a misspelled key
    # such as "slealth" would create an empty element in attr_id.
    debug(1, "attr_id: " attr_id[gif])
}
# attr_begin : fill the attribute tables
#
# attr_id[name]   : numeric groundspeak attribute id
# attr_text[name] : attribute description
#
# name is the attribute image name without the -yes/-no suffix.
# Id 16 is not assigned.
function attr_begin() {
    attr_id["dog"] = 1;     attr_text["dog"] = "Dogs"
    attr_id["dogs"] = 1;    attr_text["dogs"] = "Dogs allowed"
    attr_id["fee"] = 2;     attr_text["fee"] = "Access or parking fee"
    attr_id["rappelling"] = 3; attr_text["rappelling"] = "Climbing gear"
    attr_id["boat"] = 4;    attr_text["boat"] = "Boat"
    attr_id["scuba"] = 5;   attr_text["scuba"] = "Scuba gear"
    attr_id["kids"] = 6;    attr_text["kids"] = "Recommended for kids"
    attr_id["onehour"] = 7; attr_text["onehour"] = "Takes less than an hour"
    attr_id["scenic"] = 8;  attr_text["scenic"] = "Scenic view"
    attr_id["hiking"] = 9;  attr_text["hiking"] = "Significant hike"

    attr_id["climbing"] = 10;   attr_text["climbing"] = "Difficult climbing"
    attr_id["wading"] = 11;     attr_text["wading"] = "May require wading"
    attr_id["swimming"] = 12;   attr_text["swimming"] = "May require swimming"
    attr_id["available"] = 13;  attr_text["available"] = "Available at all times"
    attr_id["night"] = 14;      attr_text["night"] = "Recommended at night"
    attr_id["winter"] = 15;     attr_text["winter"] = "Available during winter"
    # 16
    attr_id["poisonoak"] = 17;  attr_text["poisonoak"] = "Poison plants"
    attr_id["dangerousanimals"] = 18; attr_text["dangerousanimals"] = "Dangerous Animals"
    attr_id["ticks"] = 19;      attr_text["ticks"] = "Ticks"

    attr_id["mines"] = 20;      attr_text["mines"] = "Abandoned mines"
    attr_id["cliff"] = 21;      attr_text["cliff"] = "Cliff / falling rocks"
    attr_id["hunting"] = 22;    attr_text["hunting"] = "Hunting"
    attr_id["danger"] = 23;     attr_text["danger"] = "Dangerous area"
    attr_id["wheelchair"] = 24; attr_text["wheelchair"] ="Wheelchair accessible"
    attr_id["parking"] = 25;    attr_text["parking"] = "Parking available"
    attr_id["public"] = 26;     attr_text["public"] = "Public transportation"
    attr_id["water"] = 27;      attr_text["water"] = "Drinking water nearby"
    attr_id["restrooms"] = 28;  attr_text["restrooms"] ="Public restrooms nearby"
    attr_id["phone"] = 29;      attr_text["phone"] = "Telephone nearby"

    attr_id["picnic"] = 30;     attr_text["picnic"] = "Picnic tables nearby"
    attr_id["camping"] = 31;    attr_text["camping"] = "Camping available"
    attr_id["bicycles"] = 32;   attr_text["bicycles"] = "Bicycles"
    attr_id["motorcycles"] = 33; attr_text["motorcycles"] = "Motorcycles"
    attr_id["quads"] = 34;      attr_text["quads"] = "Quads"
    attr_id["jeeps"] = 35;      attr_text["jeeps"] = "Off-road vehicles"
    attr_id["snowmobiles"] = 36; attr_text["snowmobiles"] = "Snowmobiles"
    attr_id["horses"] = 37;     attr_text["horses"] = "Horses"
    attr_id["campfires"] = 38;  attr_text["campfires"] = "Campfires"
    attr_id["thorns"] = 39;     attr_text["thorns"] = "Thorns"

    attr_id["stealth"] = 40;    attr_text["stealth"] = "Stealth required"
    attr_id["stroller"] = 41;   attr_text["stroller"] = "Stroller accessible"
    attr_id["firstaid"] = 42;   attr_text["firstaid"] = "Needs maintenance"
    attr_id["cow"] = 43;        attr_text["cow"] = "Watch for livestock"
    attr_id["flashlight"] = 44; attr_text["flashlight"] = "Flashlight required"
    # Lost And Found Tour is id 45; 44 belongs to the flashlight
    attr_id["landf"] = 45;      attr_text["landf"] = "Lost And Found Tour"
    attr_id["rv"] = 46;         attr_text["rv"] = "Recreational Vehicle"
    attr_id["field"] = 47;      attr_text["field"] = "Field Puzzle"
    attr_id["UV"] = 48;         attr_text["UV"] = "UV Light Required"
    attr_id["snowshoes"] = 49;  attr_text["snowshoes"] = "Snowshoes"

    attr_id["skiis"] = 50;      attr_text["skiis"] = "Cross Country Skis"
    attr_id["s-tool"] = 51;     attr_text["s-tool"] = "Special Tool Required"
    attr_id["nightcache"] = 52; attr_text["nightcache"] = "Night Cache"
    attr_id["parkngrab"] = 53;  attr_text["parkngrab"] = "Park and Grab"
    attr_id["AbandonedBuilding"] = 54; attr_text["AbandonedBuilding"] = "Abandoned Structure"
    attr_id["hike_short"] = 55; attr_text["hike_short"] = "Short hike (less than 1km)"
    attr_id["hike_med"] = 56;   attr_text["hike_med"] = "Medium hike (1km-10km)"
    attr_id["hike_long"] = 57;  attr_text["hike_long"] = "Long hike (+10km)"
    attr_id["fuel"] = 58;       attr_text["fuel"] = "Fuel Nearby"
    attr_id["food"] = 59;       attr_text["food"] = "Food Nearby"

    attr_id["wirelessbeacon"] = 60; attr_text["wirelessbeacon"] = "Wireless Beacon"
    attr_id["partnership"] = 61; attr_text["partnership"] = "Partnership"
    attr_id["seasonal"] = 62;   attr_text["seasonal"] = "Seasonal Access"
    attr_id["tourist"] = 63;    attr_text["tourist"] = "Tourist Friendly"
    attr_id["treeclimbing"] = 64; attr_text["treeclimbing"] = "Tree Climbing"
    attr_id["frontyard"] = 65;  attr_text["frontyard"] = "Front Yard (Private Residence)"
    attr_id["teamwork"] = 66;   attr_text["teamwork"] = "Teamwork Required"
}
1385 | ||
# tagattr : print one <groundspeak:attribute> element
#
#   lvl   : indentation level (two blanks per level)
#   kind  : attribute name, looked up in attr_id and attr_text
#   yesno : value of the inc attribute
#
# Nothing is printed when attr_id holds no id (or 0) for kind.
function tagattr(lvl, kind, yesno) {
    # force a string subscript
    kind = kind ""
    #debug(1, "kind: \"" kind "\"")
    if (attr_id[kind] == 0)
        return
    printf "%*s<groundspeak:attribute id=\"%d\" inc=\"%d\">%s</groundspeak:attribute>\n",
        lvl*2, "", attr_id[kind], yesno, attr_text[kind]
}
1396 | ||
# Cache type from the type icon line (gc 02/01/11): the alt text in
# front of the width attribute.
/cache_types.aspx/ {
    gs_type = $0
    sub(/.* alt=./, "", gs_type)
    sub(/. width=.*/, "", gs_type)
    debug(1, "type: " gs_type)
}

# Cache name line.  When no type is known yet the type is taken from
# the alt text on this line and the remaining rules still see the line;
# otherwise the name is stored and the line is done.
/<span id="ctl00_ContentBody_CacheName">/ {
    if (!gs_type)
    {
        gs_type = $0
        sub(/.* alt=./, "", gs_type)
        sub(/. width=.*/, "", gs_type)
        debug(1, "type: " gs_type)
    }
    else
    {
        gs_name = remspan($0, "ctl00_ContentBody_CacheName")
        next
    }
}

# Cache name, older span id.
/<span id="CacheName">/ {
    gs_name = remspan($0, "CacheName")
}

# Cache name when the rule above took the type from this line.
/<span id="ctl00_ContentBody_CacheName">/ {
    gs_name = remspan($0, "ctl00_ContentBody_CacheName")
}

# Waypoint code from its own span.
/<span id=".*WaypointName".*>/ {
    gcid = remspan($0)
}

# Waypoint code from a wp= link parameter (new way, yech!).
/;wp=GC.*" / {
    gcid = $0
    sub(/.*wp=/, "", gcid)
    sub(/".*/, "", gcid)
}
# Short description.
/<span id=".*ShortDescription">/ {
    gs_short_description = remspan($0)
}

# Long description, old and new span id.  A new description also
# resets the waypoint table collected so far.
/<span id="LongDescription">/ {
    waypoints = ""
    gs_long_description = remspanlong($0, "LongDescription")
}
/<span id="ctl00_ContentBody_LongDescription">/ {
    waypoints = ""
    gs_long_description = remspanlong($0, "ctl00_ContentBody_LongDescription")
}

# Hints kept in a div: joined to one line, <br> becomes a newline.
/<div id="div_hint"/ {
    hints = remdiv($0)
    gsub("\n", " ", hints)
    gsub("^ *", "", hints)
    gsub("<br>", "\n", hints)
    if (DECODE)
    {
        hints = rot13(hints)
    }
}

# Hints kept in a span, old id: cleaned from HTML, then joined.
/<span id="Hints"/ {
    hints = htmlclean(remspan($0))
    if (DECODE)
    {
        hints = rot13(hints)
    }
    gsub("\n", " ", hints)
}

# Hints kept in a span, new id: taken from this single line.
/<span id="ctl00_ContentBody_Hints"/ {
    hints = $0
    sub(".*displayMe.>", "", hints)
    sub("</span>.*", "", hints)
    gsub("<br>", "\n", hints)
    # debug(1, "Hints: " hints)
    if (DECODE)
    {
        hints = rot13(hints)
    }
}
# Additional Waypoints table, heading in <b> or in <strong>.
/<b>Additional Waypoints/ {
    waypoints = remwaypoints()
    wplist = splitwaypoints(waypoints)
}
/<strong>Additional Waypoints/ {
    waypoints = remwaypoints()
    wplist = splitwaypoints(waypoints)
}

# Start of the logs table: old class list and the one of 06/28/11.
/class="LogsTable Table"/ {
    logs_section = 1
}
/class="LogsTable"/ {
    logs_section = 1
}

# Inside the logs table every line is collected; logs_section follows
# the table nesting of the line just added.
logs_section > 0 {
    logs = logs $0
    if ($0 ~ /<table/)
        logs_section++
    if ($0 ~ /<\/table>/)
        logs_section--
}

# Logs kept in a span, old id.
/<span id="CacheLogs">/ {
    logs = remspanlong($0, "CacheLogs")
    # remove header which does not exist >2010-01-12
    sub(".*td class=.containerHeader.>Cache Logs</td></tr>", "", logs)
}

# Logs kept in a span, new id.
/<span id="ctl00_ContentBody_CacheLogs">/ {
    logs = remspanlong($0, "ctl00_ContentBody_CacheLogs")
}

# Statistics and visit counter.
/<span id=".*CacheStats">/ {
    stats = remspan($0)
}
/<span id=".*NumVisits">/ {
    numvisits = remspan($0)
    debug(1, numvisits)
}
1495 | ||
# Cache id from the print friendly link: the printable page carries an
# ID number, the non-printable page a guid.
/lnkPrintFriendly/ {
    gid = $0
    if (gid ~ /ID=/)
        sub(/^.*ID=/, "", gid)
    else
        sub(/^.*guid=/, "", gid)
    sub(/&.*/, "", gid)
}

# Owner name and owner guid from the "by <a href..." line.
/^ *by <a href/ {
    gs_owner = $0
    sub(/.*ds=2.>/, "", gs_owner)
    sub(/<.*/, "", gs_owner)
    debug(1, "owner: " gs_owner)

    gs_guid = $0
    sub(/.*guid=/, "", gs_guid)
    sub(/&.*/, "", gs_guid)
}

# Container size from the alt text of the size image.
/.* alt=.Size/ {
    gs_size = $0
    sub(/.*Size: /, "", gs_size)
    sub(". />.*", "", gs_size)
}

# Type, owner, size and owner guid from the CacheOwner span, old id.
/<span id="CacheOwner"/ {
    text = remspan($0)
    debug(1, "Owner text " text)

    gs_type = text
    sub(/<.*/, "", gs_type)

    gs_owner = text
    debug(1, gs_owner)
    sub(/.*<br>by /, "", gs_owner)
    sub(/ [[].*/, "", gs_owner)
    debug(1, gs_owner)
    sub(/<a[^>]*>/, "", gs_owner)
    sub(/<.a[^>]*>/, "", gs_owner)
    sub(/.*<br .>/, "", gs_owner)
    sub(/^by /, "", gs_owner)
    debug(1, "owner " gs_owner)

    gs_size = text
    sub(/.*Size: /, "", gs_size)
    sub(/<.*/, "", gs_size)

    gs_guid = text
    sub(/.*guid=/, "", gs_guid)
    sub(/&.*/, "", gs_guid)
    debug(1, "guid " gs_guid)
}

# The same four values from the CacheOwner span, new id; only this
# line is looked at.
/<span id="ctl00_ContentBody_CacheOwner"/ {
    text = $0
    debug(2, "Owner text: " text)

    gs_type = text
    sub(/<br .*/, "", gs_type)
    sub(/.*>/, "", gs_type)
    debug(1, "gs_type: " gs_type)

    gs_owner = text
    sub(/.*ds=2.>/, "", gs_owner)
    sub(/<.*/, "", gs_owner)
    debug(1, "gs_owner: " gs_owner)

    gs_size = text
    sub(/.*Size: /, "", gs_size)
    sub(/<.*/, "", gs_size)

    gs_guid = text
    sub(/.*guid=/, "", gs_guid)
    sub(/&.*/, "", gs_guid)
    sub(/. title=.*/, "", gs_guid)
    debug(1, "guid: " gs_guid)
}
# Availability from the error text, old span id: only this line.
/<span id="ErrorText"/ {
    if (index($0, "unavailable") > 0)
        available = "False"
    if (index($0, "been archived") > 0)
        archived = "True"
}

# Availability from the error text, new span id: the whole span.
/<span id="ctl00_ContentBody_ErrorText"/ {
    errortext = remspan($0, "ctl00_ContentBody_ErrorText")
    if (index(errortext, "unavailable") > 0)
        available = "False"
    if (index(errortext, "been archived") > 0)
        archived = "True"
    debug(1, "available: " available "; archived: " archived)
}

# Coordinates from the parameters of the large map link.
/<span id="LargeMapPrint"/ {
    text = remspan($0)

    lat = text
    sub(/.*latitude=/, "", lat)
    sub(/&.*/, "", lat)

    lon = text
    sub(/.*longitude=/, "", lon)
    sub(/\".*/, "", lon)
    sub(/&.*/, "", lon)
}

# Coordinates from a script line, used only while lat is still empty.
/var lat=[-0-9]/ {
    if (lat == "")
    {
        lat = $0
        sub(/.*lat=/, "", lat)
        sub(/;.*/, "", lat)

        lon = $0
        sub(/.*lng=/, "", lon)
        sub(/;.*/, "", lon)
    }
}

# State and country from the location span ("In state, country").
/<span id=".*Location"/ {
    text = remspan($0)

    gs_state = text
    sub(/In */, "", gs_state)
    sub(/,.*/, "", gs_state)

    gs_country = text
    sub(/.*, /, "", gs_country)
    sub(/ <.*/, "", gs_country)
    sub(/^In /, "", gs_country)
}

# Coordinates from a "lat=...; lng=...; guid=" line, used only while
# lat is still empty.
/lat=.*; lng=.*; guid=/ {
    if (lat == "")
    {
        lat = $0
        sub(/.*lat=/, "", lat)
        sub(/;.*/, "", lat)

        lon = $0
        sub(/.*lng=/, "", lon)
        sub(/;.*/, "", lon)
    }
}
# Hidden date, page layout of gc 2/1/11: the date is on the second
# line after the label and is read as MM/DD/YYYY.
/<span class="minorCacheDetails">Hidden/ {
    getline time
    getline time
    sub(/^ */, "", time)
    sub(/<.*/, "", time)
    split(time, fld, "/")
    time = sprintf("%d-%02d-%02d", fld[3], fld[1], fld[2])
    debug(1, "time: " time)
}

# Hidden date, page layout of gc 6/28/11: the date is on the third
# line after the label.  "-" and "/" are both accepted as separator.
# Field order:
#   first field >= 1000  : YYYY MM DD, whatever DATEFMT says
#   DATEFMT == 1         : DD MM YYYY
#   otherwise            : MM DD YYYY
/> <span class="minorCacheDetails">/ {
    getline time
    getline time
    getline time
    sub(/^ */, "", time)
    sub(/<.*/, "", time)
    gsub(/-/, "/", time)
    rc = split(time, fld, "/")
    debug(1, "timerc: " rc)
    # The year-first test has to come before DATEFMT, otherwise
    # 2011-07-13 with DATEFMT=1 would be written as 13-07-2011.
    if (fld[1] + 0 >= 1000)
        time = sprintf("%d-%02d-%02d", fld[1], fld[2], fld[3])
    else if (DATEFMT == 1)
        time = sprintf("%d-%02d-%02d", fld[3], fld[2], fld[1])
    else
        time = sprintf("%d-%02d-%02d", fld[3], fld[1], fld[2])
    debug(1, "time: " time)
}

# Hidden date from the DateHidden span, old id, read as MM/DD/YYYY.
# The getline reads the following input line into text before the span
# is taken from the current line.
/<span id="DateHidden">/ {
    getline text
    time = remspan($0)
    split(time, fld, "/")
    time = sprintf("%d-%02d-%02d", fld[3], fld[1], fld[2])
}

# Hidden date from the DateHidden span, new id: either MM/DD/YYYY or
# "Weekday, Month DD, YYYY" with an english month name.  Anything else
# leaves time empty.
/<span id="ctl00_ContentBody_DateHidden">/ {
    time = remspan($0, "ctl00_ContentBody_DateHidden")
    rc = split(time, fld, "/")
    if (rc == 3)
    {
        time = sprintf("%d-%02d-%02d", fld[3], fld[1], fld[2])
        debug(1, "time: " time)
        next
    }
    rc = split(time, fld, ",")
    if (rc == 3)
    {
        yyyy = fld[3]
        # copy "Month DD" first: splitting an array element into its
        # own array is not safe in every awk
        mm = fld[2]
        split(mm, fld, " ")
        mm = Month[ fld[1] ]
        dd = fld[2]
        time = sprintf("%d-%02d-%02d", yyyy, mm, dd)
        debug(1, "time: " time)
        next
    }
    time = ""
}
# Difficulty and terrain are always the first word of the alt text of
# the rating image; the rules differ only in where that image is found.

# Rating image on the matching line itself.
/ctl00_ContentBody_uxLegendScale/ {
    text = $0
    sub(/.*alt=./, "", text)
    sub(/ .*/, "", text)
    gs_diff = text
    debug(1 , "gs_diff: " gs_diff)
}
/ctl00_ContentBody_Localize6/ {
    text = $0
    sub(/.*alt=./, "", text)
    sub(/ .*/, "", text)
    gs_terr = text
    debug(1 , "gs_terr: " gs_terr)
}

# Difficulty: image on the line after the label.
/^ *Difficulty:<.strong>/ {
    getline text
    sub(/.*alt=./, "", text)
    sub(/ .*/, "", text)
    gs_diff = text
    debug(1 , "gs_diff: " gs_diff)
}

# Difficulty: image on the third line after the label (gc 2/1/11).
/^ *Difficulty:/ {
    getline text
    getline text
    getline text
    sub(/.*alt=./, "", text)
    sub(/ .*/, "", text)
    gs_diff = text
    debug(1 , "gs_diff: " gs_diff)
}

# Difficulty: image inside a span, old and new id.
/<span id="Difficulty">/ {
    text = remspan($0)
    sub(/.*alt=./, "", text)
    sub(/ .*/, "", text)
    gs_diff = text
}
/<span id="ctl00_ContentBody_Difficulty">/ {
    text = remspan($0, "ctl00_ContentBody_Difficulty")
    sub(/.*alt=./, "", text)
    sub(/ .*/, "", text)
    debug(1, "difficulty " text)
    gs_diff = text
}

# Terrain: image on the line after the label.
/^ *Terrain:<.strong>/ {
    getline text
    sub(/.*alt=./, "", text)
    sub(/ .*/, "", text)
    gs_terr = text
    debug(1 , "gs_terr: " gs_terr)
}

# Terrain: image on the third line after the label (gc 2/1/11).
/^ *Terrain:/ {
    getline text
    getline text
    getline text
    sub(/.*alt=./, "", text)
    sub(/ .*/, "", text)
    gs_terr = text
    debug(1 , "gs_terr: " gs_terr)
}

# Terrain: image inside a span, old and new id.
/<span id="Terrain">/ {
    text = remspan($0)
    sub(/.*alt=./, "", text)
    sub(/ .*/, "", text)
    gs_terr = text
}
/<span id="ctl00_ContentBody_Terrain">/ {
    text = remspan($0, "ctl00_ContentBody_Terrain")
    sub(/.*alt=./, "", text)
    sub(/ .*/, "", text)
    debug(1, "terrain " text)
    gs_terr = text
}
# Attribute icons.  The line is reduced to a blank-separated list of icon
# base names ending in -yes or -no, which is then split into two arrays:
#   attr_yes[1..nattr_yes]  names that carried the -yes suffix
#   attr_no[1..nattr_no]    names that carried the -no suffix
# The order of the gsub() calls matters; do not shuffle them.
/title=.What are Attributes?/ {
    text = $0
    debug(5, "Attr " text)

    # peel the markup away from the icon file names
    gsub("<img src=./images/attributes/", "", text)
    # markup used before 06/03/10
    gsub(/alt="[^"]*" width="30" height="30" .>/, "", text)
    # markup used after 06/03/10
    gsub(/alt="[^"]*" title="[^"]*" width="30" height="30" .>/, "", text)
    gsub("<p class=.NoSpacing.*", "", text)
    gsub(/^ */, "", text)
    gsub(/\.gif../, "", text)
    gsub(/attribute-blank/, "", text)

    # positive attributes: remove the -no names, then the -yes suffix
    attrs_yes = text
    gsub(/[a-z0-9A-Z]*-no/, "", attrs_yes)
    gsub(/-yes/, "", attrs_yes)

    # negative attributes: remove the -yes names, then the -no suffix
    attrs_no = text
    gsub(/[a-z0-9A-Z]*-yes/, "", attrs_no)
    gsub(/-no/, "", attrs_no)

    nattr_yes = split(attrs_yes, attr_yes, " ")
    nattr_no = split(attrs_no, attr_no, " ")

    debug(1, "attrs_yes: " attrs_yes)
    debug(1, "attrs_no: " attrs_no)
    debug(1, "nattr_yes: " nattr_yes)
    debug(1, "nattr_no: " nattr_no)
}
# A line that starts like a JSON "status: success" reply holds the logs.
# It is parsed into json_logs, and json_log_bool tells the </html> rule
# to take the logs from there.
/^{.status.:.success/ {
    json_log_bool = 1
    ParseJSON($0, json_logs)
}
1755 | ||
# One-time setup: month-name lookup table (English full names -> 1..12),
# the base URL used for cache links, and the initial per-file state.
BEGIN {
    split("January February March April May June July " \
          "August September October November December", month_names, " ")
    for (month_no = 1; month_no <= 12; ++month_no)
        Month[month_names[month_no]] = month_no

    BaseURL = "http://www.geocaching.com/seek/cache_details.aspx"
    attr_begin()

    # the GPX header is still to be written
    first = 1

    wpt_init()
}
# NOTE: this awk program lives inside a single-quoted shell string, so
# comments must not contain an apostrophe.  Where a literal apostrophe is
# needed in a string, the quote / double-quote / quote sequence is used to
# leave and re-enter the shell quoting.

# Map the icon reference of a log entry to the log type name written to
# the GPX file.  The first matching keyword wins; anything unrecognized
# yields "Unknown".
#   img   text containing the icon file name or URL
function logtype_name(img)
{
    if (img ~ /smile/)        return "Found it"
    if (img ~ /happy/)        return "Found it"
    if (img ~ /note/)         return "Write note"
    if (img ~ /sad/)          return "Didn'"'"'t Find it"
    if (img ~ /attended/)     return "Attended"
    if (img ~ /rsvp/)         return "Will Attend"
    if (img ~ /greenlight/)   return "Green"
    if (img ~ /traffic_cone/) return "Archive"
    if (img ~ /disabled/)     return "Temporarily Disable Listing"
    if (img ~ /coord_update/) return "Update Coordinates"
    return "Unknown"
}

# Write one <groundspeak:log> element.  As a side effect, a "Found it" or
# "Attended" log written by USERNAME sets the global sym to
# "Geocache Found".
#   logid    value of the id attribute of the log element
#   ldate    log date text
#   ltype    log type name (see logtype_name)
#   guid     value of the id attribute of the finder element
#   lfinder  finder name
#   ltext    log text
function log_emit(logid, ldate, ltype, guid, lfinder, ltext)
{
    if (lfinder == USERNAME && (ltype == "Found it" || ltype == "Attended"))
        sym = "Geocache Found"
    tagstart(4, "groundspeak:log", "id=\"" logid "\"")
    tagtext(5, "groundspeak:date", ldate)
    tagtext(5, "groundspeak:type", ltype)
    tagptext(5, "groundspeak:finder", "id=\"" guid "\"", lfinder)
    tagptext(5, "groundspeak:text", "encoded=\"" "False" "\"", ltext)
    tagend(4, "groundspeak:log")
}

# End of one HTML page: write everything collected for this cache as a
# <wpt> element (cache data, attributes, descriptions, hints, logs),
# followed by one <wpt> per additional waypoint, then reset the per-file
# state with wpt_init().  On the first page, and unless INCR is set, the
# XML and GPX headers are written first.
/<\/html>/ {
    if ((lat == "") || (lon == ""))
    {
        debug(0, "Waypoint coordinates not found for " gcid ", no output!")
        # NOTE(review): the skip is disabled, so the waypoint IS still
        # written (with empty lat/lon) despite the message above.
        #next
    }

    # too long a block to be indented
    if (!INCR && first)
    {
        print "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
        tagstart(0, "gpx")
        tagtext(1, "desc", "Geocache file generated by geo-html2gpx")
        tagtext(1, "author", "geo-html2gpx")
        cmd = "date +%Y-%m-%dT%H:%M:%S"
        cmd | getline date
        close(cmd)
        tagtext(1, "time", date)
        first = 0
    }

    gs_name = umlauts(gs_name)
    gs_owner = umlauts(gs_owner)

    tagstart(1, "wpt", "lat=\"" lat "\" lon=\"" lon "\"")
    if (time != "")
        tagtext(2, "time", time "T00:00:00.0000000-07:00")
    tagtext(2, "name", gcid)
    tagtext(2, "desc", gs_name " by " gs_owner ", " \
        gs_type " (" gs_diff "/" gs_terr ")")

    # alternate URL... tagtext(2, "url", BaseURL "?wp=" gcid)
    # alternate URL... tagtext(2, "url", BaseURL "?id=" gid)
    tagtext(2, "url", BaseURL "?wp=" gcid)
    tagtext(2, "urlname", gs_name)

    # we do this last... tagtext(2, "sym", sym)

    tagtext(2, "type", "Geocache|" gs_type)

    # FIXME? GC-written GPX files contain numeric, non-UUID,
    # cache/owner/finder ids
    # Oregon needs numeric cache id, or behaves erratically!
    gid = wp2id(gcid)
    tagstart(2, "groundspeak:cache",
        "id=\"" gid "\" available=\"" available \
        "\" archived=\"" archived "\"" \
        " xmlns:groundspeak=\"http://www.groundspeak.com/cache/1/0/1\"")
    tagtext(3, "groundspeak:name", gs_name)
    tagtext(3, "groundspeak:placed_by", gs_owner)
    tagptext(3,"groundspeak:owner", "id=\"" gs_guid "\"", gs_owner)
    tagtext(3, "groundspeak:type", gs_type)

    if (nattr_yes != 0 || nattr_no != 0)
    {
        tagstart(3, "groundspeak:attributes")
        for (i = 1; i <= nattr_yes; ++i)
            tagattr(4, attr_yes[i], 1)
        for (i = 1; i <= nattr_no; ++i)
            tagattr(4, attr_no[i], 0)
        tagend(3, "groundspeak:attributes")
    }

    tagtext(3, "groundspeak:container", gs_size)
    tagtext(3, "groundspeak:difficulty", gs_diff)
    tagtext(3, "groundspeak:terrain", gs_terr)
    tagtext(3, "groundspeak:country", gs_country)
    tagtext(3, "groundspeak:state", gs_state)
    if (!NOHTML)
    {
        tagptext(3, "groundspeak:short_description", "html=\"True\"",
            gs_short_description)
        if (!NOWPTS && waypoints)
        {
            # reproduce "simplified table" by GC PQ
            # prefixed_gcid - wpname<br />original_style_coord<br />note<br />
            waypoints = wpclean(waypoints)
            # include "zero" waypoints here!
            gs_long_description = gs_long_description \
                "<p>Additional Waypoints</p>" waypoints
        }
        tagptext(3, "groundspeak:long_description", "html=\"True\"",
            gs_long_description)
    }
    else
    {
        gs_short_description = htmlclean(gs_short_description)
        tagptext(3, "groundspeak:short_description", "html=\"False\"",
            gs_short_description)
        gs_long_description = htmlclean(gs_long_description)
        # honour NOWPTS here as well, as the HTML branch above does
        if (!NOWPTS && waypoints)
            gs_long_description = gs_long_description \
                "\n\nAdditional Waypoints\n" tableclean(waypoints)
        tagptext(3, "groundspeak:long_description", "html=\"False\"",
            gs_long_description)
    }
    tagtext(3, "groundspeak:encoded_hints", hints)

    if (json_log_bool)
    {
        # logs delivered as JSON (parsed into json_logs beforehand)
        nlogs = JSONArrayLength(json_logs, "data")
        debug(1, "New Logs: " nlogs)
        # apply the log limit the same way the HTML branch does
        if (nlogs > NUMLOGS+1)
            nlogs = NUMLOGS+1

        # with nothing to list, the logs tag is opened with "/" as extra
        # argument and never closed explicitly
        if (nlogs > 1)
            tagstart(3, "groundspeak:logs")
        else
            tagstart(3, "groundspeak:logs", "/")

        for (i = 1; i < nlogs; ++i)
        {
            ltype = logtype_name(json_logs["data" SUBSEP i SUBSEP "LogTypeImage"])

            ldate = json_logs["data" SUBSEP i SUBSEP "Visited"]
            lfinder = json_logs["data" SUBSEP i SUBSEP "UserName"]
            logid = json_logs["data" SUBSEP i SUBSEP "LogID"]
            guid = json_logs["data" SUBSEP i SUBSEP "LogGuid"]
            ltext = json_logs["data" SUBSEP i SUBSEP "LogText"]
            ltext = htmlclean(ltext)
            ltext = umlauts(ltext)

            log_emit(logid, ldate, ltype, guid, lfinder, ltext)
        }

        if (nlogs > 1)
            tagend(3, "groundspeak:logs")
    }
    else
    {
        # logs scraped from the HTML table; the piece after the last
        # separator is not a log entry, hence the "< nlogs" loop bound
        # nlogs = split(logs, entry, "</tr>")
        nlogs = split(logs, entry, "</tr><tr>")
        if (nlogs > NUMLOGS+1)
            nlogs = NUMLOGS+1

        if (nlogs > 1)
            tagstart(3, "groundspeak:logs")
        else
            tagstart(3, "groundspeak:logs", "/")

        for (i = 1; i < nlogs; ++i)
        {
            sub("<tr><td[^>]*>", "", entry[i])
            sub("</td>", "", entry[i])
            if (!entry[i]) continue
            # old split location
            sub(/.*<[Ss][Tt][Rr][Oo][Nn][Gg]><img src=./, "", entry[i])

            ltype = entry[i]
            #debug(1, "log: " ltype)
            sub(/>.*/, "", ltype)       # leaves the URL of the smiley
            ltype = logtype_name(ltype)

            ldate = entry[i]
            # split off /blank
            sub(/^[^>]*>[^ ;]*[ ;]/, "", ldate)
            sub(/ by <.*/, "", ldate)
            sub(/ by /, "", ldate)
            sub(/.*LogDate.>about /, "", ldate)
            sub(/.*LogDate.>/, "", ldate)
            sub(/<.*/, "", ldate)
            gsub(/-/, "/", ldate)
            debug(1, "logdate: " ldate)
            if (ldate ~ /ago/)
            {
                # relative date ("... ago"): let the date command resolve it.
                # ldate comes from the page, so first drop the characters
                # that stay special inside a double-quoted shell word.
                gsub(/["$`\\]/, "", ldate)
                cmd = sprintf("%s -d \"12am %s\" +%%Y-%%m-%%dT07:00:00Z",
                        DATE, ldate)
                cmd | getline ldate; close(cmd)
            }
            else
            {
                n = split(ldate, fld, " ")
                if (n >= 2)
                {
                    #old format: August 18
                    mm = Month[fld[1]]
                    dd = fld[2] + 0
                    # start from "no year" for every entry, so that a year
                    # parsed for an earlier log is never reused
                    yy = (n >= 3) ? fld[3] : 0
                    if (yy+0 == 0)
                        yy = YR
                    ldate = sprintf("%d-%02d-%02dT07:00:00", yy, mm, dd)
                }
                n = split(ldate, fld, "/")
                if (n == 3)
                {
                    #new format: 08/18/2011
                    # DATEFMT == 1: first field is the day, second the month
                    if (DATEFMT == 1)
                        ldate = sprintf("%d-%02d-%02dT07:00:00",
                            fld[3], fld[2], fld[1])
                    else
                        ldate = sprintf("%d-%02d-%02dT07:00:00",
                            fld[3], fld[1], fld[2])
                    debug(1, "logdate: " ldate)
                }
            }

            lfinder = entry[i]
            sub(/[^<]*</, "", lfinder)  # Delete all before <A NAME...

            logid = lfinder
            sub(/[^"]*"/, "", logid)
            sub(/.* id="/, "", logid)
            sub(/.*LUID=/, "", logid)
            sub(/\".*/, "", logid)
            debug(1, "logid: " logid)

            guid = lfinder
            debug(1, "guid: " guid)
            #sub(/[^>]*>/, "", guid)    # Delete all before <A HREF...
            #sub(/>.*/, "", guid)       # Delete all after <A HREF...
            sub(/.*guid=/, "", guid)
            sub(/\".*/, "", guid)
            sub(/\&.*/, "", guid)
            sub(/. id=.*/, "", guid)
            debug(1, "guid: " guid)

            #debug(1, "lfinder: " lfinder)
            sub(/[^>]*>/, "", lfinder)  # Delete all before <A HREF...
            #debug(1, "lfinder: " lfinder)
            #sub(/[^>]*>/, "", lfinder) # Delete all before name
            sub(/<.*/, "", lfinder)     # Delete all after name
            lfinder = umlauts(lfinder)
            debug(1, "lfinder: " lfinder)

            ltext = entry[i]
            sub(/.*found\)<br .>/, "", ltext)
            sub("</font>.*", "", ltext)
            sub("<a href=.log.aspx[^>]*>[^<]*</a>", "", ltext)
            sub("<a href=.upload.aspx[^>]*>[^<]*</a>", "", ltext)
            # remove remaining HTML tags from log text. Seems to be a good
            # idea in any case, independent of NOHTML setting!
            ltext = htmlclean(ltext)
            ltext = umlauts(ltext)

            log_emit(logid, ldate, ltype, guid, lfinder, ltext)
        }
        if (nlogs > 1)
            tagend(3, "groundspeak:logs")
    }

    tagstart(3, "groundspeak:travelbugs", "/")

    tagend(2, "groundspeak:cache")
    # sym is written last because the log loop above may have changed it
    tagtext(2, "sym", sym)
    tagend(1, "wpt")

    # add Additional Waypoints in wpt form
    if (!NOWPTS && wplist)
    {
        nwps = split(wplist, wps, "\n")
        for (wp = 1; wp <= nwps; ++wp)
        {
            # lat lon|prefix|lookup|wpname|url|note
            # i.e.: lat="44.888267" lon="-93.159233"|PC|PARK|http://...
            #       |GCPMG6-Parking (Parking Area)|.31 miles from cache.
            debug(1, "wps: " wps[wp])
            split(wps[wp], line, "|")
            # skip empty records and, when NOZERO is set, waypoints
            # whose coordinates are all zero
            if (line[1] &&
                (!NOZERO || (line[1] !~ "lat=\"0.000000\" lon=\"0.000000\"") ) )
            {
                # line format: coords|prefix|lookup|wpname|note
                tagstart(1, "wpt", line[1])
                #tagtext(2, "time", "...")
                # waypoint name: prefix + cache id without its first 2 chars
                tagtext(2, "name", line[2] substr(gcid,3))
                tagtext(2, "cmt", line[6] ? line[6] : "")
                # statname is not used further within this rule
                statname = line[4]
                gsub(" \\(.*\\).*", "", statname)

                # description: wpname without the parenthesized type
                desc = line[4]
                sub(" \\(.*", "", desc)
                tagtext(2, "desc", desc)

                tagtext(2, "url", line[5])

                urlname = desc
                tagtext(2, "urlname", urlname)

                # waypoint type: the text between the parentheses
                stattype = line[4]
                gsub(".*\\(", "", stattype)
                gsub("\\).*", "", stattype)
                tagtext(2, "sym", stattype)
                tagtext(2, "type", "Waypoint|" stattype)
                tagend(1, "wpt")
            }
        }
    }
    wpt_init()
}
# Close the <gpx> element, but only if a header was actually written:
# not in incremental mode, and at least one page has been output
# (first has been cleared).
END {
    if (!(INCR || first))
        tagend(0, "gpx")
}
2108 | ' | $POSTPROC |