]> git.street.me.uk Git - andy/viking.git/blame - tools/geo-html2gpx
Enable importing from an mbtiles file to Viking cache via the python tool.
[andy/viking.git] / tools / geo-html2gpx
CommitLineData
06ee5545
RN
1#!/bin/sh
2
3PROGNAME="$0"
4
# Print the manual-style help text on stdout and terminate with status 1.
# Globals: PROGNAME (read), USERNAME (read; shown as the default for -u)
usage() {
    # PROGNAME is quoted so that a script path containing blanks still
    # yields the right program name.
    cat <<EOF
NAME
    $(basename "$PROGNAME") - Convert gc.com *printable* web pages into GPX

SYNOPSIS
    $(basename "$PROGNAME") [options] [gc-com.html]...

DESCRIPTION
    Convert gc.com *printable* web pages into GPX, including
    cache description and all logs.

    The *printable* web pages can be fetched using geo-nearest,
    geo-newest, geo-placed, geo-found, or geo-gid with the -H option.

OPTIONS
    -b              Normalize output by postprocessing with gpsbabel
    -e              Encode hints with rot13 (e.g. NORTH = ABEGU)
    -i              Incremental, no XML and GPX headers
    -l number       Maximum number of log entries to be exported [unlimited]
    -n              No HTML in descriptions (experimental)
    -o FMT          Output FMT instead of GPX by using gpsbabel
    -u username     Indicate found status for username [$USERNAME]
    -w              Do not add "Additional Waypoints" to the GPX output
    -z              Do not output waypoints with "zero" coordinates
    -E var=val      Set environment "var" to "val"
                    i.e. DATEFMT=0|1
    -D lvl          Debug level

DEFAULTS
    Defaults can also be set with variables in file \$HOME/.georc:

        DATEFMT=[0|1];

DATE FORMATS
    Geocaching.com date formats that are compatible:

        GC Format       Example         Compatible
        YYYY-MM-DD      2011-07-13      yes
        YYYY/MM/DD      2011/07/13      yes
        MM/DD/YYYY      07/13/2011      yes
        DD/MM/YYYY      13/07/2011      yes if DATEFMT=1 in \$HOME/.georc
        DD/Mmm/YYYY     13/Jul/2001     no
        Mmm/DD/YYYY     Jul/13/2011     no
        DD Mmm YY       13 Jul 11       yes (english only)

    Change them here:

        http://www.geocaching.com/account/ManagePreferences.aspx

EXAMPLES
    Convert into GPX:

        geo-found -n9999 -H. > /dev/null
        geo-html2gpx *.html > found.gpx
EOF

    exit 1
}
64
#
# Report an error on stderr, prefixed with the program name, and exit 1.
#   $1 - message text
# Globals: PROGNAME (read)
#
error() {
    # printf keeps backslashes in the message literal; quoting PROGNAME
    # keeps basename working for paths that contain blanks.
    printf '%s: %s\n' "$(basename "$PROGNAME")" "$1" >&2
    exit 1
}
72
# Print a diagnostic on stderr when the debug level is high enough.
#   $1 - minimum DEBUG level at which the message is shown
#   $2 - message text
# Globals: DEBUG (read; treated as 0 when unset or empty), PROGNAME (read)
debug() {
    if [ "${DEBUG:-0}" -ge "$1" ]; then
        printf '%s: %s\n' "$(basename "$PROGNAME")" "$2" >&2
    fi
}
78
# Pick the awk and date commands: on Darwin the g-prefixed names are used,
# everywhere else the plain names.
case "$(uname)" in
Darwin)
    awk=gawk
    date=gdate
    ;;
*)
    awk=awk
    date=date
    ;;
esac
86
#
# Read RC file, if there is one.
# USERNAME is cleared first so that only the rc file or -u can set it.
#
USERNAME=
if [ -f "$HOME/.georc" ]; then
    # Quoted so a home directory containing blanks does not break the test
    # or the source command.
    . "$HOME/.georc"
    # N.B. must switch to read_rc_file if LAT/LON is ever needed here
fi
#
# Process the options
#
POSTPROC="cat"      # output filter; replaced by a gpsbabel command for -b/-o
DEBUG=0
INCR=0
NOWPTS=0
NOZERO=0
NOHTML=0
DECODE=1
NUMLOGS=1000000

while getopts "beE:iwzl:no:u:D:h?" opt; do
    case $opt in
    b) POSTPROC="gpsbabel -igpx -f- -ogpx -F-" ;;
    e) DECODE=0 ;;
    # -E var=val: the argument is evaluated as shell code on purpose,
    # so it must only ever come from a trusted command line.
    E) eval "$OPTARG" ;;
    i) INCR=1 ;;
    l) NUMLOGS="$OPTARG" ;;
    o) POSTPROC="gpsbabel -igpx -f- -o$OPTARG -F-" ;;
    n) NOHTML=1 ;;
    u) USERNAME="$OPTARG" ;;
    w) NOWPTS=1 ;;
    z) NOZERO=1 ;;
    D) DEBUG="$OPTARG" ;;
    h | \?) usage ;;
    esac
done
# Drop the parsed options; the remaining arguments are the input files.
shift $((OPTIND - 1))
125
#
# Main Program
#
YR=$(date +"%Y")

# The embedded program is only run when the selected awk reports itself
# as GNU awk.
case "$("$awk" --version)" in
"GNU Awk"*) ;;
*) error "awk is not GNU awk!" ;;
esac
135
136cat "$@" \
137| tr -d '\001\002\003\004\005\006\007\015\021\022\023\024\026\030' \
06ee5545
RN
138| sed 's/<A /\
139<A /g' |
140$awk -vDEBUG=$DEBUG -vINCR=$INCR \
141 -vNOWPTS=$NOWPTS -vNOZERO=$NOZERO \
142 -vNOHTML=$NOHTML \
143 -vDECODE=$DECODE \
144 -vUSERNAME="$USERNAME" \
145 -vDATE="$date" \
146 -vDATEFMT="$DATEFMT" \
147 -vYR="$YR" -vNUMLOGS=$NUMLOGS \
148'
149# Copyright (c) 2010 Dan Saar
150#
151# Permission is hereby granted, free of charge, to any person obtaining a copy
152# of this software and associated documentation files (the "Software"), to deal
153# in the Software without restriction, including without limitation the rights
154# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
155# copies of the Software, and to permit persons to whom the Software is
156# furnished to do so, subject to the following conditions:
157#
158# The above copyright notice and this permission notice shall be included in
159# all copies or substantial portions of the Software.
160#
161# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
162# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
163# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
164# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
165# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
166# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
167# THE SOFTWARE.
168
# Convert a string of hexadecimal digits (either case) to a number.
# Conversion stops at the first character that is not a hex digit, so an
# empty or non-hex string yields 0.
function prsJSON_hex2num(s, rv, ii, len, k)
{
    s = tolower(s)
    len = length(s)
    rv = 0

    ii = 1
    while (ii <= len)
    {
        k = index("0123456789abcdef", substr(s, ii, 1))
        if (k == 0)
            return rv
        rv = rv * 16 + (k-1)
        ii++
    }

    return rv
}
186
# Encode code point v as a UTF-8 byte string.
# cs is a lookup table holding the bytes 0x80..0xFF in order, so that
# substr(cs, n+1, 1) is the byte with value 0x80+n.
# Returns "" for values of 1114112 (0x110000) and above.
function prsJSON_EncodeAsUTF8( v, s, p1, p2, p3, p4, cs )
{
    cs = "\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377"

    # single byte
    if ( v < 128 )
        return sprintf("%c", v )

    # 110xxxxx 10xxxxxx
    if ( v < 2048 )
    {
        p1 = int(v/64) % 32
        p2 = v % 64
        return substr(cs, 65+p1, 1) substr(cs, p2+1, 1)
    }

    # 1110xxxx 10xxxxxx 10xxxxxx
    if ( v < 65536 )
    {
        p1 = int(v/4096) % 16
        p2 = int(v/64) % 64
        p3 = v % 64
        return substr(cs, 97+p1, 1) substr(cs, p2+1, 1) substr(cs, p3+1, 1)
    }

    # 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    if ( v < 1114112 )
    {
        p1 = int(v/262144) % 8
        p2 = int(v/4096) % 64
        p3 = int(v/64) % 64
        p4 = v % 64
        return substr(cs, 113+p1, 1) substr(cs, p2+1, 1) substr(cs, p3+1, 1) substr(cs, p4+1, 1)
    }

    return ""
}
223
# Turn a JSON string token into its UTF-8 text.
#
# jsonString : the token, with or without its surrounding double quotes
#
# returns    : the unescaped text, or a message starting with
#              "ParseJSON Error: Invalid String at " when an unknown
#              backslash escape is present
#
# The \uXXXX escapes are decoded in one left-to-right pass that appends to
# an output buffer. Decoded text is never handed to sub() as a replacement
# and is never scanned again, so an escaped ampersand (\u0026) cannot
# re-insert the matched text and loop forever, and an escaped backslash
# followed by "uXXXX" stays literal.
function prsJSON_UnescapeString(jsonString, matchedString, matchedValue, out, lowValue, rest)
{
    if (jsonString == "\"\"")
        return ""

    if (jsonString ~ /^".+"$/)
        jsonString = substr(jsonString,2,length(jsonString)-2)

    # An escaped backslash is parked as \u005C so that the simple escapes
    # below cannot mistake it for the start of another escape.
    gsub(/\\\\/, "\\u005C", jsonString)
    gsub(/\\"/, "\"", jsonString)
    gsub(/\\\//, "/", jsonString)
    gsub(/\\b/, "\b", jsonString)
    gsub(/\\f/, "\f", jsonString)
    gsub(/\\n/, "\n", jsonString)
    gsub(/\\r/, "\r", jsonString)
    gsub(/\\t/, "\t", jsonString)

    # any backslash still present must introduce a \u escape
    if (match(jsonString, /\\[^u]/))
        return "ParseJSON Error: Invalid String at " jsonString

    out = ""
    while (match(jsonString, /\\u[0123456789AaBbCcDdEeFf][0123456789AaBbCcDdEeFf][0123456789AaBbCcDdEeFf][0123456789AaBbCcDdEeFf]/))
    {
        out = out substr(jsonString, 1, RSTART-1)
        matchedValue = prsJSON_hex2num(substr(jsonString, RSTART+2, 4))
        rest = substr(jsonString, RSTART+6)

        # A high surrogate (D800-DBFF) directly followed by a low surrogate
        # (DC00-DFFF) forms one code point above U+FFFF.
        if (matchedValue >= 55296 && matchedValue <= 56319 && rest ~ /^\\u[Dd][CcDdEeFf][0123456789AaBbCcDdEeFf][0123456789AaBbCcDdEeFf]/)
        {
            lowValue = prsJSON_hex2num(substr(rest, 3, 4))
            matchedValue = (matchedValue % 1024) * 1024 + lowValue % 1024 + 65536
            rest = substr(rest, 7)
        }

        matchedString = prsJSON_EncodeAsUTF8( matchedValue )
        out = out matchedString
        jsonString = rest
    }

    return out jsonString
}
262
# Return 1 unless jsonString is the error text produced by
# prsJSON_UnescapeString, in which case return 0.
function prsJSON_ValidString(jsonString)
{
    if (jsonString ~ /^ParseJSON Error: Invalid String at /)
        return 0

    return 1
}
267
# Store one parsed value in the result array.
#
# jsonData : array receiving the value
# prefix   : full key of the value
# value    : the value to store
function prsJSON_SetDataValue(jsonData, prefix, value)
{
    jsonData[prefix] = value
}
272
# Record a parse error and abort parsing.
#
# jsonData is reduced to a single element "1" holding
# "ParseJSON Error: <message> at <unparsed text of the current segment>",
# the input segments are discarded, and cnt + 1 is returned so that every
# "idx <= cnt" test in the callers fails from here on.
function prsJSON_Error(jsonStringArr, cnt, idx, jsonData, message)
{
    message = sprintf("ParseJSON Error: %s at ", message)
    if (idx <= cnt)
        message = message jsonStringArr[idx]

    split("", jsonData)
    prsJSON_SetDataValue(jsonData, "1", message)
    split("", jsonStringArr)

    return cnt + 1
}
280
# Replace the contents of jsonData with the error message held in tv[1].
function prsJSON_CopyError(jsonData, tv)
{
    # discard whatever was parsed so far
    split("", jsonData)

    prsJSON_SetDataValue(jsonData, "1", tv[1])
}
286
# Parse a JSON number at the start of segment idx.
# On success the number text is stored under prefix and removed from the
# segment; otherwise a "Number not found" error is recorded.
# Returns the segment index to continue with.
function prsJSON_ParseNumber(jsonStringArr, cnt, idx, jsonData, prefix)
{
    if (idx > cnt)
        return idx

    # starts like a number, but doesnt match the REGEX
    if (!match(jsonStringArr[idx], /^(\-?)(0|[123456789][0123456789]*)(\.[0123456789]+)?([eE][+-]?[0123456789]+)?/))
        return prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Number not found")

    prsJSON_SetDataValue(jsonData, prefix, substr(jsonStringArr[idx], 1, RLENGTH))

    # keep whatever follows the number ("" when nothing does)
    jsonStringArr[idx] = substr(jsonStringArr[idx], RLENGTH+1)

    return idx
}
302
# Parse a JSON string at the start of segment idx.
#
# The input was split on commas, so a string that contains commas spans
# several segments. Segments are glued back together (with the comma)
# until an unescaped closing quote is found. The unescaped text is stored
# under prefix and the remainder after the closing quote is written back
# to the current segment.
# Returns the segment index to continue with (cnt + 1 after an error).
function prsJSON_ParseString(jsonStringArr, cnt, idx, jsonData, prefix, jsonString, idxn, idxs, idxq, t)
{
    if (!(idx <= cnt && length(jsonStringArr[idx]) > 0 && substr(jsonStringArr[idx], 1, 1) == "\""))
        return prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "String expected")

    idxn = 2                            # scan position, just past the open quote
    jsonString = jsonStringArr[idx]

    for (;;)
    {
        t = substr(jsonString, idxn)    # unscanned tail
        idxs = index(t, "\\")
        idxq = index(t, "\"")

        if (idxq == 0)
        {
            # no valid close quote found
            if (idx == cnt)
                break

            idx++
            jsonString = jsonString "," jsonStringArr[idx]
        }
        else if (idxs == 0 || idxq < idxs)
            break                       # a valid close quote was found - not before a slash

        if (idxs != 0 && idxq == idxs + 1) # slash quote
            idxn = idxn + idxq
        else
            idxn = idxn + idxs + 1
    }

    if (idxq <= 0)
        return prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Unterminated string")

    # the token including both quotes
    t = substr(jsonString, 1, idxn+idxq-1)

    if (match(t, /[\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037]/) != 0)
        return prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Invalid character in string")

    t = prsJSON_UnescapeString(t)
    if ( !prsJSON_ValidString(t) )
        return prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Invalid string")

    prsJSON_SetDataValue(jsonData, prefix, t)
    jsonStringArr[idx] = substr(jsonString, idxn+idxq)

    return idx
}
363
# Parse a JSON object starting at segment idx.
# Each member value is stored under prefix SUBSEP name (or just name when
# prefix is empty). Returns the segment index to continue with
# (cnt + 1 after an error).
function prsJSON_ParseObject(jsonStringArr, cnt, idx, jsonData, prefix, tv )
{
    if (idx > cnt)
        return idx

    sub(/^\{[ \t\r\n\f]*/, "", jsonStringArr[idx]) #skip open { and skipwhite

    while (idx <= cnt && length(jsonStringArr[idx]) > 0 && substr(jsonStringArr[idx], 1, 1) != "}")
    {
        # member name, parsed into the scratch array tv
        idx = prsJSON_ParseString(jsonStringArr, cnt, idx, tv, "1")

        if (idx <= cnt && length(tv[1]) == 0)
            idx = prsJSON_Error(jsonStringArr, cnt, idx, tv, "Empty string used for property name")

        if (idx > cnt)
        {
            # the name could not be read: hand the error to the caller
            prsJSON_CopyError(jsonData, tv)
            continue
        }

        sub(/^[ \t\r\n\f]+/, "", jsonStringArr[idx]) #skipwhite

        if ( length(jsonStringArr[idx]) == 0 || substr(jsonStringArr[idx], 1, 1) != ":" )
        {
            idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected colon")
            continue
        }

        sub(/^:[ \t\r\n\f]*/, "", jsonStringArr[idx]) #skip colon and skipwhite

        if ( length(jsonStringArr[idx]) == 0 )
        {
            idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected JSON value (1)")
            continue
        }

        # member value
        idx = prsJSON_ParseJSONInt(jsonStringArr, cnt, idx, jsonData, prefix != "" ? prefix SUBSEP tv[1] : tv[1])
        if (idx > cnt)
            continue

        sub(/^[ \t\r\n\f]+/, "", jsonStringArr[idx]) #skipwhite

        if (length(jsonStringArr[idx]) == 0 && idx < cnt)
        {
            # segment used up: a comma followed, so another member must come
            idx++
            sub(/^[ \t\r\n\f]+/, "", jsonStringArr[idx]) #skipwhite
            if (length(jsonStringArr[idx]) == 0 || substr(jsonStringArr[idx], 1, 1) == "}")
                idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected object property")
        }
        else if (length(jsonStringArr[idx]) == 0 || substr(jsonStringArr[idx], 1, 1) != "}")
            idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected object property or closing brace")
    }

    if (idx <= cnt && (length(jsonStringArr[idx]) == 0 || substr(jsonStringArr[idx], 1, 1) != "}"))
        idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected closing brace")

    if (idx <= cnt && length(jsonStringArr[idx]) > 0 && substr(jsonStringArr[idx], 1, 1) == "}")
        sub(/^\}[ \t\r\n\f]*/, "", jsonStringArr[idx]) #skip close } and skipwhite

    return idx
}
423
# Parse a JSON array starting at segment idx.
# Element n (counted from 1) is stored under prefix SUBSEP n (or just n
# when prefix is empty). Returns the segment index to continue with
# (cnt + 1 after an error).
function prsJSON_ParseArray(jsonStringArr, cnt, idx, jsonData, prefix, ii)
{
    if (idx > cnt)
        return idx

    sub(/^\[[ \t\r\n\f]*/, "", jsonStringArr[idx]) #skip open bracket and skipwhite

    for (ii = 1; idx <= cnt && length(jsonStringArr[idx]) > 0 && substr(jsonStringArr[idx], 1, 1) != "]"; ii++)
    {
        idx = prsJSON_ParseJSONInt(jsonStringArr, cnt, idx, jsonData, prefix != "" ? prefix SUBSEP ii : ii )
        if (idx > cnt)
            continue

        sub(/^[ \t\r\n\f]+/, "", jsonStringArr[idx]) #skipwhite

        if (length(jsonStringArr[idx]) == 0 && idx < cnt)
        {
            # segment used up: a comma followed, so another element must come
            idx++
            sub(/^[ \t\r\n\f]+/, "", jsonStringArr[idx]) #skipwhite
            if (length(jsonStringArr[idx]) == 0 || substr(jsonStringArr[idx], 1, 1) == "]")
                idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected array value")
        }
        else if (length(jsonStringArr[idx]) == 0 || substr(jsonStringArr[idx], 1, 1) != "]")
            idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected array value or closing bracket")
    }

    if (idx <= cnt && (length(jsonStringArr[idx]) == 0 || substr(jsonStringArr[idx], 1, 1) != "]"))
        idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected closing bracket")

    if (idx <= cnt && length(jsonStringArr[idx]) > 0 && substr(jsonStringArr[idx], 1, 1) == "]")
        sub(/^\][ \t\r\n\f]*/, "", jsonStringArr[idx]) #skip close bracket and skipwhite

    return idx
}
462
# Parse one JSON value at the start of segment idx and dispatch on its
# first character. Scalars are only accepted when prefix is non-empty, so
# the outermost value has to be an object or an array. For the outermost
# call (prefix == "") any text left over after the value is an error.
# Returns the segment index to continue with (cnt + 1 after an error).
function prsJSON_ParseJSONInt(jsonStringArr, cnt, idx, jsonData, prefix, tk)
{
    if (idx > cnt)
        return idx

    sub(/^[ \t\r\n\f]+/, "", jsonStringArr[idx]) #skipwhite

    if (length(jsonStringArr[idx]) > 0)
    {
        tk = substr(jsonStringArr[idx], 1, 1)

        if (tk == "\"" && prefix != "")
            idx = prsJSON_ParseString(jsonStringArr, cnt, idx, jsonData, prefix)
        else if (tk ~ /^[0123456789-]/ && prefix != "")
            idx = prsJSON_ParseNumber(jsonStringArr, cnt, idx, jsonData, prefix)
        else if (jsonStringArr[idx] ~ /^true/ && prefix != "")
        {
            prsJSON_SetDataValue(jsonData, prefix, "<<true>>")
            jsonStringArr[idx] = substr(jsonStringArr[idx],5)
        }
        else if (jsonStringArr[idx] ~ /^false/ && prefix != "")
        {
            prsJSON_SetDataValue(jsonData, prefix, "<<false>>")
            jsonStringArr[idx] = substr(jsonStringArr[idx],6)
        }
        else if (jsonStringArr[idx] ~ /^null/ && prefix != "")
        {
            prsJSON_SetDataValue(jsonData, prefix, "<<null>>")
            jsonStringArr[idx] = substr(jsonStringArr[idx],5)
        }
        else if (tk == "{")
            idx = prsJSON_ParseObject(jsonStringArr, cnt, idx, jsonData, prefix)
        else if (tk == "[")
            idx = prsJSON_ParseArray(jsonStringArr, cnt, idx, jsonData, prefix)
        else
            idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected JSON value (2)")

        if (idx <= cnt)
            sub(/^[ \t\r\n\f]+/, "", jsonStringArr[idx]) #skipwhite
    }

    # outermost value only: nothing may follow it
    if (prefix == "")
    {
        if (idx <= cnt && length(jsonStringArr[idx]) != 0)
            idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected end of JSON text")
        else if (idx+1 <= cnt)
            idx = prsJSON_Error(jsonStringArr, cnt, idx+1, jsonData, "Expected end of JSON text (2)")
    }

    return idx
}
514
515#
516# JSON Formatting Routines
517#
518
# Decide whether a list of member names describes a JSON array.
# Returns the element count when every name is made of digits only and the
# smallest name is 1 and the largest equals the number of names;
# returns -1 otherwise (including for an empty list).
function useJSON_ArrayCount( possibleArray, a, min, max, cnt, rv)
{
    cnt = 0

    for ( a in possibleArray )
    {
        if (possibleArray[a] "" !~ /^[0123456789][0123456789]*$/)
            return -1

        if ( cnt++ == 0 )
        {
            # first name seen: it is both the smallest and the largest
            min = possibleArray[a]
            max = possibleArray[a]
            continue
        }

        # a repeat of the current extreme cannot be a valid index set
        if (min == possibleArray[a] || max == possibleArray[a])
            return -1

        if (possibleArray[a] < min)
            min = possibleArray[a]

        if (max < possibleArray[a])
            max = possibleArray[a]
    }

    if (min == 1 && max == cnt)
        return cnt

    return -1
}
553
# Look up the SUBSEP-separated member list recorded for prefix in the
# schema. The outermost level is stored under the key "<<novalue>>".
# Returns "" when the schema has no entry for prefix.
function useJSON_GetObjectMembers(jsonSchema, prefix)
{
    if (prefix == "")
        prefix = "<<novalue>>"

    if (prefix in jsonSchema)
        return jsonSchema[prefix]

    return ""
}
559
# Sort arr[left..right] in place, ascending, with quicksort.
# The pivot is picked at random; elements are compared with the awk
# "<" operator.
function utlJSON_qsortArray(arr, left, right, i, last, t)
{
    # fewer than two elements: nothing to sort
    if (left >= right)
        return

    # move a randomly chosen pivot to the front
    i = left + int((right-left+1)*rand())
    t = arr[left]
    arr[left] = arr[i]
    arr[i] = t

    # gather everything smaller than the pivot right behind it
    last = left
    i = left+1
    while (i <= right)
    {
        if (arr[i] < arr[left])
        {
            last++
            t = arr[last]
            arr[last] = arr[i]
            arr[i] = t
        }
        i++
    }

    # drop the pivot between the two partitions
    t = arr[left]
    arr[left] = arr[last]
    arr[last] = t

    utlJSON_qsortArray(arr, left, last-1)
    utlJSON_qsortArray(arr, last+1, right)
}
586
# Append name to the SUBSEP-separated member list jsonSchema[key] unless
# it is already listed. Membership is tested with index() and substr()
# rather than a regular expression, so names containing regex characters
# such as "(" or "+" are handled literally.
function useJSON_SchemaAddMember(jsonSchema, key, name, sv)
{
    sv = (key in jsonSchema) ? jsonSchema[key] : ""

    # not the only member, not the first, not the last, not in the middle
    if ( sv != name && index(sv, name SUBSEP) != 1 && (length(sv) <= length(name)+1 || substr(sv, length(sv)-length(name)) != SUBSEP name) && index(sv, SUBSEP name SUBSEP) == 0 )
        jsonSchema[key] = sv (sv == "" ? "" : SUBSEP) name
}

# Build the schema of parsed JSON data.
#
# jsonData   : array of parsed JSON data, keyed by SUBSEP-joined paths
# jsonSchema : [out] for every container path, the SUBSEP-separated list
#              of its direct member names; the outermost level is stored
#              under the key "<<novalue>>"
function useJSON_GetSchema(jsonData, jsonSchema, a, tidx, tv, sv, idx)
{
    split("", jsonSchema)

    for (a in jsonData)
    {
        # peel the last path component off until only the top name is left
        while (match(a, SUBSEP "[^" SUBSEP "]+$"))
        {
            tidx = substr(a,1,RSTART-1)
            tv = substr(a,RSTART+1)
            useJSON_SchemaAddMember(jsonSchema, tidx, tv)
            a = tidx
        }

        useJSON_SchemaAddMember(jsonSchema, "<<novalue>>", a)
    }
}
611
# Turn text into a quoted JSON string.
# Backslash, double quote and the short control escapes are handled with
# gsub first. The loop then rewrites the remaining control characters and
# UTF-8 multi-byte sequences as \uXXXX escapes (two surrogate escapes for
# four-byte sequences). cs is a lookup table of the bytes 0x80..0xFF, so
# index(cs, byte) - 1 is the low seven bits of that byte.
function useJSON_EscapeString(s, ii, c, t, t2, t3, t4, cs)
{
    cs = "\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377"

    gsub(/\\/, "\\u005C", s)
    gsub(/"/, "\\\"", s)
    #gsub(/\//, "\\/", s) # required to decode, but not to encode
    gsub(/\b/, "\\b", s)
    gsub(/\f/, "\\f", s)
    gsub(/\n/, "\\n", s)
    gsub(/\r/, "\\r", s)
    gsub(/\t/, "\\t", s)

    # Each rewrite replaces the bytes at ii by a six (or twelve) character
    # escape and moves ii onto the last character of that escape.
    ii = 1
    while ( ii <= length(s) )
    {
        t = substr(s,ii,1)

        if (t == "\000") # having \000 in list below doesnt work in all awks
        {
            c = 0
            s = substr(s, 1, ii-1) sprintf("\\u%04X", c) substr(s, ii+1)
            ii += 5
        }
        else
        {
            # position in the list equals the control character code
            c = index("\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037", t)

            if ( c == 0 )
                c = -1      # not a control character
            else
            {
                s = substr(s, 1, ii-1) sprintf("\\u%04X", c) substr(s, ii+1)
                ii += 5
            }
        }

        t = index(cs, t)
        t2 = ii+1 <= length(s) ? index(cs, substr(s,ii+1,1)) : 0
        t3 = ii+2 <= length(s) ? index(cs, substr(s,ii+2,1)) : 0
        t4 = ii+3 <= length(s) ? index(cs, substr(s,ii+3,1)) : 0

        if ( c < 0 && t > 64 && t <= 96 && ii+1 <= length(s) && t2 > 0 && t2 <= 64) # two character UTF-8 sequence
        {
            c = (t - 65)*64 + (t2-1)
            s = substr(s, 1, ii-1) sprintf("\\u%04X", c) substr(s, ii+2)
            ii += 5
        }

        else if ( c < 0 && t > 96 && t <= 112 && ii+2 <= length(s) && t2 > 0 && t2 <= 64 && t3 > 0 && t3 <= 64) # three character UTF-8 sequence
        {
            c = (t - 97)*4096 + (t2-1)*64 + (t3-1)
            if ( c < 65536 )
            {
                s = substr(s, 1, ii-1) sprintf("\\u%04X", c) substr(s, ii+3)
                ii += 5
            }
            else
            {
                # encode in JSON-style with two \u#### UTF-16 surrogates
                s = substr(s, 1, ii-1) sprintf("\\u%04X\\u%04X", (c/1024)%1024 + 55296, c%1024 + 56320) substr(s, ii+4)
                ii += 11
            }
        }

        # four character UTF-8 sequence, encode in JSON-style with two \u#### UTF-16 surrogates
        else if ( c < 0 && t > 112 && t <= 120 && ii+3 <= length(s) && t2 > 0 && t2 <= 64 && t3 > 0 && t3 <= 64 && t4 > 0 && t4 <= 64)
        {
            c = (t - 113)*262144 + (t2-1)*4096 + (t3-1)*64 + (t4-1)
            c -= 65536
            s = substr(s, 1, ii-1) sprintf("\\u%04X\\u%04X", (c/1024)%1024 + 55296, c%1024 + 56320) substr(s, ii+4)
            ii += 11
        }

        ii++
    }

    return "\"" s "\""
}
689
# Fetch the value stored under prefix, or the marker "<<novalue>>" when
# jsonData has no such key.
function useJSON_GetDataValue(jsonData, prefix)
{
    if (prefix in jsonData)
        return jsonData[prefix]

    return "<<novalue>>"
}
694
# Indent one piece of output for pretty printing.
# s is returned unchanged when it is empty or pretty is not positive.
# Otherwise it is prefixed with (pretty-1)*3 blanks and, unless it is a
# lone closing brace or bracket, followed by a newline.
function useJSON_PrettyFormat(s, pretty)
{
    if (s != "" && pretty > 0)
    {
        # dont sprintf the whole thing, some awks have short buffers for sprintf
        if (s == "}" || s == "]")
            return sprintf("%*.*s", (pretty-1)*3, (pretty-1)*3, "") s

        return sprintf("%*.*s", (pretty-1)*3, (pretty-1)*3, "") s "\n"
    }

    return s
}
702
# Format the value stored at prefix, recursively, as JSON text.
#
# pretty : 0 for compact output, otherwise the nesting depth (from 1)
#
# A path without members is a scalar; a path whose member names are
# 1..n is written as an array; anything else is written as an object
# with its member names sorted.
function useJSON_FormatInt(jsonData, jsonSchema, prefix, pretty, allLines, member, memberArr, memberList, arrCount, a, ii)
{
    memberList = useJSON_GetObjectMembers(jsonSchema, prefix)

    if ( memberList == "" )
    {
        a = useJSON_GetDataValue(jsonData, prefix)

        if ( a == "<<true>>" ) return "true"
        if ( a == "<<false>>" ) return "false"
        if ( a == "<<null>>" ) return "null"
        if ( a == "<<novalue>>" ) return "" # <<novalue>> is a help for dealing with empty arrays and objects

        # if it looks like a number, encode it as such. Cant tell a string from a number.
        if (a "" ~ /^(\-?)(0|[123456789][0123456789]*)(\.[0123456789]+)?([eE][+-]?[0123456789]+)?$/)
            return a

        return useJSON_EscapeString(a)
    }

    split(memberList, memberArr, SUBSEP)
    arrCount = useJSON_ArrayCount( memberArr )

    # member : key prefix shared by all children
    # a      : pretty value handed down to the children
    member = prefix (prefix == "" ? "" : SUBSEP)
    a = (pretty != 0 ? pretty+1 : 0)

    if ( arrCount >= 0 )
    {
        allLines = "[" (pretty == 0 ? "" : "\n")

        for ( ii = 1 ; ii <= arrCount ; ii++ )
            allLines = allLines useJSON_PrettyFormat(useJSON_FormatInt( jsonData, jsonSchema, member ii, a) (ii < arrCount ? "," : ""), a)

        return allLines useJSON_PrettyFormat("]", pretty)
    }

    allLines = "{" (pretty == 0 ? "" : "\n")

    # count the member names, then sort them
    arrCount = 0
    for (ii in memberArr)
        arrCount++

    utlJSON_qsortArray(memberArr, 1, arrCount)

    for ( ii = 1 ; ii <= arrCount ; ii++ )
        allLines = allLines useJSON_PrettyFormat(useJSON_EscapeString(memberArr[ii]) (pretty == 0 ? ":" : " : ") useJSON_FormatInt(jsonData, jsonSchema, member memberArr[ii], a) (ii < arrCount ? "," : ""), a)

    return allLines useJSON_PrettyFormat("}", pretty)
}
753
754#
755# Entry Points
756#
757
#
# ParseJSON : Parse JSON text into an awk array
#
# jsonString : JSON text
# jsonData   : [out] array of parsed JSON data; on a parse error it holds
#              a single element "1" with the error message
#
# returns : N/A
#
function ParseJSON(jsonString, jsonData, jsonStringArr, cnt)
{
    # start from an empty result array
    split("", jsonData)

    # newlines split differently in some awks, so literal newlines are
    # always replaced with formfeeds (also white space) before splitting
    gsub(/\n/, "\f", jsonString)

    # the parser works on the comma-separated pieces of the text
    cnt = split(jsonString, jsonStringArr, ",")
    prsJSON_ParseJSONInt(jsonStringArr, cnt, 1, jsonData, "")
}
776
#
# FormatJSON : Format parsed JSON data back into JSON text
#
# jsonData : array of parsed JSON data
# pretty   : 0 = compact format, non-zero = pretty format
#
# returns : string with JSON text
#
function FormatJSON(jsonData, pretty, jsonSchema)
{
    # the schema lists the members of every container in jsonData
    useJSON_GetSchema(jsonData, jsonSchema)

    if (pretty)
        return useJSON_FormatInt(jsonData, jsonSchema, "", 1)

    return useJSON_FormatInt(jsonData, jsonSchema, "", 0)
}
790
#
# JSONArrayLength : Find number of members in a JSON array
#
# jsonData : array of parsed JSON data
# prefix   : array name ("" for the outermost level)
#
# returns : the highest numeric index found directly below prefix,
#           0 when prefix only names a scalar or has no numeric members,
#           -1 when no key belongs to prefix at all
#
function JSONArrayLength(jsonData, prefix, a, cnt, tv)
{
    cnt = -1

    for (a in jsonData)
    {
        # Only keys that are prefix itself or lie below prefix SUBSEP count.
        # A plain "starts with prefix" test would also pick up siblings
        # whose name merely begins with the same characters.
        if (prefix == "")
            tv = a
        else if (a == prefix)
            tv = ""
        else if (index(a, prefix SUBSEP) == 1)
            tv = substr(a, length(prefix) + 2)
        else
            continue

        # keep only the path component directly below prefix
        if ( index(tv, SUBSEP) )
            tv = substr(tv, 1, index(tv, SUBSEP)-1)

        tv = tv + 0
        if ( tv > cnt )
            cnt = tv
    }

    return cnt
}
818
#
# JSONUnescapeString : turn a JSON-escaped string into UTF-8
#
# jsonString : the escaped JSON string to convert
#
# returns : the string in UTF-8 (public wrapper around the parser helper)
#
function JSONUnescapeString(jsonString)
{
    jsonString = prsJSON_UnescapeString(jsonString)
    return jsonString
}
830
#
# JSONIsTrue : return non-zero if the value is the true value
#
# jsonValue : the value to test
#
# returns : 1 when jsonValue is the marker stored for JSON true, else 0
#
function JSONIsTrue(jsonValue)
{
    if (jsonValue == "<<true>>")
        return 1

    return 0
}
842
#
# JSONIsFalse : return non-zero if the value is the false value
#
# jsonValue : the value to test
#
# returns : 1 when jsonValue is the marker stored for JSON false, else 0
#
function JSONIsFalse(jsonValue)
{
    if (jsonValue == "<<false>>")
        return 1

    return 0
}
854
#
# JSONIsNull : return non-zero if the value is the null value
#
# jsonValue : the value to test
#
# returns : 1 when jsonValue is the marker stored for JSON null, else 0
#
function JSONIsNull(jsonValue)
{
    if (jsonValue == "<<null>>")
        return 1

    return 0
}
866
#
# JSONObjectMembers : get the set of members of an object
#
# jsonData  : array of parsed JSON data
# prefix    : object name
# memberArr : [out] an array of the names of the object members, if the
#             target was an object or an array; emptied otherwise
#
# returns : If the target was actually an array rather than an object,
#           the number of elements in the array
#           Else, zero if the target was an object or a value
#
function JSONObjectMembers(jsonData, prefix, memberArr, jsonSchema, memberList, rv, a)
{
    useJSON_GetSchema(jsonData, jsonSchema)
    memberList = useJSON_GetObjectMembers(jsonSchema, prefix)

    # a plain value (or an unknown name) has no members
    if ( memberList == "" )
    {
        split("", memberArr)
        return 0
    }

    split(memberList, memberArr, SUBSEP)

    rv = useJSON_ArrayCount( memberArr )
    if ( rv != -1 )
        return rv

    # not an array, sort the object member names
    rv = 0
    for (a in memberArr)
        rv++
    utlJSON_qsortArray(memberArr, 1, rv)

    return 0
}
901# End of Copyright (c) 2010 Dan Saar
902
# Write text to stderr when lvl does not exceed the global DEBUG level.
function debug(lvl, text) {
    if (lvl > DEBUG)
        return

    print text > "/dev/stderr"
}
907
# Reset the global per-waypoint state to its initial values.
function wpt_init() {
    # status and symbol
    available = "True"
    archived = "False"
    sym = "Geocache"
    gs_type = ""

    # log collection
    json_log_bool = 0
    logs = ""
    logs_section = 0

    # remaining text fields and counters
    hints = ""
    lat = ""
    yy = 0
    wplist = ""
    nattr_yes = 0
    nattr_no = 0
}
923
cffb6fa9
RN
# Encode the Unicode code point dec as UTF-8.
# Handles the whole range up to U+10FFFF (one to four bytes).
# Returns "" for negative values, values above U+10FFFF and the
# surrogate range U+D800..U+DFFF.
function dec2utf(dec) {
    dec = int(dec)

    if (dec < 0 || dec > 1114111 || (dec >= 55296 && dec <= 57343))
        return ""

    if (dec <= 127)
        return sprintf("%c", dec)

    # In a UTF-8 locale the gawk %c conversion of a number already yields
    # the UTF-8 encoding of that code point. Composing bytes by hand there
    # would encode every byte a second time, so probe for it first.
    if (sprintf("%c", 228) == "\303\244")
        return sprintf("%c", dec)

    # Single-byte locale: compose the UTF-8 bytes one by one.
    if (dec <= 2047)
        return sprintf("%c%c", 192 + int(dec / 64), 128 + dec % 64)

    if (dec <= 65535)
        return sprintf("%c%c%c", 224 + int(dec / 4096), 128 + int(dec / 64) % 64, 128 + dec % 64)

    return sprintf("%c%c%c%c", 240 + int(dec / 262144), 128 + int(dec / 4096) % 64, 128 + int(dec / 64) % 64, 128 + dec % 64)
}
930
# Replace numeric character references (&#NNN; and &#xHHH;) in txt with
# the characters they stand for, as produced by dec2utf.
#
# The text is walked once from left to right and decoded characters go
# straight to the output, so text produced by one reference can never be
# read as part of another one. At least one digit is required, so the
# malformed "&#;" and "&#x;" are left untouched.
function asc2xml(txt, o, ent, dec) {
    o = ""
    while (match(txt, "&#([xX][0123456789abcdefABCDEF]+|[0123456789]+);"))
    {
        o = o substr(txt, 1, RSTART-1)
        ent = substr(txt, RSTART+2, RLENGTH-3)      # text between "&#" and ";"
        txt = substr(txt, RSTART+RLENGTH)

        if (ent ~ /^[xX]/)
            dec = hex2dec(toupper(substr(ent, 2)))  # hex2dec is handed upper case digits
        else
            dec = ent + 0

        o = o dec2utf(dec)
    }
    return o txt
}
954
# Decode character references in text: numeric references are handled by
# asc2xml, afterwards "&amp;" becomes a plain ampersand.
# The former hand-written entity table sat in a branch that could never
# run and has been dropped.
function umlauts(text) {
    text = asc2xml(text)

    # "\\&" stands for a literal ampersand in the replacement
    gsub("&amp;", "\\&", text)
    return text
}
982
# Reduce an HTML fragment to plain text: non-breaking spaces become
# blanks, paragraph and line-break tags become newlines, every other tag
# is removed, and runs of whitespace are compressed.
function htmlclean(text) {
    gsub("&nbsp;", " ", text)

    # tags that imply a line break
    gsub("</?[pP][^>]*>", "\n", text)
    gsub("<[bB][rR][^>]*>", "\n", text)

    # every remaining tag
    gsub("<[^>]*>", "", text)

    # compress whitespace: at most one empty line, single blanks
    gsub("\n\n\n*", "\n\n", text)
    gsub("[ \t][ \t]*", " ", text)

    return text
}
993
# Flatten an HTML table to plain text: one line per table row, cells
# terminated by " | ", all other markup removed.
function tableclean(text) {
    # start from a single line
    gsub("\n", "", text)
    gsub("&nbsp;", " ", text)

    # translate/remove HTML tags
    gsub("</?[pP][^>]*>", "\n", text)
    gsub("</[bB][rR][^>]*>", "", text)
    gsub("</?font[^>]*>", "", text)
    gsub("</?table[^>]*>", "", text)

    # rows end a line, cells end with a separator
    gsub("<t[rdh]>", "", text)
    gsub("</tr>", "\n", text)
    gsub("</t[dh][^>]*>", " | ", text)

    # whatever markup is left
    gsub("<[^>]*>", "", text)

    # compress whitespace
    gsub("[ \t][ \t]*", " ", text)

    return text
}
1010
# Extract the body of a div element from the input.
#
# text : the current line; everything up to and including the opening
#        div tag is dropped
# tag  : id of the wanted div, or "" for any div
#
# Further input lines are read with getline until the collected text
# contains "div" or the input ends. The text from the closing tag on is
# cut off. Uses the global variables pat and more.
function remdiv(text, tag) {
    pat = (tag != "") ? ".*<div id=." tag ".[^>]*>[ \t\n]*" : ".*<div[^>]*>[ \t\n]*"
    sub(pat, "", text)

    while (text !~ "/?div" && (getline more) > 0)
        text = text "\n" more

    sub("[ \t\n]*</div>.*", "", text)
    debug(3, "Div:\n" text)
    return text
}
1027
# Extract the body of a span element from the input.
#
# text : the current line; everything up to and including the opening
#        span tag is dropped
# tag  : id of the wanted span, or "" for any span
#
# Further input lines are read with getline until the collected text
# contains "span" or the input ends. The text from the closing tag on is
# cut off. Uses the global variables pat and more.
function remspan(text, tag) {
    pat = (tag != "") ? ".*<span id=." tag ".[^>]*>[ \t\n]*" : ".*<span[^>]*>[ \t\n]*"
    sub(pat, "", text)

    while (text !~ "/?span" && (getline more) > 0)
        text = text "\n" more

    sub("[ \t\n]*</span>.*", "", text)
    debug(3, "Span:\n" text)
    return text
}
1044
# Extract the body of a span element that may contain nested spans and
# may extend over many input lines.
#
# text : the current line; everything up to and including the opening
#        span tag is dropped
# tag  : id of the wanted span, or "" for any span; for "CacheLogs" the
#        table tags are removed from the result as well
#
# The nesting depth is tracked in i: every removed opening tag raises it,
# every removed closing tag lowers it, and reading stops once it is back
# at zero, the input ends, or the text grows beyond 500,000 characters.
# Uses the global variables pat, more, i and j (j only numbers the debug
# output); progress is local.
function remspanlong(text, tag, progress) {
    if (tag != "")
        pat = ".*<span id=." tag ".[^>]*>[ \t\n]*"
    else
        pat = ".*<span[^>]*>[ \t\n]*"
    sub(pat, "", text)
    # i = "span level"
    i = 1; j = 0
    debug(2, length(text) "\t" i " " j++ " " text)
    # input is in text
    while (i != 0)
    {
        # emergency exit
        if (length(text) > 500000)
        {
            debug(0, "Warning: logs exceeded 500,000 bytes!")
            break
        }
        # cleanup: remove </*span...>, adjust "span level"
        while (text ~ "</*span.*>")
        {
            progress = 0
            if (text ~ "</span>")
            {
                --i; sub("</span>", "", text)
                progress = 1
            }
            if (text ~ "<span.*>")
            {
                ++i; sub("<span[^>]*>", "", text)
                progress = 1
            }
            # A span-like tag that neither substitution can remove (for
            # example a closing tag with a blank before the ">") would
            # otherwise keep this loop running forever.
            if (!progress)
                break
        }
        debug(2, "=" length(text) "\t" i " " j++ " " text)
        # if "span level" down to zero, closing tag reached
        if (i == 0) break
        # get more input
        if ((getline more) <= 0)
            break
        text = text "\n" more
        debug(2, "+" length(more) "\t" i " " j++ " " more)
    }
    debug(1, length(text) "\t" i " " j++)
    sub("[ \t\n]*</span>.*", "", text)
    gsub("&nbsp;", " ", text)
    if (tag == "CacheLogs")
        gsub("</?table[^>]*>", "", text)
    debug(3, "SpanLong:\n" text)
    return text
}
1092
#
# remwaypoints(): read the "Additional Waypoints" table from the input.
#
# Lines are consumed with getline and joined with a blank until the
# collected text contains "</table>" or the notice
# "No additional waypoints to display", or input ends.
# Returns the complete table contents; callers split it by "</tr>"
# (not by <STRONG><img...>).
#
# Uses the globals text and more as scratch variables.
#
function remwaypoints() {
    text = ""
    while (1) {
        if (text ~ "</table>")
            break
        if (text ~ "No additional waypoints to display")
            break
        if ((getline more) <= 0)
            break
        text = text " " more
    }

    gsub("&nbsp;", " ", text)
    gsub("\n[ \t]*", "", text)
    debug(3, "Waypoints:\n" text "\nEnd Waypoints")
    return text
}
1108
#
# splitwaypoints(waypoints): turn the "Additional Waypoints" HTML table into
# a compact record list.
#
#   waypoints  table contents as returned by remwaypoints()
#
# Returns a string in which every waypoint starts with a newline followed by
#       lat="..." lon="..."|prefix|lookup|wpname|url
# and a following note row appends "|note" to that record.
#
# Row layouts, judged by the number of "</td>"-separated fields:
#   8 fields                   main row, old style (pre-2010/07)
#   9 fields                   main row, new style (2010/07), one extra
#                              leading column
#   4 fields, [1] ~ "Note:"    note row, old style
#   4 fields, [2] ~ "Note:"    note row, new style
#   anything else              dropped
#
# The old-style and new-style main rows differ only by one column, so both
# are handled by one code path with a column offset.  Row and field counts
# come straight from split().
#
# The parameters after "waypoints" are locals.  text, wps, wp, i, j and url
# are globals used as scratch variables.
#
function splitwaypoints(waypoints,
        line, fld, prefix, lookup, wpname, x, y, lat, lon, off) {
    text = ""

    # separate table rows; element 1 is the header row and is skipped
    i = split(waypoints, wps, "</tr>")
    wp = 1
    while (wp < i) {
        ++wp

        # get URL from full table row
        url = wps[wp]
        gsub(".*href=.", "", url)
        gsub("\".*", "", url)
        if (url !~ "^http:")
            url = ""
        else
            debug(1, "url: " url)

        # individual fields without leading/trailing blanks and HTML tags
        j = split(wps[wp], line, "</td>")
        for (fld = 1; fld <= j; ++fld) {
            debug(2, "Before Line[" fld "]: " line[fld])
            gsub("[ \t]*<[^>]*>", "", line[fld])
            gsub("^[ \t]*", "", line[fld])
            gsub("[ \t]*$", "", line[fld])
            debug(2, "after Line[" fld "]: " line[fld])
        }

        if (j == 8 || j == 9) {
            # main information row; new style is shifted right by one column
            off = j - 8
            if (!line[3 + off])
                continue
            prefix = substr(line[3 + off] "00", 1, 2)
            lookup = line[4 + off]
            wpname = line[5 + off]

            # latitude: hemisphere letter, degrees, decimal minutes
            lat = toupper(line[6 + off])
            gsub(" *[EW].*", "", lat)
            split(lat, y)
            lat = y[2] + y[3]/60.0
            if (y[1] == "S")
                lat = -lat

            # longitude: same field with the latitude part cut off
            lon = toupper(line[6 + off])
            gsub("[NS] *[0-9]*.. *[0-9.]* ", "", lon)
            gsub("[^ 0-9.NESW-]", "", lon)
            split(lon, x)
            lon = x[2] + x[3]/60.0
            if (x[1] == "W")
                lon = -lon

            text = text sprintf("\nlat=\"%.6f\" lon=\"%.6f\"|%s|%s|%s|%s",
                                lat, lon, prefix, lookup, wpname, url)
        } else if (j == 4) {
            if (line[1] ~ "Note:") {
                # continuation row, old style
                text = text sprintf("|%s", line[2])
            } else if (line[2] ~ "Note:") {
                # continuation row, new style
                text = text sprintf("|%s", line[3])
            }
        }
    }
    debug(3, "Split WPs\n" text)
    return text
}
1213
#
# wpclean(waypoints): simplify the "Additional Waypoints" HTML table to
#       prefixedname - name<br />coordfield<br />note<br />
# for inclusion in the long description.
#
#   waypoints  table contents as returned by remwaypoints()
#
# The prefixed name is the first two characters of the prefix column
# (padded with "00") followed by the global gcid from its third character
# on.  Row layouts are recognised by field count exactly as in
# splitwaypoints(): 8 or 9 fields for a main row (new style is shifted
# right by one column), 4 fields for a note row, everything else dropped.
#
# The old-style and new-style main rows are handled by one code path with
# a column offset, and row/field counts come straight from split().
#
# The parameters after "waypoints" are locals (lookup is kept only for
# signature compatibility).  text, wps, wp, i and j are globals used as
# scratch variables.
#
function wpclean(waypoints, line, fld, prefix, lookup, wpname, coords, off) {
    text = ""

    # separate table rows; element 1 is the header row and is skipped
    i = split(waypoints, wps, "</tr>")
    wp = 1
    while (wp < i) {
        ++wp

        # individual fields without leading/trailing blanks and HTML tags
        j = split(wps[wp], line, "</td>")
        for (fld = 1; fld <= j; ++fld) {
            gsub("[ \t]*<[^>]*>", "", line[fld])
            gsub("^[ \t]*", "", line[fld])
            gsub("[ \t]*$", "", line[fld])
        }

        if (j == 8 || j == 9) {
            # main information row; new style is shifted right by one column
            off = j - 8
            if (!line[3 + off])
                continue
            prefix = substr(line[3 + off] "00", 1, 2) substr(gcid, 3)
            wpname = line[5 + off]
            gsub(" \\(.*\\).*", "", wpname)
            coords = toupper(line[6 + off])
            text = text sprintf("%s - %s<br />%s<br />", prefix, wpname, coords)
        } else if (j == 4) {
            if (line[1] ~ "Note:") {
                # continuation row, old style
                text = text sprintf("%s<br />", line[2])
            } else if (line[2] ~ "Note:") {
                # continuation row, new style
                text = text sprintf("%s<br />", line[3])
            }
        }
    }
    debug(3, "Clean WPs\n" text)
    return text
}
1279
#
# hex2dec(x): convert a string of hexadecimal digits to a number.
#   x    hex digits, upper or lower case, without any prefix
#   val  local accumulator (do not pass)
# Returns 0 for the empty string.  Lower-case digits used to be decoded as
# -1 each because index() did not find them; the input is now folded to
# upper case first.
#
function hex2dec(x, val) {
    x = toupper(x)
    for (val = 0; length(x); x = substr(x, 2))
        val = 16*val + index("0123456789ABCDEF", substr(x, 1, 1)) - 1
    return val
}
1285
#
# wp2id(wp): convert a waypoint code (with or without the leading "GC")
# into a numeric id.
#   - codes of up to 4 characters that sort before "G000" are plain hex
#   - all other codes are base-31 over the digit set below, minus 411120
#     (can have 4 or more places)
# val is a local; set and pos are globals used as scratch variables.
#
function wp2id(wp, val) {
    sub("^GC", "", wp)
    debug(5, "wp2id: " wp " ...")

    if (length(wp) > 4 || wp >= "G000") {
        # new style, base-31
        set = "0123456789ABCDEFGHJKMNPQRTVWXYZ"
        val = 0
        for (pos = 1; pos <= length(wp); ++pos)
            val = val * 31 + index(set, substr(wp, pos, 1)) - 1
        val = val - 411120
        debug(5, "wp2id id: " val " ...")
        return val
    }

    # old hex style
    val = hex2dec(wp)
    debug(5, "wp2id hex: " val " ...")
    return val
}
1309
#
# rot13(string): return string with every ASCII letter rotated by 13
# places; all other characters pass through unchanged.  Applying it twice
# gives the original back, so it both encodes and decodes hints.
# See http://lorance.freeshell.org/rot13/
# ROTFROM, ROTTO, retstr, pos, char and rotpos are globals.
#
function rot13 (string) {
    ROTFROM = "nopqrstuvwxyzabcdefghijklmNOPQRSTUVWXYZABCDEFGHIJKLM"
    ROTTO = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    retstr = ""
    pos = 0
    while (pos < length(string)) {
        char = substr(string, pos + 1, 1)
        rotpos = index(ROTFROM, char)
        # letters are looked up in the translation table, the rest is kept
        char = (rotpos > 0) ? substr(ROTTO, rotpos, 1) : char
        retstr = retstr char
        pos++
    }
    return retstr
}
1325
#
# tagstart(lvl, tag, parms): print an opening XML tag on its own line,
# indented by two blanks per level.  parms, when not empty, is written
# verbatim after the tag name.
#
function tagstart(lvl, tag, parms) {
    printf "%*s", lvl*2, ""
    if (parms == "") {
        printf "<%s>\n", tag
        return
    }
    printf "<%s %s>\n", tag, parms
}
1333
#
# tagend(lvl, tag): print a closing XML tag on its own line, indented by
# two blanks per level.
#
function tagend(lvl, tag) {
    # indentation first, then the tag itself
    printf "%*s", lvl*2, ""
    printf "</%s>\n", tag
}
1338
#
# ee(text): escape the XML special characters &, < and > as entities.
#
function ee(text) {
    # the ampersand must go first, the entities below contain one
    gsub(/&/, "\\&amp;", text)
    gsub(/>/, "\\&gt;", text)
    gsub(/</, "\\&lt;", text)
    return text
}
1345
#
# tagtext(lvl, tag, text): print <tag>text</tag> on one line, indented by
# two blanks per level.  text is XML-escaped with ee().
#
function tagtext(lvl, tag, text) {
    printf "%*s", lvl*2, ""
    printf "<%s>%s</%s>\n", tag, ee(text), tag
}
1351
#
# tagptext(lvl, tag, parms, text): print <tag parms>text</tag> on one line,
# indented by two blanks per level.  text is XML-escaped with ee(); parms
# is written verbatim.
#
function tagptext(lvl, tag, parms, text) {
    printf "%*s", lvl*2, ""
    printf "<%s %s>%s</%s>\n", tag, parms, ee(text), tag
}
1357
#
# attr_begin1(gif, id, text): register one cache attribute.
#   gif   attribute image base name, used as the key
#   id    numeric attribute id
#   text  attribute description
# Stores into the global arrays attr_id and attr_text.
#
# A leftover debug probe of the misspelled key "slealth" was removed: just
# referencing that element created an empty entry in attr_id.
#
function attr_begin1(gif, id, text) {
    debug(1, "attr_begin1: " gif " " id " \"" text "\"")
    attr_id[gif] = id
    attr_text[gif] = text
    debug(1, "attr_id: " attr_id[gif])
}
#
# attr_begin(): fill the global lookup tables
#       attr_id[name]    numeric groundspeak attribute id
#       attr_text[name]  attribute description
# keyed by the base name of the attribute image.
#
# The ids follow the Groundspeak numbering, in which id 16 is not used.
# The table used to continue with 16 for "poisonoak", which made every
# later id one too low (e.g. 39 instead of 40 for "Stealth required").
#
function attr_begin() {
    attr_id["dog"] = 1;         attr_text["dog"] = "Dogs"
    attr_id["dogs"] = 1;        attr_text["dogs"] = "Dogs"
    attr_id["fee"] = 2;         attr_text["fee"] = "Access or parking fee"
    attr_id["rappelling"] = 3;  attr_text["rappelling"] = "Climbing gear"
    attr_id["boat"] = 4;        attr_text["boat"] = "Boat"
    attr_id["scuba"] = 5;       attr_text["scuba"] = "Scuba gear"
    attr_id["kids"] = 6;        attr_text["kids"] = "Recommended for kids"
    attr_id["onehour"] = 7;     attr_text["onehour"] = "Takes less than an hour"
    attr_id["scenic"] = 8;      attr_text["scenic"] = "Scenic view"
    attr_id["hiking"] = 9;      attr_text["hiking"] = "Significant hike"

    attr_id["climbing"] = 10;   attr_text["climbing"] = "Difficult climbing"
    attr_id["wading"] = 11;     attr_text["wading"] = "May require wading"
    attr_id["swimming"] = 12;   attr_text["swimming"] = "May require swimming"
    attr_id["available"] = 13;  attr_text["available"] = "Available at all times"
    attr_id["night"] = 14;      attr_text["night"] = "Recommended at night"
    attr_id["winter"] = 15;     attr_text["winter"] = "Available during winter"
    # id 16 is not assigned
    attr_id["poisonoak"] = 17;  attr_text["poisonoak"] = "Poison plants"
    attr_id["dangerousanimals"] = 18
    attr_text["dangerousanimals"] = "Dangerous Animals"
    attr_id["ticks"] = 19;      attr_text["ticks"] = "Ticks"

    attr_id["mines"] = 20;      attr_text["mines"] = "Abandoned mines"
    attr_id["cliff"] = 21;      attr_text["cliff"] = "Cliff / falling rocks"
    attr_id["hunting"] = 22;    attr_text["hunting"] = "Hunting"
    attr_id["danger"] = 23;     attr_text["danger"] = "Dangerous area"
    attr_id["wheelchair"] = 24; attr_text["wheelchair"] ="Wheelchair accessible"
    attr_id["parking"] = 25;    attr_text["parking"] = "Parking available"
    attr_id["public"] = 26;     attr_text["public"] = "Public transportation"
    attr_id["water"] = 27;      attr_text["water"] = "Drinking water nearby"
    attr_id["restrooms"] = 28;  attr_text["restrooms"] ="Public restrooms nearby"
    attr_id["phone"] = 29;      attr_text["phone"] = "Telephone nearby"

    attr_id["picnic"] = 30;     attr_text["picnic"] = "Picnic tables nearby"
    attr_id["camping"] = 31;    attr_text["camping"] = "Camping available"
    attr_id["bicycles"] = 32;   attr_text["bicycles"] = "Bicycles"
    attr_id["motorcycles"] = 33; attr_text["motorcycles"] = "Motorcycles"
    attr_id["quads"] = 34;      attr_text["quads"] = "Quads"
    attr_id["jeeps"] = 35;      attr_text["jeeps"] = "Off-road vehicles"
    attr_id["snowmobiles"] = 36; attr_text["snowmobiles"] = "Snowmobiles"
    attr_id["horses"] = 37;     attr_text["horses"] = "Horses"
    attr_id["campfires"] = 38;  attr_text["campfires"] = "Campfires"
    attr_id["thorns"] = 39;     attr_text["thorns"] = "Thorns"

    attr_id["stealth"] = 40;    attr_text["stealth"] = "Stealth required"
    attr_id["stroller"] = 41;   attr_text["stroller"] = "Stroller accessible"
    attr_id["firstaid"] = 42;   attr_text["firstaid"] = "Needs maintenance"
    attr_id["cow"] = 43;        attr_text["cow"] = "Watch for livestock"
    attr_id["flashlight"] = 44; attr_text["flashlight"] = "Flashlight required"
    attr_id["landf"] = 45;      attr_text["landf"] = "Lost And Found Tour"
    attr_id["rv"] = 46;         attr_text["rv"] = "Recreational Vehicle"
    attr_id["field"] = 47;      attr_text["field"] = "Field Puzzle"
    attr_id["UV"] = 48;         attr_text["UV"] = "UV Light Required"
    attr_id["snowshoes"] = 49;  attr_text["snowshoes"] = "Snowshoes"

    attr_id["skiis"] = 50;      attr_text["skiis"] = "Cross Country Skis"
    attr_id["s-tool"] = 51;     attr_text["s-tool"] = "Special Tool Required"
    attr_id["nightcache"] = 52; attr_text["nightcache"] = "Night Cache"
    attr_id["parkngrab"] = 53;  attr_text["parkngrab"] = "Park and Grab"
    attr_id["AbandonedBuilding"] = 54
    attr_text["AbandonedBuilding"] = "Abandoned Structure"
    attr_id["hike_short"] = 55; attr_text["hike_short"] = "Short hike (less than 1km)"
    attr_id["hike_med"] = 56;   attr_text["hike_med"] = "Medium hike (1km-10km)"
    attr_id["hike_long"] = 57;  attr_text["hike_long"] = "Long hike (+10km)"
    attr_id["fuel"] = 58;       attr_text["fuel"] = "Fuel Nearby"
    attr_id["food"] = 59;       attr_text["food"] = "Food Nearby"

    attr_id["wirelessbeacon"] = 60; attr_text["wirelessbeacon"] = "Wireless Beacon"
    attr_id["partnership"] = 61;    attr_text["partnership"] = "Partnership"
    attr_id["seasonal"] = 62;       attr_text["seasonal"] = "Seasonal Access"
    attr_id["tourist"] = 63;        attr_text["tourist"] = "Tourist Friendly"
    attr_id["treeclimbing"] = 64;   attr_text["treeclimbing"] = "Tree Climbing"
    attr_id["frontyard"] = 65
    attr_text["frontyard"] = "Front Yard (Private Residence)"
    attr_id["teamwork"] = 66;       attr_text["teamwork"] = "Teamwork Required"
}
1439
#
# tagattr(lvl, kind, yesno): print one <groundspeak:attribute> element.
#   lvl    indentation level (two blanks per level)
#   kind   attribute image base name, a key of attr_id / attr_text
#   yesno  value of the inc="" attribute (1 or 0)
# Attributes that are not in the table (or have id 0) produce no output.
#
# Membership is tested with "in": the former lookup attr_id[kind] == 0
# created an empty array element for every unknown attribute name.
#
function tagattr(lvl, kind, yesno) {
    kind = kind ""      # force a string subscript
    if (!(kind in attr_id) || attr_id[kind] == 0)
        return
    printf "%*s", lvl*2, ""
    printf "<groundspeak:attribute id=\"%d\" inc=\"%d\">", attr_id[kind], yesno
    printf "%s", attr_text[kind]
    printf "</groundspeak:attribute>\n"
}
1450
#
# Cache type, cache name and GC code.
#

# page layout as of 02/01/11: the type is the alt text of the type icon
/cache_types.aspx/ {
    gs_type = $0
    sub(/.* alt=./, "", gs_type)
    sub(/. width=.*/, "", gs_type)
    sub(/. title=.*/, "", gs_type)
    debug(1, "gs_type: " gs_type)
}

# Name span.  While no type is known yet the line is mined for the type
# (alt text) and the name is picked up by the identical pattern further
# down; once a type is known only the name is taken and the line is done.
/<span id="ctl00_ContentBody_CacheName">/ {
    if (!gs_type) {
        gs_type = $0
        sub(/.* alt=./, "", gs_type)
        sub(/. width=.*/, "", gs_type)
        debug(1, "type: " gs_type)
    } else {
        gs_name = remspan($0, "ctl00_ContentBody_CacheName")
        next
    }
}

/<span id="CacheName">/ {
    gs_name = remspan($0, "CacheName")
}

/<span id="ctl00_ContentBody_CacheName">/ {
    gs_name = remspan($0, "ctl00_ContentBody_CacheName")
}

# GC code from its span ...
/<span id=".*WaypointName".*>/ {
    gcid = remspan($0)
}

# ... or, new way (yech!), from a link parameter
/;wp=GC.*" / {
    gcid = $0
    sub(/.*wp=/, "", gcid)
    sub(/".*/, "", gcid)
}
#
# Short/long description and hints.
#

/<span id=".*ShortDescription">/ {
    gs_short_description = remspan($0)
}

# A new long description also resets the additional waypoints.
/<span id="LongDescription">/ {
    waypoints = ""
    gs_long_description = remspanlong($0, "LongDescription")
}

/<span id="ctl00_ContentBody_LongDescription">/ {
    waypoints = ""
    gs_long_description = remspanlong($0, "ctl00_ContentBody_LongDescription")
}

# Hint inside a div: joined to one line, <br> marks the line breaks.
/<div id="div_hint"/ {
    hints = remdiv($0)
    gsub("\n", " ", hints)
    gsub("^ *", "", hints)
    gsub("<br>", "\n", hints)
    if (DECODE) hints = rot13(hints)
}

# Hint inside a plain span: tags stripped, newlines flattened at the end.
/<span id="Hints"/ {
    hints = htmlclean(remspan($0))
    if (DECODE) hints = rot13(hints)
    gsub("\n", " ", hints)
}

# Hint on a single line between "displayMe" and the closing span.
/<span id="ctl00_ContentBody_Hints"/ {
    hints = $0
    sub(".*displayMe.>", "", hints)
    sub("</span>.*", "", hints)
    gsub("<br>", "\n", hints)
    # debug(1, "Hints: " hints)
    if (DECODE) hints = rot13(hints)
}
#
# Additional waypoints.  The heading changed over time; every variant
# triggers the same action: slurp the table that follows and convert it.
#
function load_additional_waypoints() {
    waypoints = remwaypoints()
    wplist = splitwaypoints(waypoints)
}

/<b>Additional Waypoints/ {
    load_additional_waypoints()
}

/<strong>Additional Waypoints/ {
    load_additional_waypoints()
}

# 03/01/2011
/ContentBody_WaypointsInfo/ {
    load_additional_waypoints()
}
06ee5545
RN
#
# Log collection.
#

# Start of the logs table: old class name, and new one as of 06/28/11.
/class="LogsTable Table"/ || /class="LogsTable"/ {
    logs_section = 1
}

# While inside the logs table every line is appended to logs, and
# logs_section follows the table nesting: up on "<table", down on
# "</table>".  Collection stops once it is back at zero.
(logs_section > 0) {
    logs = logs $0
    if ($0 ~ /<table/)
        logs_section += 1
    if ($0 ~ /<\/table>/)
        logs_section -= 1
}

# Older layouts keep the logs in one long span.
/<span id="CacheLogs">/ {
    logs = remspanlong($0, "CacheLogs")
    # remove header which does not exist >2010-01-12
    sub(".*td class=.containerHeader.>Cache Logs</td></tr>", "", logs)
}

/<span id="ctl00_ContentBody_CacheLogs">/ {
    logs = remspanlong($0, "ctl00_ContentBody_CacheLogs")
}

/<span id=".*CacheStats">/ {
    stats = remspan($0)
}

/<span id=".*NumVisits">/ {
    numvisits = remspan($0)
    debug(1, numvisits)
}
1555
#
# Cache id from the "print friendly" link: the printable page carries a
# numeric ID= parameter, the non-printable page a guid= parameter.
#
/lnkPrintFriendly/ {
    gid = $0
    # prefer ID=, fall back to guid= when the line has no ID=
    if (!sub(/^.*ID=/, "", gid))
        sub(/^.*guid=/, "", gid)
    # drop any following parameters
    sub(/&.*/, "", gid)
}
cffb6fa9
RN
#
# Owner name and owner guid.
#

# "by <a href..." line, with an optional leading "A cache " (08/21/2012)
/^ *(A cache )*by <a href/ {
    gs_guid = $0
    sub(/.*guid=/, "", gs_guid)
    sub(/&.*/, "", gs_guid)

    gs_owner = $0
    sub(/.*ds=2.>/, "", gs_owner)
    sub(/<.*/, "", gs_owner)
    debug(1, "owner: " gs_owner)
}

# Fake gs_guid is user 03/01/2011
/userInfo = {ID:/ {
    gs_guid = $0
    sub(/.*: /, "", gs_guid)
    sub(/}.*/, "", gs_guid)
    debug(1, "guid " gs_guid)
}
06ee5545
RN
#
# Container size, and the combined "CacheOwner" span that carries type,
# owner, size and owner guid.
#

# size from the alt text of the size image
/.* alt=.Size/ {
    gs_size = $0
    sub(/.*Size: /, "", gs_size)
    sub(". />.*", "", gs_size)
}

# old layout
/<span id="CacheOwner"/ {
    text = remspan($0)
    debug(1, "Owner text " text)

    # type: everything before the first tag
    gs_type = text
    sub(/<.*/, "", gs_type)

    # size: between "Size: " and the next tag
    gs_size = text
    sub(/.*Size: /, "", gs_size)
    sub(/<.*/, "", gs_size)

    # owner: after "by ", without the link markup around the name
    gs_owner = text
    debug(1, gs_owner)
    sub(/.*<br>by /, "", gs_owner)
    sub(/ [[].*/, "", gs_owner)
    debug(1, gs_owner)
    sub(/<a[^>]*>/, "", gs_owner)
    sub(/<.a[^>]*>/, "", gs_owner)
    sub(/.*<br .>/, "", gs_owner)
    sub(/^by /, "", gs_owner)
    debug(1, "owner " gs_owner)

    # owner guid: guid= parameter of the profile link
    gs_guid = text
    sub(/.*guid=/, "", gs_guid)
    sub(/&.*/, "", gs_guid)
    debug(1, "guid " gs_guid)
}

# newer layout, everything on the matching line itself
/<span id="ctl00_ContentBody_CacheOwner"/ {
    text = $0
    debug(2, "Owner text: " text)

    gs_type = text
    sub(/<br .*/, "", gs_type)
    sub(/.*>/, "", gs_type)
    debug(1, "gs_type: " gs_type)

    gs_owner = text
    sub(/.*ds=2.>/, "", gs_owner)
    sub(/<.*/, "", gs_owner)
    debug(1, "gs_owner: " gs_owner)

    gs_size = text
    sub(/.*Size: /, "", gs_size)
    sub(/<.*/, "", gs_size)

    gs_guid = text
    sub(/.*guid=/, "", gs_guid)
    sub(/&.*/, "", gs_guid)
    sub(/. title=.*/, "", gs_guid)
    debug(1, "guid: " gs_guid)
}
#
# Cache status from the error banner: "unavailable" marks the cache as
# not available, "been archived" marks it as archived.
#
function note_cache_status(msg) {
    if (msg ~ "unavailable")
        available = "False"
    if (msg ~ "been archived")
        archived = "True"
}

# old layout: the banner text is checked on the matching line only
/<span id="ErrorText"/ {
    note_cache_status($0)
}

# newer layout: the whole span is collected first
/<span id="ctl00_ContentBody_ErrorText"/ {
    errortext = remspan($0, "ctl00_ContentBody_ErrorText")
    note_cache_status(errortext)
    debug(1, "available: " available "; archived: " archived)
}
#
# Cache coordinates and state/country.
#

# set lat/lon from a line of the form "...lat=<value>;...lng=<value>;..."
function script_coords(s) {
    lat = s
    sub(/.*lat=/, "", lat)
    sub(/;.*/, "", lat)
    lon = s
    sub(/.*lng=/, "", lon)
    sub(/;.*/, "", lon)
}

# link to the large map: latitude= and longitude= URL parameters
/<span id="LargeMapPrint"/ {
    text = remspan($0)

    lat = text
    sub(/.*latitude=/, "", lat)
    sub(/&.*/, "", lat)

    lon = text
    sub(/.*longitude=/, "", lon)
    sub(/\".*/, "", lon)
    sub(/&.*/, "", lon)
}

# script variables, used only while no latitude is known yet
/var lat=[-0-9]/ {
    if (lat == "")
        script_coords($0)
}

# "In <state>, <country>"
/<span id=".*Location"/ {
    text = remspan($0)

    gs_state = text
    sub(/In */, "", gs_state)
    sub(/,.*/, "", gs_state)

    gs_country = text
    sub(/.*, /, "", gs_country)
    sub(/ <.*/, "", gs_country)
    sub(/^In /, "", gs_country)
}

# another script variant, again only while no latitude is known yet
/lat=.*; lng=.*; guid=/ {
    if (lat == "")
        script_coords($0)
}
#
# Date hidden, older page layouts.  The result in "time" is YYYY-MM-DD.
#

# layout of 2/1/11: the date (MM/DD/YYYY) is on the second line after
# the marker
/<span class="minorCacheDetails">Hidden/ {
    getline time
    getline time
    sub(/^ */, "", time)
    sub(/<.*/, "", time)
    split(time, fld, "/")
    time = sprintf("%d-%02d-%02d", fld[3], fld[1], fld[2])
    debug(1, "time: " time)
}

# layout of 6/28/11: the date is on the third line after the marker, in
# one of the user selectable formats
/> <span class="minorCacheDetails">/ {
    getline time
    getline time
    getline time
    sub(/^ */, "", time)
    sub(/<.*/, "", time)
    # treat dashes like slashes
    gsub(/-/, "/", time)
    rc = split(time, fld, "/")
    # fallback kept from older versions; after the gsub no dash is left
    if (rc == 1)
        rc = split(time, fld, "-")
    debug(1, "timerc: " rc)
    if (DATEFMT == 1) {
        # DD/MM/YYYY
        time = sprintf("%d-%02d-%02d", fld[3], fld[2], fld[1])
    } else if (fld[1] >= 1000) {
        # YYYY/MM/DD
        time = sprintf("%d-%02d-%02d", fld[1], fld[2], fld[3])
    } else {
        # MM/DD/YYYY
        time = sprintf("%d-%02d-%02d", fld[3], fld[1], fld[2])
    }
    debug(1, "time: " time)
}

# oldest layout: MM/DD/YYYY inside the span; one following input line is
# read into text before the span is evaluated
/<span id="DateHidden">/ {
    getline text
    time = remspan($0)
    split(time, fld, "/")
    time = sprintf("%d-%02d-%02d", fld[3], fld[1], fld[2])
}
#
# Date hidden, ctl00 layout.  Two spellings are understood:
#   MM/DD/YYYY                   three "/"-separated fields
#   <weekday>, <Month> DD, YYYY  three ","-separated fields
# On success "time" becomes YYYY-MM-DD and processing of the line stops
# (next); otherwise "time" is cleared.
#
# The "<Month> DD" part is copied to a scalar before it is split: the
# former split(fld[2], fld, " ") split an element of fld into fld itself,
# which relies on the source string being read before the array is
# cleared.
#
/<span id="ctl00_ContentBody_DateHidden">/ {
    time = remspan($0, "ctl00_ContentBody_DateHidden")

    rc = split(time, fld, "/")
    if (rc == 3) {
        time = sprintf("%d-%02d-%02d", fld[3], fld[1], fld[2])
        debug(1, "time: " time)
        next
    }

    rc = split(time, fld, ",")
    if (rc == 3) {
        yyyy = fld[3]
        hidden_monthday = fld[2]
        split(hidden_monthday, fld, " ")
        mm = Month[ fld[1] ]
        dd = fld[2]
        time = sprintf("%d-%02d-%02d", yyyy, mm, dd)
        debug(1, "time: " time)
        next
    }

    time = ""
}
#
# Difficulty and terrain ratings.  In every layout the rating is the first
# word of the alt text of the rating image.
#
function first_alt_word(s) {
    sub(/.*alt=./, "", s)
    sub(/ .*/, "", s)
    return s
}

/ctl00_ContentBody_uxLegendScale/ {
    text = first_alt_word($0)
    gs_diff = text
    debug(1 , "gs_diff: " gs_diff)
}

/ctl00_ContentBody_Localize/ {
    text = first_alt_word($0)
    gs_terr = text
    debug(1 , "gs_terr: " gs_terr)
}

# image on the line after the label
/^ *Difficulty:<.strong>/ {
    getline text
    text = first_alt_word(text)
    gs_diff = text
    debug(1 , "gs_diff: " gs_diff)
}

# layout of 2/1/11: image on the third line after the label
/^ *Difficulty:/ {
    getline text
    getline text
    getline text
    text = first_alt_word(text)
    gs_diff = text
    debug(1 , "gs_diff: " gs_diff)
}

/<span id="Difficulty">/ {
    text = first_alt_word(remspan($0))
    gs_diff = text
}

/<span id="ctl00_ContentBody_Difficulty">/ {
    text = first_alt_word(remspan($0, "ctl00_ContentBody_Difficulty"))
    debug(1, "difficulty " text)
    gs_diff = text
}

# image on the line after the label
/^ *Terrain:<.strong>/ {
    getline text
    text = first_alt_word(text)
    gs_terr = text
    debug(1 , "gs_terr: " gs_terr)
}

# layout of 2/1/11: image on the third line after the label
/^ *Terrain:/ {
    getline text
    getline text
    getline text
    text = first_alt_word(text)
    gs_terr = text
    debug(1 , "gs_terr: " gs_terr)
}

/<span id="Terrain">/ {
    text = first_alt_word(remspan($0))
    gs_terr = text
}

/<span id="ctl00_ContentBody_Terrain">/ {
    text = first_alt_word(remspan($0, "ctl00_ContentBody_Terrain"))
    debug(1, "terrain " text)
    gs_terr = text
}
#
# Cache attributes.  The matching line holds one image per attribute,
# named <attribute>-yes.gif or <attribute>-no.gif.  The line is reduced to
# a blank separated list of those base names, which is then separated into
#       attr_yes[1..nattr_yes]  attributes that apply
#       attr_no[1..nattr_no]    attributes that do not apply
#
/title=.What are Attributes?/ {
    text = $0
    debug(5, "Attr " text)

    # keep only the image base names
    gsub("<img src=./images/attributes/", "", text)
    # image tail before 06/03/10
    gsub(/alt="[^"]*" width="30" height="30" .>/, "", text)
    # image tail after 06/03/10
    gsub(/alt="[^"]*" title="[^"]*" width="30" height="30" .>/, "", text)
    gsub("<p class=.NoSpacing.*", "", text)
    gsub(/^ */, "", text)
    gsub(/\.gif../, "", text)
    gsub(/attribute-blank/, "", text)

    # positive list: drop the "-no" names, strip the "-yes" suffix
    attrs_yes = text
    gsub(/[a-z0-9A-Z]*-no/, "", attrs_yes)
    gsub(/-yes/, "", attrs_yes)

    # negative list: the other way round
    attrs_no = text
    gsub(/[a-z0-9A-Z]*-yes/, "", attrs_no)
    gsub(/-no/, "", attrs_no)

    debug(1, "attrs_yes: " attrs_yes)
    debug(1, "attrs_no: " attrs_no)

    nattr_yes = split(attrs_yes, attr_yes, " ")
    nattr_no = split(attrs_no, attr_no, " ")

    debug(1, "nattr_yes: " nattr_yes)
    debug(1, "nattr_no: " nattr_no)
}
#
# A line starting with a JSON object whose status is "success": hand it to
# ParseJSON() to fill json_logs, and flag that JSON logs are present.
#
/^{.status.:.success/ {
    ParseJSON($0, json_logs)
    json_log_bool = 1
}
1823
#
# One-time setup: month name table, base URL of the cache pages,
# attribute tables, "first waypoint" flag and waypoint initialisation.
#
BEGIN {
    # Month["January"] = 1 ... Month["December"] = 12
    month_count = split("January February March April May June July August September October November December", month_names, " ")
    for (month_no = 1; month_no <= month_count; ++month_no)
        Month[month_names[month_no]] = month_no

    BaseURL = "http://www.geocaching.com/seek/cache_details.aspx"
    attr_begin()

    first = 1

    wpt_init()
}
1844/<\/html>/ {
1845 if ((lat == "") || (lon == ""))
1846 {
1847 debug(0, "Waypoint coordinates not found for " gcid ", no output!")
1848 #next
1849 }
1850
1851 # too long a block to be indented
1852 if (!INCR && first)
1853 {
1854 print "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
1855 tagstart(0, "gpx")
1856 tagtext(1, "desc", "Geocache file generated by geo-html2gpx")
1857 tagtext(1, "author", "geo-html2gpx")
1858 "date +%Y-%m-%dT%H:%M:%S" | getline date
1859 tagtext(1, "time", date)
1860 first = 0
1861 }
1862
1863 gs_name = umlauts(gs_name)
1864 gs_owner = umlauts(gs_owner)
1865
1866 tagstart(1, "wpt", "lat=\"" lat "\" lon=\"" lon "\"")
1867 if (time != "")
cffb6fa9 1868 tagtext(2, "time", time "T07:00:00Z")
06ee5545
RN
1869 tagtext(2, "name", gcid)
1870 tagtext(2, "desc", gs_name " by " gs_owner ", " \
1871 gs_type " (" gs_diff "/" gs_terr ")")
1872
1873 # alternate URL... tagtext(2, "url", BaseURL "?wp=" gcid)
1874 # alternate URL... tagtext(2, "url", BaseURL "?id=" gid)
1875 tagtext(2, "url", BaseURL "?wp=" gcid)
1876 tagtext(2, "urlname", gs_name)
1877
1878 # we do this last... tagtext(2, "sym", sym)
1879
1880 tagtext(2, "type", "Geocache|" gs_type)
1881
1882 # FIXME? GC-written GPX files contain numeric, non-UUID,
1883 # cache/owner/finder ids
1884 # Oregon needs numeric cache id, or behaves erratically!
1885 gid = wp2id(gcid)
1886 tagstart(2, "groundspeak:cache",
1887 "id=\"" gid "\" available=\"" available \
1888 "\" archived=\"" archived "\"" \
1889 " xmlns:groundspeak=\"http://www.groundspeak.com/cache/1/0/1\"")
1890 tagtext(3, "groundspeak:name", gs_name)
1891 tagtext(3, "groundspeak:placed_by", gs_owner)
1892 tagptext(3,"groundspeak:owner", "id=\"" gs_guid "\"", gs_owner)
1893 tagtext(3, "groundspeak:type", gs_type)
cffb6fa9 1894 tagtext(3, "groundspeak:container", gs_size)
06ee5545
RN
1895
1896 if (nattr_yes != 0 || nattr_no != 0)
1897 {
1898 tagstart(3, "groundspeak:attributes")
1899 for (i = 1; i <= nattr_yes; ++i)
1900 tagattr(4, attr_yes[i], 1)
1901 for (i = 1; i <= nattr_no; ++i)
1902 tagattr(4, attr_no[i], 0)
1903 tagend(3, "groundspeak:attributes")
1904 }
1905
06ee5545
RN
1906 tagtext(3, "groundspeak:difficulty", gs_diff)
1907 tagtext(3, "groundspeak:terrain", gs_terr)
1908 tagtext(3, "groundspeak:country", gs_country)
1909 tagtext(3, "groundspeak:state", gs_state)
1910 if (!NOHTML)
1911 {
1912 tagptext(3, "groundspeak:short_description", "html=\"True\"",
1913 gs_short_description)
1914 if (!NOWPTS && waypoints)
1915 {
1916 # reproduce "simplified table" by GC PQ
1917 # prefixed_gcid - wpname<br />original_style_coord<br />note<br />
1918 waypoints = wpclean(waypoints)
1919 # include "zero" waypoints here!
1920 gs_long_description = gs_long_description \
1921 "<p>Additional Waypoints</p>" waypoints
1922 }
1923 tagptext(3, "groundspeak:long_description", "html=\"True\"",
1924 gs_long_description)
1925 }
1926 else
1927 {
1928 gs_short_description = htmlclean(gs_short_description)
1929 tagptext(3, "groundspeak:short_description", "html=\"False\"",
1930 gs_short_description)
1931 gs_long_description = htmlclean(gs_long_description)
1932 if (waypoints)
1933 gs_long_description = gs_long_description \
1934 "\n\nAdditional Waypoints\n" tableclean(waypoints)
1935 tagptext(3, "groundspeak:long_description", "html=\"False\"",
1936 gs_long_description)
1937 }
1938 tagtext(3, "groundspeak:encoded_hints", hints)
1939
1940 if (json_log_bool)
1941 {
1942 nlogs = JSONArrayLength(json_logs, "data")
cffb6fa9
RN
1943 if (nlogs > NUMLOGS+1)
1944 nlogs = NUMLOGS+1
06ee5545
RN
1945 debug(1, "New Logs: " nlogs)
1946 if (nlogs > 1)
1947 tagstart(3, "groundspeak:logs")
1948 else
1949 tagstart(3, "groundspeak:logs", "/")
1950
1951 for (i = 1; i < nlogs; ++i)
1952 {
1953 ltype = json_logs["data" SUBSEP i SUBSEP "LogTypeImage"]
1954 if (ltype ~ /smile/) ltype = "Found it"
1955 else if (ltype ~ /happy/) ltype = "Found it"
1956 else if (ltype ~ /note/) ltype = "Write note"
1957 else if (ltype ~ /sad/) ltype = "Didn'"'"'t Find it"
1958 else if (ltype ~ /attended/) ltype = "Attended"
1959 else if (ltype ~ /rsvp/) ltype = "Will Attend"
1960 else if (ltype ~ /greenlight/) ltype = "Green"
1961 else if (ltype ~ /traffic_cone/) ltype = "Archive"
1962 else if (ltype ~ /disabled/) ltype = "Temporarily Disable Listing"
1963 else if (ltype ~ /coord_update/) ltype = "Update Coordinates"
1964 else ltype = "Unknown"
1965
1966 ldate = json_logs["data" SUBSEP i SUBSEP "Visited"]
cffb6fa9
RN
1967 n = split(ldate, fld, "/")
1968 if (n == 3)
1969 {
1970 #new format: 08/18/2011
1971 if (DATEFMT == 1)
1972 ldate = sprintf("%d-%02d-%02dT20:00:00Z",
1973 fld[3], fld[2], fld[1])
1974 else
1975 ldate = sprintf("%d-%02d-%02dT20:00:00Z",
1976 fld[3], fld[1], fld[2])
1977 debug(2, "logdate: " ldate)
1978 }
06ee5545 1979 lfinder = json_logs["data" SUBSEP i SUBSEP "UserName"]
cffb6fa9 1980 lfinder = umlauts(lfinder)
06ee5545 1981 logid = json_logs["data" SUBSEP i SUBSEP "LogID"]
cffb6fa9 1982 guid = json_logs["data" SUBSEP i SUBSEP "AccountID"]
06ee5545
RN
1983 ltext = json_logs["data" SUBSEP i SUBSEP "LogText"]
1984 ltext = htmlclean(ltext)
1985 ltext = umlauts(ltext)
1986
1987 if (lfinder == USERNAME && ltype == "Found it")
1988 sym = "Geocache Found"
1989 if (lfinder == USERNAME && ltype == "Attended")
1990 sym = "Geocache Found"
1991 tagstart(4, "groundspeak:log", "id=\"" logid "\"")
1992 tagtext(5, "groundspeak:date", ldate)
1993 tagtext(5, "groundspeak:type", ltype)
1994 tagptext(5, "groundspeak:finder", "id=\"" guid "\"", lfinder)
1995 tagptext(5, "groundspeak:text", "encoded=\"" "False" "\"", ltext)
1996 tagend(4, "groundspeak:log")
1997 }
1998
1999 if (nlogs > 1)
2000 tagend(3, "groundspeak:logs")
2001 }
2002 else
2003 {
2004 # nlogs = split(logs, entry, "</tr>")
2005 nlogs = split(logs, entry, "</tr><tr>")
2006 if (nlogs > NUMLOGS+1)
2007 nlogs = NUMLOGS+1
2008
2009 if (nlogs > 1)
2010 tagstart(3, "groundspeak:logs")
2011 else
2012 tagstart(3, "groundspeak:logs", "/")
2013
2014 for (i = 1; i < nlogs; ++i)
2015 {
2016 sub("<tr><td[^>]*>", "", entry[i])
2017 sub("</td>", "", entry[i])
2018 if (!entry[i]) continue
2019 # old split location
2020 sub(/.*<[Ss][Tt][Rr][Oo][Nn][Gg]><img src=./, "", entry[i])
2021
2022 ltype = entry[i]
2023 #debug(1, "log: " ltype)
2024 sub(/>.*/, "", ltype) # leaves the URL of the smiley
2025 if (ltype ~ /smile/) ltype = "Found it"
2026 else if (ltype ~ /happy/) ltype = "Found it"
2027 else if (ltype ~ /note/) ltype = "Write note"
2028 else if (ltype ~ /sad/) ltype = "Didn'"'"'t Find it"
2029 else if (ltype ~ /attended/) ltype = "Attended"
2030 else if (ltype ~ /rsvp/) ltype = "Will Attend"
2031 else if (ltype ~ /greenlight/) ltype = "Green"
2032 else if (ltype ~ /traffic_cone/) ltype = "Archive"
2033 else if (ltype ~ /disabled/) ltype = "Temporarily Disable Listing"
2034 else if (ltype ~ /coord_update/) ltype = "Update Coordinates"
2035 else ltype = "Unknown"
2036
2037 ldate = entry[i]
2038 # split off &nbsp;/blank
2039 sub(/^[^>]*>[^ ;]*[ ;]/, "", ldate)
2040 sub(/ by <.*/, "", ldate)
2041 sub(/ by /, "", ldate)
2042 sub(/.*LogDate.>about /, "", ldate)
2043 sub(/.*LogDate.>/, "", ldate)
2044 sub(/<.*/, "", ldate)
2045 gsub(/-/, "/", ldate)
2046 debug(1, "logdate: " ldate)
2047 if (ldate ~ /ago/)
2048 {
2049 cmd = sprintf("%s -d \"12am %s\" +%%Y-%%m-%%dT07:00:00Z",
2050 DATE, ldate)
2051 cmd | getline ldate; close(cmd)
2052 }
2053 else
2054 {
2055 n = split(ldate, fld, " ")
2056 if (n >= 2)
2057 {
2058 #old format: August 18
2059 mm = Month[fld[1]]
2060 dd = fld[2] + 0
2061 if (n >= 3)
2062 yy = fld[3]
2063 if (yy+0 == 0)
2064 yy = YR
2065 ldate = sprintf("%d-%02d-%02dT07:00:00", yy, mm, dd)
2066 }
2067 n = split(ldate, fld, "/")
2068 if (n == 3)
2069 {
2070 #new format: 08/18/2011
2071 if (DATEFMT == 1)
2072 ldate = sprintf("%d-%02d-%02dT07:00:00",
2073 fld[3], fld[2], fld[1])
2074 else
2075 ldate = sprintf("%d-%02d-%02dT07:00:00",
2076 fld[3], fld[1], fld[2])
2077 debug(1, "logdate: " ldate)
2078 }
2079 }
2080
2081 lfinder = entry[i]
2082 sub(/[^<]*</, "", lfinder) # Delete all before <A NAME...
2083
2084 logid = lfinder
2085 sub(/[^"]*"/, "", logid)
2086 sub(/.* id="/, "", logid)
2087 sub(/.*LUID=/, "", logid)
2088 sub(/\".*/, "", logid)
2089 debug(1, "logid: " logid)
2090
2091 guid = lfinder
2092 debug(1, "guid: " guid)
2093 #sub(/[^>]*>/, "", guid) # Delete all before <A HREF...
2094 #sub(/>.*/, "", guid) # Delete all after <A HREF...
2095 sub(/.*guid=/, "", guid)
2096 sub(/\".*/, "", guid)
2097 sub(/\&.*/, "", guid)
2098 sub(/. id=.*/, "", guid)
2099 debug(1, "guid: " guid)
2100
2101 #debug(1, "lfinder: " lfinder)
2102 sub(/[^>]*>/, "", lfinder) # Delete all before <A HREF...
2103 #debug(1, "lfinder: " lfinder)
2104 #sub(/[^>]*>/, "", lfinder) # Delete all before name
2105 sub(/<.*/, "", lfinder) # Delete all after name
2106 lfinder = umlauts(lfinder)
2107 debug(1, "lfinder: " lfinder)
2108
2109 ltext = entry[i]
2110 sub(/.*found\)<br .>/, "", ltext)
2111 sub("</font>.*", "", ltext)
2112 sub("<a href=.log.aspx[^>]*>[^<]*</a>", "", ltext)
2113 sub("<a href=.upload.aspx[^>]*>[^<]*</a>", "", ltext)
2114 # remove remaining HTML tags from log text. Seems to be a good
2115 # idea in any case, independent of NOHTML setting!
2116 ltext = htmlclean(ltext)
2117 ltext = umlauts(ltext)
2118
2119 if (lfinder == USERNAME && ltype == "Found it")
2120 sym = "Geocache Found"
2121 if (lfinder == USERNAME && ltype == "Attended")
2122 sym = "Geocache Found"
2123 tagstart(4, "groundspeak:log", "id=\"" logid "\"")
2124 tagtext(5, "groundspeak:date", ldate)
2125 tagtext(5, "groundspeak:type", ltype)
2126 tagptext(5, "groundspeak:finder", "id=\"" guid "\"", lfinder)
2127 tagptext(5, "groundspeak:text", "encoded=\"" "False" "\"", ltext)
2128 tagend(4, "groundspeak:log")
2129 }
2130 if (nlogs > 1)
2131 tagend(3, "groundspeak:logs")
2132 }
2133
2134 tagstart(3, "groundspeak:travelbugs", "/")
2135
2136 tagend(2, "groundspeak:cache")
2137 tagtext(2, "sym", sym)
2138 tagend(1, "wpt")
2139
2140 # add Additional Waypoints in wpt form
2141 if (!NOWPTS && wplist)
2142 {
2143 split(wplist, wps, "\n")
2144 i = 0
2145 for (wp in wps)
2146 ++i
2147 wp = 0
2148 while (wp < i)
2149 {
2150 ++wp
2151 # lat lon|prefix|lookup|wpname|url|note
2152 # e.g.: lat="44.888267" lon="-93.159233"|PC|PARK
2153 # |GCPMG6-Parking (Parking Area)|http://...|.31 miles from cache.
2154 debug(1, "wps: " wps[wp])
2155 split(wps[wp], line, "|")
2156 if (line[1] &&
2157 (!NOZERO || (line[1] !~ "lat=\"0.000000\" lon=\"0.000000\"") ) )
2158 {
2159 # line format: coords|prefix|lookup|wpname|url|note
2160 tagstart(1, "wpt", line[1])
2161 #tagtext(2, "time", "...")
2162 tagtext(2, "name", line[2] substr(gcid,3))
2163 tagtext(2, "cmt", line[6] ? line[6] : "")
2164 statname = line[4]
2165 gsub(" \\(.*\\).*", "", statname)
2166
2167 desc = line[4]
2168 sub(" \\(.*", "", desc)
2169 tagtext(2, "desc", desc)
2170
2171 tagtext(2, "url", line[5])
2172
2173 urlname = desc
2174 tagtext(2, "urlname", urlname)
2175
2176 stattype = line[4]
2177 gsub(".*\\(", "", stattype)
2178 gsub("\\).*", "", stattype)
2179 tagtext(2, "sym", stattype)
2180 tagtext(2, "type", "Waypoint|" stattype)
2181 tagend(1, "wpt")
2182 }
2183 }
2184 }
2185 wpt_init()
2186}
# End of input: call tagend(0, "gpx") to finish the document, but only
# when neither INCR nor first is set.
#   INCR  - presumably the -i "incremental, no XML and GPX headers"
#           option; verify where it is assigned.
#   first - NOTE(review): looks like a flag that stays set until the
#           first waypoint has been written; confirm against the
#           code that clears it.
END {
    if (!(INCR || first)) {
        tagend(0, "gpx")
    }
}
2191' | $POSTPROC