tools/geo-html2gpx

   1 #!/bin/sh
   2
   3 PROGNAME="$0"
   4
   5 usage() {
   6         cat <<EOF
   7 NAME
   8         `basename $PROGNAME`- Convert gc.com *printable* web pages into GPX
   9
  10 SYNOPSIS
  11         `basename $PROGNAME` [options] [gc-com.html]...
  12
  13 DESCRIPTION
  14         Convert gc.com *printable* web pages into GPX, including
  15         cache description and all logs.
  16
  17         The *printable* web pages can be fetched using geo-nearest,
  18         geo-newest, geo-placed, geo-found, or geo-gid with the -H option.
  19
  20 OPTIONS
  21         -b              Normalize output by postprocessing with gpsbabel
  22         -e              Encode hints with rot13 (e.g. NORTH = ABEGU)
  23         -i              Incremental, no XML and GPX headers
  24         -l number       Maximum number of log entries to be exported [unlimited]
  25         -n              No HTML in descriptions (experimental)
  26         -o FMT          Output FMT instead of GPX by using gpsbabel
  27         -u username     Indicate found status for username [$USERNAME]
  28         -w              Do not add "Additional Waypoints" to the GPX output
  29         -z              Do not output waypoints with "zero" coordinates
  30         -E var=val      Set environment "var" to "val"
  31                         i.e. DATEFMT=0|1
  32         -D lvl          Debug level
  33
  34 DEFAULTS
  35         Defaults can also be set with variables in file \$HOME/.georc:
  36
  37             DATEFMT=[0|1];
  38
  39 DATE FORMATS
  40         Geocaching.com date formats that are compatible:
  41
  42             GC Format   Example     Compatible
  43             YYYY-MM-DD  2011-07-13  yes
  44             YYYY/MM/DD  2011/07/13  yes
  45             MM/DD/YYYY  07/13/2011  yes
  46             DD/MM/YYYY  13/07/2011  yes if DATEFMT=1 in \$HOME/.georc
  47             DD/Mmm/YYYY 13/Jul/2001 no
  48             Mmm/DD/YYYY Jul/13/2011 no
  49             DD Mmm YY   13 Jul 11   yes (english only)
  50
  51         Change them here:
  52
  53             http://www.geocaching.com/account/ManagePreferences.aspx
  54
  55 EXAMPLES
  56         Convert into GPX:
  57
  58             geo-found -n9999 -H. > /dev/null
  59             geo-html2gpx *.html > found.gpx
  60 EOF
  61
  62         exit 1
  63 }
  64
  65 #
  66 #       Report an error and exit
  67 #
  68 error() {
  69         echo "`basename $PROGNAME`: $1" >&2
  70         exit 1
  71 }
  72
  73 debug() {
  74         if [ $DEBUG -ge $1 ]; then
  75             echo "`basename $PROGNAME`: $2" >&2
  76         fi
  77 }
  78
  79 if [ `uname` = 'Darwin' ]; then
  80     awk=gawk
  81     date=gdate
  82 else
  83     awk=awk
  84     date=date
  85 fi
  86
  87 #
  88 #       Read RC file, if there is one
  89 #
  90 USERNAME=
  91 if [ -f $HOME/.georc ]; then
  92         . $HOME/.georc
  93         # N.B. must switch to read_rc_file if LAT/LON is ever needed here
  94 fi
  95 #
  96
  97 #       Process the options
  98 #
  99 POSTPROC="cat"
 100 DEBUG=0
 101 INCR=0
 102 NOWPTS=0
 103 NOZERO=0
 104 NOHTML=0
 105 DECODE=1
 106 NUMLOGS=1000000
 107 while getopts "beE:iwzl:no:u:D:h?" opt
 108 do
 109         case $opt in
 110         b)      POSTPROC="gpsbabel -igpx -f- -ogpx -F-";;
 111         e)      DECODE=0;;
 112         E)      eval "$OPTARG";;
 113         i)      INCR=1;;
 114         l)      NUMLOGS="$OPTARG";;
 115         o)      POSTPROC="gpsbabel -igpx -f- -o$OPTARG -F-";;
 116         n)      NOHTML=1;;
 117         u)      USERNAME="$OPTARG";;
 118         w)      NOWPTS=1;;
 119         z)      NOZERO=1;;
 120         D)      DEBUG="$OPTARG";;
 121         h|\?)   usage;;
 122         esac
 123 done
 124 shift `expr $OPTIND - 1`
 125
 126 #
 127 #       Main Program
 128 #
 129 YR=`date +"%Y"`
 130
 131 case `$awk --version` in
 132 "GNU Awk"*)     ;;
 133 *)              error "awk is not GNU awk!";;
 134 esac
 135
 136 cat "$@" \
 137 | tr -d '\001\002\003\004\005\006\007\015\021\022\023\024\026\030' \
 138 | sed 's/<A /\
 139 <A /g' |
 140 $awk -vDEBUG=$DEBUG -vINCR=$INCR \
 141     -vNOWPTS=$NOWPTS -vNOZERO=$NOZERO \
 142     -vNOHTML=$NOHTML \
 143     -vDECODE=$DECODE \
 144     -vUSERNAME="$USERNAME" \
 145     -vDATE="$date" \
 146     -vDATEFMT="$DATEFMT" \
 147     -vYR="$YR" -vNUMLOGS=$NUMLOGS \
 148 '
 149 # Copyright (c) 2010 Dan Saar
 150 #
 151 # Permission is hereby granted, free of charge, to any person obtaining a copy
 152 # of this software and associated documentation files (the "Software"), to deal
 153 # in the Software without restriction, including without limitation the rights
 154 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 155 # copies of the Software, and to permit persons to whom the Software is
 156 # furnished to do so, subject to the following conditions:
 157 #
 158 # The above copyright notice and this permission notice shall be included in
 159 # all copies or substantial portions of the Software.
 160 #
 161 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 162 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 163 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 164 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 165 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 166 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 167 # THE SOFTWARE.
 168
 169 function prsJSON_hex2num(s,     rv, ii, len, k)
 170 {
 171    rv = 0
 172    s = tolower(s)
 173    len = length(s)
 174
 175    for (ii = 1; ii <= len; ii++)
 176    {
 177       k = index("0123456789abcdef", substr(s, ii, 1))
 178       if (k > 0)
 179          rv = rv * 16 + (k-1)
 180       else
 181          break;
 182    }
 183
 184    return rv
 185 }
 186
 187 function prsJSON_EncodeAsUTF8( v,      s, p1, p2, p3, p4, cs )
 188 {
 189    cs = "\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377"
 190
 191    if ( v < 128 )
 192       s = sprintf("%c", v )
 193
 194    else if ( v < 2048 ) # 110xxxxx 10xxxxxx
 195    {
 196       p1 = int(v/64) % 32
 197       p2 = v % 64
 198       s = substr(cs, 65+p1, 1) substr(cs, p2+1, 1)
 199    }
 200
 201    else if ( v < 65536 ) # 1110xxxx 10xxxxxx 10xxxxxx
 202    {
 203       p1 = int(v/4096) % 16
 204       p2 = int(v/64) % 64
 205       p3 = v % 64
 206       s = substr(cs, 97+p1, 1) substr(cs, p2+1, 1) substr(cs, p3+1, 1)
 207    }
 208
 209    else if ( v < 1114112 ) # 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 210    {
 211       p1 = int(v/262144) % 8
 212       p2 = int(v/4096) % 64
 213       p3 = int(v/64) % 64
 214       p4 = v % 64
 215       s = substr(cs, 113+p1, 1) substr(cs, p2+1, 1) substr(cs, p3+1, 1) substr(cs, p4+1, 1)
 216    }
 217
 218    else
 219       s = ""
 220
 221    return s;
 222 }
 223
 224 function prsJSON_UnescapeString(jsonString,     matchedString, matchedValue)
 225 {
 226    if (jsonString == "\"\"")
 227       return ""
 228
 229    if (jsonString ~ /^".+"$/)
 230       jsonString = substr(jsonString,2,length(jsonString)-2)
 231
 232    gsub(/\\\\/, "\\u005C", jsonString)
 233    gsub(/\\"/, "\"", jsonString)
 234    gsub(/\\\//, "/", jsonString)
 235    gsub(/\\b/, "\b", jsonString)
 236    gsub(/\\f/, "\f", jsonString)
 237    gsub(/\\n/, "\n", jsonString)
 238    gsub(/\\r/, "\r", jsonString)
 239    gsub(/\\t/, "\t", jsonString)
 240
 241    if (match(jsonString, /\\[^u]/))
 242       return "ParseJSON Error: Invalid String at " jsonString
 243
 244    # handle encoded UTF-16 surrogates
 245    while (match(jsonString, /\\uD[89AaBb][0123456789AaBbCcDdEeFf][0123456789AaBbCcDdEeFf]\\uD[CcDdEeFf][0123456789AaBbCcDdEeFf][0123456789AaBbCcDdEeFf]/))
 246    {
 247       matchedValue = (prsJSON_hex2num(substr(jsonString, RSTART+2, 4)) % 1024) * 1024 + prsJSON_hex2num(substr(jsonString, RSTART+8, 4)) % 1024 + 65536
 248       #print matchedValue, substr(jsonString, RSTART+2, 4), substr(jsonString, RSTART+8, 4)
 249       matchedString = prsJSON_EncodeAsUTF8( matchedValue )
 250       sub(/\\uD[89AaBb][0123456789AaBbCcDdEeFf][0123456789AaBbCcDdEeFf]\\uD[CcDdEeFf][0123456789AaBbCcDdEeFf][0123456789AaBbCcDdEeFf]/, matchedString, jsonString)
 251    }
 252
 253    while (match(jsonString, /\\u[0123456789AaBbCcDdEeFf][0123456789AaBbCcDdEeFf][0123456789AaBbCcDdEeFf][0123456789AaBbCcDdEeFf]/))
 254    {
 255       matchedValue = prsJSON_hex2num(substr(jsonString, RSTART+2, 4))
 256       matchedString = prsJSON_EncodeAsUTF8( matchedValue )
 257       sub(/\\u[0123456789AaBbCcDdEeFf][0123456789AaBbCcDdEeFf][0123456789AaBbCcDdEeFf][0123456789AaBbCcDdEeFf]/, matchedString, jsonString)
 258    }
 259
 260    return jsonString;
 261 }
 262
 263 function prsJSON_ValidString(jsonString)
 264 {
 265    return jsonString !~ /^ParseJSON Error: Invalid String at /
 266 }
 267
 268 function prsJSON_SetDataValue(jsonData, prefix, value)
 269 {
 270    jsonData[prefix] = value
 271 }
 272
 273 function prsJSON_Error(jsonStringArr, cnt, idx, jsonData, message)
 274 {
 275    split("", jsonData)
 276    prsJSON_SetDataValue(jsonData, "1", sprintf("ParseJSON Error: %s at ", message) (idx <= cnt ? jsonStringArr[idx] : ""))
 277    split("", jsonStringArr)
 278    return cnt + 1
 279 }
 280
 281 function prsJSON_CopyError(jsonData, tv)
 282 {
 283    split("", jsonData)
 284    prsJSON_SetDataValue(jsonData, "1", tv[1])
 285 }
 286
 287 function prsJSON_ParseNumber(jsonStringArr, cnt, idx, jsonData, prefix)
 288 {
 289    if (idx <= cnt)
 290    {
 291       if (match(jsonStringArr[idx], /^(\-?)(0|[123456789][0123456789]*)(\.[0123456789]+)?([eE][+-]?[0123456789]+)?/))
 292       {
 293          prsJSON_SetDataValue(jsonData, prefix, substr(jsonStringArr[idx], 1, RLENGTH))
 294          jsonStringArr[idx] = length(jsonStringArr[idx]) >= RLENGTH+1 ? substr(jsonStringArr[idx], RLENGTH+1) : ""
 295       }
 296       else
 297          idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Number not found") # starts like a number, but doesnt match the REGEX
 298    }
 299
 300    return idx
 301 }
 302
 303 function prsJSON_ParseString(jsonStringArr, cnt, idx, jsonData, prefix,      jsonString, idxn, idxs, idxq, t)
 304 {
 305    if (idx <= cnt && length(jsonStringArr[idx]) > 0 && substr(jsonStringArr[idx], 1, 1) == "\"")
 306    {
 307       idxn = 2
 308       jsonString = jsonStringArr[idx]
 309
 310       do
 311       {
 312          t = length(jsonString) >= idxn ? substr(jsonString, idxn) : ""
 313          idxs = index(t, "\\")
 314          idxq = index(t, "\"")
 315
 316          # no valid close quote found
 317          if (idxq == 0)
 318          {
 319             if (idx == cnt)
 320                break;
 321
 322             idx++
 323             jsonString = jsonString "," jsonStringArr[idx]
 324          }
 325
 326          # a valid close quote was found - not before a slash
 327          if (idxq != 0 && (idxs == 0 || (idxs != 0 && idxq < idxs)))
 328             break;
 329
 330          if (idxs != 0 && idxq == idxs + 1) # slash quote
 331             idxn = idxn + idxq
 332
 333          else
 334             idxn = idxn + idxs + 1
 335
 336       } while (1)
 337
 338       if (idxq > 0)
 339       {
 340          t = substr(jsonString, 1, idxn+idxq-1)
 341          if (match(t, /[\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037]/) == 0)
 342          {
 343             t = prsJSON_UnescapeString(t)
 344             if ( prsJSON_ValidString(t) )
 345             {
 346                prsJSON_SetDataValue(jsonData, prefix, t)
 347                jsonStringArr[idx] = length(jsonString) >= idxn+idxq ? substr(jsonString,idxn+idxq) : ""
 348             }
 349             else
 350                idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Invalid string")
 351          }
 352          else
 353             idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Invalid character in string")
 354       }
 355       else
 356          idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Unterminated string")
 357    }
 358    else
 359       idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "String expected")
 360
 361    return idx
 362 }
 363
 364 function prsJSON_ParseObject(jsonStringArr, cnt, idx, jsonData, prefix,     tv )
 365 {
 366    if (idx <= cnt)
 367    {
 368       sub(/^\{[ \t\r\n\f]*/, "", jsonStringArr[idx]) #skip open { and skipwhite
 369
 370       while (idx <= cnt && length(jsonStringArr[idx]) > 0 && substr(jsonStringArr[idx], 1, 1) != "}")
 371       {
 372          idx = prsJSON_ParseString(jsonStringArr, cnt, idx, tv, "1")
 373
 374          if (idx <= cnt && length(tv[1]) == 0)
 375              idx = prsJSON_Error(jsonStringArr, cnt, idx, tv, "Empty string used for property name")
 376
 377          if (idx <= cnt)
 378          {
 379             sub(/^[ \t\r\n\f]+/, "", jsonStringArr[idx]) #skipwhite
 380
 381             if ( length(jsonStringArr[idx]) > 0 && substr(jsonStringArr[idx], 1, 1) == ":" )
 382             {
 383                sub(/^:[ \t\r\n\f]*/, "", jsonStringArr[idx]) #skip colon and skipwhite
 384
 385                if ( length(jsonStringArr[idx]) > 0 )
 386                {
 387                   idx = prsJSON_ParseJSONInt(jsonStringArr, cnt, idx, jsonData, prefix != "" ? prefix SUBSEP tv[1] : tv[1])
 388                   if (idx <= cnt)
 389                   {
 390                      sub(/^[ \t\r\n\f]+/, "", jsonStringArr[idx]) #skipwhite
 391
 392                      if (length(jsonStringArr[idx]) == 0 && idx < cnt)
 393                      {
 394                         idx++
 395                         sub(/^[ \t\r\n\f]+/, "", jsonStringArr[idx]) #skipwhite
 396                         if (length(jsonStringArr[idx]) == 0 || substr(jsonStringArr[idx], 1, 1) == "}")
 397                            idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected object property")
 398                      }
 399
 400                      else if (length(jsonStringArr[idx]) == 0 || substr(jsonStringArr[idx], 1, 1) != "}")
 401                         idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected object property or closing brace")
 402                   }
 403                }
 404                else
 405                   idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected JSON value (1)")
 406             }
 407             else
 408                idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected colon")
 409          }
 410          else
 411             prsJSON_CopyError(jsonData, tv)
 412       }
 413
 414       if (idx <= cnt && (length(jsonStringArr[idx]) == 0 || substr(jsonStringArr[idx], 1, 1) != "}"))
 415          idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected closing brace")
 416
 417       if (idx <= cnt && length(jsonStringArr[idx]) > 0 && substr(jsonStringArr[idx], 1, 1) == "}")
 418          sub(/^\}[ \t\r\n\f]*/, "", jsonStringArr[idx]) #skip close } and skipwhite
 419    }
 420
 421    return idx
 422 }
 423
 424 function prsJSON_ParseArray(jsonStringArr, cnt, idx, jsonData, prefix,     ii)
 425 {
 426    if (idx <= cnt)
 427    {
 428       sub(/^\[[ \t\r\n\f]*/, "", jsonStringArr[idx]) #skip open bracket and skipwhite
 429       ii = 1
 430
 431       while (idx <= cnt && length(jsonStringArr[idx]) > 0 && substr(jsonStringArr[idx], 1, 1) != "]")
 432       {
 433          idx = prsJSON_ParseJSONInt(jsonStringArr, cnt, idx, jsonData, prefix != "" ? prefix SUBSEP ii : ii )
 434          ii++
 435
 436          if (idx <= cnt)
 437          {
 438             sub(/^[ \t\r\n\f]+/, "", jsonStringArr[idx]) #skipwhite
 439
 440             if (length(jsonStringArr[idx]) == 0 && idx < cnt)
 441             {
 442                idx++;
 443                sub(/^[ \t\r\n\f]+/, "", jsonStringArr[idx]) #skipwhite
 444                if (length(jsonStringArr[idx]) == 0 || substr(jsonStringArr[idx], 1, 1) == "]")
 445                   idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected array value")
 446             }
 447
 448             else if (length(jsonStringArr[idx]) == 0 || substr(jsonStringArr[idx], 1, 1) != "]")
 449                idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected array value or closing bracket")
 450          }
 451       }
 452
 453       if (idx <= cnt && (length(jsonStringArr[idx]) == 0 || substr(jsonStringArr[idx], 1, 1) != "]"))
 454          idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected closing bracket")
 455
 456       if (idx <= cnt && length(jsonStringArr[idx]) > 0 && substr(jsonStringArr[idx], 1, 1) == "]")
 457          sub(/^\][ \t\r\n\f]*/, "", jsonStringArr[idx]) #skip close bracket and skipwhite
 458    }
 459
 460    return idx
 461 }
 462
 463 function prsJSON_ParseJSONInt(jsonStringArr, cnt, idx, jsonData, prefix,     tk)
 464 {
 465    if (idx <= cnt)
 466    {
 467       sub(/^[ \t\r\n\f]+/, "", jsonStringArr[idx]) #skipwhite
 468
 469       if (length(jsonStringArr[idx]) > 0)
 470       {
 471          tk = substr(jsonStringArr[idx], 1, 1)
 472          if (tk == "\"" && prefix != "")
 473             idx = prsJSON_ParseString(jsonStringArr, cnt, idx, jsonData, prefix)
 474          else if (tk ~ /^[0123456789-]/ && prefix != "")
 475             idx = prsJSON_ParseNumber(jsonStringArr, cnt, idx, jsonData, prefix)
 476          else if (jsonStringArr[idx] ~ /^true/ && prefix != "")
 477          {
 478             prsJSON_SetDataValue(jsonData, prefix, "<<true>>")
 479             jsonStringArr[idx] = length(jsonStringArr[idx]) <= 4 ? "" : substr(jsonStringArr[idx],5)
 480          }
 481          else if (jsonStringArr[idx] ~ /^false/ && prefix != "")
 482          {
 483             prsJSON_SetDataValue(jsonData, prefix, "<<false>>")
 484             jsonStringArr[idx] = length(jsonStringArr[idx]) <= 5 ? "" : substr(jsonStringArr[idx],6)
 485          }
 486          else if (jsonStringArr[idx] ~ /^null/ && prefix != "")
 487          {
 488             prsJSON_SetDataValue(jsonData, prefix, "<<null>>")
 489             jsonStringArr[idx] = length(jsonStringArr[idx]) <= 4 ? "" : substr(jsonStringArr[idx],5)
 490          }
 491          else if (tk == "{")
 492             idx = prsJSON_ParseObject(jsonStringArr, cnt, idx, jsonData, prefix)
 493          else if (tk == "[")
 494             idx = prsJSON_ParseArray(jsonStringArr, cnt, idx, jsonData, prefix)
 495          else
 496             idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected JSON value (2)")
 497
 498          if (idx <= cnt)
 499             sub(/^[ \t\r\n\f]+/, "", jsonStringArr[idx]) #skipwhite
 500       }
 501
 502       if (prefix == "" && idx <= cnt && length(jsonStringArr[idx]) != 0)
 503          idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected end of JSON text")
 504       else if (prefix == "" && idx+1 <= cnt)
 505       {
 506          idx++
 507          idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected end of JSON text (2)")
 508       }
 509
 510    }
 511
 512    return idx
 513 }
 514
 515 #
 516 # JSON Formatting Routines
 517 #
 518
 519 function useJSON_ArrayCount( possibleArray,     a, min, max, cnt, rv)
 520 {
 521    cnt = 0
 522
 523    for ( a in possibleArray )
 524    {
 525       if (possibleArray[a] "" !~ /^[0123456789][0123456789]*$/)
 526          return -1
 527
 528       if ( cnt == 0 )
 529       {
 530          min = possibleArray[a]
 531          max = possibleArray[a]
 532       }
 533       else
 534       {
 535          if (min == possibleArray[a] || max == possibleArray[a])
 536             return -1
 537
 538          if (possibleArray[a] < min)
 539             min = possibleArray[a]
 540
 541          if (max < possibleArray[a])
 542             max = possibleArray[a]
 543       }
 544
 545       cnt++
 546    }
 547
 548    if (min == 1 && max == cnt)
 549       return cnt
 550
 551    return -1
 552 }
 553
 554 function useJSON_GetObjectMembers(jsonSchema, prefix)
 555 {
 556    if (prefix == "") prefix = "<<novalue>>"
 557    return prefix in jsonSchema ? jsonSchema[prefix] : ""
 558 }
 559
 560 # quick sort array arr
 561 function utlJSON_qsortArray(arr, left, right,   i, last, t)
 562 {
 563    if (left >= right)   # do nothing if array has less than 2 elements
 564       return
 565    i = left + int((right-left+1)*rand())
 566    t = arr[left];
 567    arr[left] = arr[i];
 568    arr[i] = t
 569    last = left  # arr[left] is now partition element
 570    for (i = left+1; i <= right; i++)
 571    {
 572       if (arr[i] < arr[left])
 573       {
 574          last++
 575          t = arr[last];
 576          arr[last] = arr[i];
 577          arr[i] = t
 578       }
 579    }
 580    t = arr[left];
 581    arr[left] = arr[last];
 582    arr[last] = t
 583    utlJSON_qsortArray(arr, left, last-1)
 584    utlJSON_qsortArray(arr, last+1, right)
 585 }
 586
 587 function useJSON_GetSchema(jsonData, jsonSchema,    a, tidx, tv, sv, idx)
 588 {
 589    split("", jsonSchema)
 590    for (a in jsonData)
 591    {
 592       while (match(a, SUBSEP "[^" SUBSEP "]+$"))
 593       {
 594          tidx = substr(a,1,RSTART-1)
 595          tv = substr(a,RSTART+1)
 596          sv = (tidx in jsonSchema) ? jsonSchema[tidx] : ""
 597          # if ( sv != tv && sv !~ "^" tv SUBSEP && sv !~ SUBSEP tv "$" && sv !~ SUBSEP tv SUBSEP )
 598          # Rephrase this using index so object member names with regex characters work
 599          if ( sv != tv && index(sv, tv SUBSEP) != 1 && (length(sv) <= length(tv)+1 || substr(sv, length(sv)-length(tv)) != SUBSEP tv) && index(sv, SUBSEP tv SUBSEP) == 0 )
 600             jsonSchema[tidx] = sv (sv == "" ? "" : SUBSEP)  tv
 601          a = tidx
 602       }
 603
 604       tidx = "<<novalue>>"
 605       tv = a
 606       sv = (tidx in jsonSchema) ? jsonSchema[tidx] : ""
 607       if ( sv != tv && sv !~ "^" tv SUBSEP && sv !~ SUBSEP tv "$" && sv !~ SUBSEP tv SUBSEP )
 608          jsonSchema[tidx] = sv (sv == "" ? "" : SUBSEP)  tv
 609    }
 610 }
 611
 612 function useJSON_EscapeString(s,     ii, c, t, t2, t3, t4, cs)
 613 {
 614    cs = "\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377"
 615    gsub(/\\/, "\\u005C", s)
 616    gsub(/"/, "\\\"", s)
 617    #gsub(/\//, "\\/", s) # required to decode, but not to encode
 618    gsub(/\b/, "\\b", s)
 619    gsub(/\f/, "\\f", s)
 620    gsub(/\n/, "\\n", s)
 621    gsub(/\r/, "\\r", s)
 622    gsub(/\t/, "\\t", s)
 623
 624    for ( ii = 1 ; ii <= length(s) ; ii++ )
 625    {
 626       t = substr(s,ii,1)
 627
 628       if (t == "\000") # having \000 in list below doesnt work in all awks
 629       {
 630          c = 0
 631          s = (ii > 1 ? substr(s, 1, ii-1) : "") sprintf("\\u%04X", c) (ii==length(s) ? "" : substr(s, ii+1))
 632          ii += 5
 633       }
 634       else
 635       {
 636          c = index("\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037", t)
 637          c = c == 0 ? -1 : c
 638
 639          if ( c >= 0 )
 640          {
 641             s = (ii > 1 ? substr(s, 1, ii-1) : "") sprintf("\\u%04X", c) (ii==length(s) ? "" : substr(s, ii+1))
 642             ii += 5
 643          }
 644       }
 645
 646       t = index(cs, t)
 647       t2 = ii+1 <= length(s) ? index(cs, substr(s,ii+1,1)) : 0
 648       t3 = ii+2 <= length(s) ? index(cs, substr(s,ii+2,1)) : 0
 649       t4 = ii+3 <= length(s) ? index(cs, substr(s,ii+3,1)) : 0
 650
 651       if ( c < 0 && t > 64 && t <= 96 && ii+1 <= length(s) && t2 > 0 && t2 <= 64) # two character UTF-8 sequence
 652       {
 653          c = (t - 65)*64 + (t2-1)
 654          s = (ii > 1 ? substr(s, 1, ii-1) : "") sprintf("\\u%04X", c) (ii+1==length(s) ? "" : substr(s, ii+2))
 655          ii += 5
 656       }
 657
 658       else if ( c < 0 && t > 96 && t <= 112 && ii+2 <= length(s) && t2 > 0 && t2 <= 64 && t3 > 0 && t3 <= 64) # three character UTF-8 sequence
 659       {
 660          c = (t - 97)*4096 + (t2-1)*64 + (t3-1)
 661          if ( c < 65536 )
 662          {
 663             s = (ii > 1 ? substr(s, 1, ii-1) : "") sprintf("\\u%04X", c) (ii+2==length(s) ? "" : substr(s, ii+3))
 664             ii += 5
 665          }
 666          else
 667          {
 668             # encode in JSON-style with two \u#### UTF-16 surrogates
 669             # printf("1: %08X\n", c)
 670             s = (ii > 1 ? substr(s, 1, ii-1) : "") sprintf("\\u%04X\\u%04X", (c/1024)%1024 + 55296, c%1024 + 56320) (ii+3==length(s) ? "" : substr(s, ii+4))
 671             ii += 11
 672          }
 673       }
 674
 675       # four character UTF-8 sequence, encode in JSON-style with two \u#### UTF-16 surrogates
 676       else if ( c < 0 && t > 112 && t <= 120 && ii+3 <= length(s) && t2 > 0 && t2 <= 64 && t3 > 0 && t3 <= 64 && t4 > 0 && t4 <=  64)
 677       {
 678          c = (t - 113)*262144 + (t2-1)*4096 + (t3-1)*64 + (t4-1)
 679          # printf("2: %08X, %d, %d, %d, %d\n", c, t, t2, t3, t4)
 680          # printf("\\u%04X\\u%04X\n", (c/1024)%1024 + 55296, c%1024 + 56320)
 681          c -= 65536
 682          s = (ii > 1 ? substr(s, 1, ii-1) : "") sprintf("\\u%04X\\u%04X", (c/1024)%1024 + 55296, c%1024 + 56320) (ii+3==length(s) ? "" : substr(s, ii+4))
 683          ii += 11
 684       }
 685    }
 686
 687    return "\"" s "\""
 688 }
 689
 690 function useJSON_GetDataValue(jsonData, prefix)
 691 {
 692    return prefix in jsonData ? jsonData[prefix] : "<<novalue>>"
 693 }
 694
 695 function useJSON_PrettyFormat(s, pretty)
 696 {
 697    if (s == "" || pretty <= 0) return s
 698
 699    # dont sprintf the whole thing, some awks have short buffers for sprintf
 700    return sprintf("%*.*s", (pretty-1)*3, (pretty-1)*3, "") s (s == "}" || s == "]" ? "" : "\n")
 701 }
 702
 703 function useJSON_FormatInt(jsonData, jsonSchema, prefix, pretty,     allLines, member, memberArr, memberList, arrCount, a, ii)
 704 {
 705    memberList = useJSON_GetObjectMembers(jsonSchema, prefix)
 706
 707    if ( memberList == "" )
 708    {
 709       a = useJSON_GetDataValue(jsonData, prefix)
 710       if ( a == "<<true>>" ) return "true"
 711       if ( a == "<<false>>" ) return "false"
 712       if ( a == "<<null>>" ) return "null"
 713       if ( a == "<<novalue>>" ) return "" # <<novalue>> is a help for dealing with empty arrays and objects
 714
 715       # if it looks like a number, encode it as such.  Cant tell a string from a number.
 716       if (a "" ~ /^(\-?)(0|[123456789][0123456789]*)(\.[0123456789]+)?([eE][+-]?[0123456789]+)?$/)
 717          return a
 718
 719       return useJSON_EscapeString(a)
 720    }
 721
 722    split(memberList, memberArr, SUBSEP)
 723    arrCount = useJSON_ArrayCount( memberArr )
 724
 725    if ( arrCount >= 0 )
 726    {
 727       allLines = "[" (pretty == 0 ? "" : "\n")
 728
 729       for ( ii = 1 ; ii <= arrCount ; ii++ )
 730          allLines = allLines useJSON_PrettyFormat(useJSON_FormatInt( jsonData, jsonSchema, prefix (prefix == "" ? "" : SUBSEP) ii, (pretty != 0 ? pretty+1 : 0)) (ii < arrCount ? "," : ""), pretty != 0 ? pretty+1 : 0)
 731       allLines = allLines useJSON_PrettyFormat("]", pretty)
 732
 733       return allLines
 734    }
 735
 736    allLines = "{" (pretty == 0 ? "" : "\n")
 737
 738    ii = 0
 739
 740    arrCount = 0
 741    for (a in memberArr)
 742       arrCount++
 743
 744    utlJSON_qsortArray(memberArr, 1, arrCount)
 745
 746    for ( ii = 1 ; ii <= arrCount ; ii++ )
 747       allLines = allLines useJSON_PrettyFormat(useJSON_EscapeString(memberArr[ii]) (pretty == 0 ? ":" : " : ") useJSON_FormatInt(jsonData, jsonSchema, prefix (prefix == "" ? "" : SUBSEP) memberArr[ii], (pretty != 0 ? pretty+1 : 0)) (ii < arrCount ? "," : ""), pretty != 0 ? pretty+1 : 0)
 748
 749    allLines = allLines useJSON_PrettyFormat("}", pretty)
 750
 751    return allLines
 752 }
 753
 754 #
 755 # Entry Points
 756 #
 757
 758 #
 759 # ParseJSON : Parse JSON text into an awk array
 760 #
 761 #    jsonString : JSON text
 762 #    jsonData : array of parsed JSON data
 763 #
 764 #    returns : N/A
 765 #
 766 function ParseJSON(jsonString, jsonData,      jsonStringArr, cnt)
 767 {
 768    # newlines split differently in some awks, replace them with formfeeds (also white space)
 769    # if (split("1\n2\n3", jsonData, ",") == 3) # is this an awk that splits newlines differently?
 770    gsub(/\n/, "\f", jsonString) # always replace literal newlines - allows compatibility when testing
 771
 772    split("", jsonData) # clear the array jsonData
 773    cnt = split(jsonString, jsonStringArr, ",")
 774    prsJSON_ParseJSONInt(jsonStringArr, cnt, 1, jsonData, "")
 775 }
 776
 777 #
 778 # FormatJSON : Format parsed JSON data back into JSON text
 779 #
 780 #    jsonData : array of parsed JSON data
 781 #    pretty : 0 = compact format, non-zero = pretty format
 782 #
 783 #    returns : string with JSON text
 784 #
 785 function FormatJSON(jsonData, pretty,    jsonSchema)
 786 {
 787    useJSON_GetSchema(jsonData, jsonSchema)
 788    return useJSON_FormatInt(jsonData, jsonSchema, "", pretty ? 1 : 0)
 789 }
 790
 791 #
 792 # JSONArrayLength : Find number of members in a JSON array
 793 #
 794 #    jsonData : array of parsed JSON data
 795 #    prefix : array name
 796 #
 797 #    returns : number of entries in the array
 798 #
 799 function JSONArrayLength(jsonData, prefix,     a, cnt, tv)
 800 {
 801    cnt = -1
 802
 803    for (a in jsonData)
 804    {
 805       if (prefix == "" || index(a, prefix) == 1)
 806       {
 807          tv = substr(a, prefix == "" ? 1 : (1+length(prefix)+1))
 808          if ( index(tv, SUBSEP) )
 809             tv = substr(tv, 1, index(tv, SUBSEP)-1)
 810          tv = tv + 0
 811          if ( tv > cnt )
 812             cnt = tv
 813       }
 814    }
 815
 816    return cnt
 817 }
 818
 819 #
 820 # JSONUnescapeString : turn a JSON-escaped string into UTF-8
 821 #
 822 #    jsonString : the escaped JSON string to convert
 823 #
 824 #    returns : the string in UTF-8
 825 #
 826 function JSONUnescapeString(jsonString)
 827 {
 828    return prsJSON_UnescapeString(jsonString)
 829 }
 830
 831 #
 832 # JSONIsTrue : return non-zero if the value is the true value
 833 #
 834 #    jsonValue : the value to test
 835 #
 836 #    returns : true or false
 837 #
 838 function JSONIsTrue(jsonValue)
 839 {
 840    return jsonValue == "<<true>>";
 841 }
 842
 843 #
 844 # JSONIsFalse : return non-zero if the value is the false value
 845 #
 846 #    jsonValue : the value to test
 847 #
 848 #    returns : true or false
 849 #
 850 function JSONIsFalse(jsonValue)
 851 {
 852    return jsonValue == "<<false>>";
 853 }
 854
 855 #
 856 # JSONIsNull : return non-zero if the value is the null value
 857 #
 858 #    jsonValue : the value to test
 859 #
 860 #    returns : true or false
 861 #
 862 function JSONIsNull(jsonValue)
 863 {
 864    return jsonValue == "<<null>>";
 865 }
 866
 867 #
 868 # JSONObjectMembers : get the set of members of an object
 869 #
 870 #    jsonData : array of parsed JSON data
 871 #    prefix : object name
 872 #    memberArr : [out] an array of the names of the object members, if the target was an object or an array
 873 #
 874 #    returns : If the target was actually an array rather than an object, the number of elements in the array
 875 #              Else, zero if the target was an object or a value
 876 #
 877 function JSONObjectMembers(jsonData, prefix, memberArr,     jsonSchema, memberList, rv, a)
 878 {
 879    useJSON_GetSchema(jsonData, jsonSchema)
 880    memberList = useJSON_GetObjectMembers(jsonSchema, prefix)
 881
 882    if ( memberList == "" )
 883    {
 884       split("", memberArr)
 885       return 0
 886    }
 887
 888    split(memberList, memberArr, SUBSEP)
 889    rv = useJSON_ArrayCount( memberArr )
 890    if ( rv == -1 ) # not an array, sort the object member names
 891    {
 892       rv = 0
 893       for (a in memberArr)
 894          rv++
 895
 896       utlJSON_qsortArray(memberArr, 1, rv)
 897       rv = 0
 898    }
 899    return rv
 900 }
 901 # End of Copyright (c) 2010 Dan Saar
 902
 903 function debug(lvl, text) {
 904     if (lvl <= DEBUG)
 905         print text > "/dev/stderr"
 906 }
 907
 908 function wpt_init() {
 909     available = "True"
 910     archived = "False"
 911     sym = "Geocache"
 912     json_log_bool = 0
 913     logs = ""
 914     logs_section = 0
 915     hints = ""
 916     lat = ""
 917     yy = 0
 918     wplist = ""
 919     nattr_yes = 0
 920     nattr_no = 0
 921     gs_type = ""
 922 }
 923
 924 function dec2utf(dec) {
 925     if (dec <= 0x7f)
 926         return sprintf("%c", dec)
 927     else if (dec <= 0x07ff)
 928         return sprintf("%c%c", 0xC0 + rshift(dec, 6), 0x80 + and(dec, 0x3F) )
 929 }
 930
 931 function asc2xml(txt,   o, ent, dec) {
 932     o = ""
 933     while (match(txt, "&#x[0123456789abcdefABCDEF]*;"))
 934     {
 935         o = o substr(txt, 1, RSTART-1)
 936         ent = toupper(substr(txt, RSTART+3, RLENGTH-4))
 937         txt = substr(txt, RSTART+RLENGTH)
 938         dec = hex2dec(ent)
 939         o = o dec2utf(dec)
 940     }
 941     txt = o txt
 942     o = ""
 943     while (match(txt, "&#[0123456789]*;"))
 944     {
 945         o = o substr(txt, 1, RSTART-1)
 946         ent = toupper(substr(txt, RSTART+2, RLENGTH-3))
 947         txt = substr(txt, RSTART+RLENGTH)
 948         dec = ent + 0
 949         o = o dec2utf(dec)
 950     }
 951     o = o txt
 952     return o
 953 }
 954
 955 function umlauts(text) {
 956     text = asc2xml(text)
 957     if(0)
 958     {
 959         # Somewhat minimal translation of HTML entities in titles
 960         gsub("&#228;", "\xc3\xa4", text)
 961         gsub("&#xE4;", "\xc3\xa4", text)
 962         gsub("&#246;", "\xc3\xb6", text)
 963         gsub("&#xF6;", "\xc3\xb6", text)
 964         gsub("&#252;", "\xc3\xbc", text)
 965         gsub("&#xFC;", "\xc3\xbc", text)
 966         gsub("&#196;", "\xc3\x84", text)
 967         gsub("&#xC4;", "\xc3\x84", text)
 968         gsub("&#214;", "\xc3\x96", text)
 969         gsub("&#xD6;", "\xc3\x96", text)
 970         gsub("&#220;", "\xc3\x9c", text)
 971         gsub("&#xDC;", "\xc3\x9c", text)
 972         gsub("&#223;", "\xc3\x9f", text)
 973         gsub("&#xDF;", "\xc3\x9f", text)
 974         gsub("&#176;", "\xc2\xb0", text)
 975         gsub("&#xB0;", "\xc2\xb0", text)
 976         gsub("&#180;", "\xc2\xb4", text)
 977         gsub("&#xB4;", "\xc2\xb4", text)
 978     }
 979     gsub("&amp;", "\\&", text)
 980     return text
 981 }
 982
 983 function htmlclean(text) {
 984     gsub("&nbsp;", " ", text)
 985     gsub("</?[pP][^>]*>", "\n", text)
 986     gsub("<[bB][rR][^>]*>", "\n", text)
 987     gsub("<[^>]*>", "", text)
 988     # compress whitespace
 989     gsub("\n\n\n*", "\n\n", text)
 990     gsub("[ \t][ \t]*", " ", text)
 991     return text
 992 }
 993
 994 function tableclean(text) {
 995     gsub("\n", "", text)
 996     gsub("&nbsp;", " ", text)
 997     # translate/remove HTML tags
 998     gsub("</?[pP][^>]*>", "\n", text)
 999     gsub("</[bB][rR][^>]*>", "", text)
1000     gsub("</?font[^>]*>", "", text)
1001     gsub("</?table[^>]*>", "", text)
1002     gsub("<t[rdh]>", "", text)
1003     gsub("</tr>", "\n", text)
1004     gsub("</t[dh][^>]*>", " | ", text)
1005     gsub("<[^>]*>", "", text)
1006     # compress whitespace
1007     gsub("[ \t][ \t]*", " ", text)
1008     return text
1009 }
1010
# Return the content of a <div>: everything up to and including the
# opening tag is removed, further input lines are appended until the
# text mentions "div", and everything from "</div>" on is cut off.
#   text : the input line holding the opening tag
#   tag  : optional id the div must carry
# Uses the globals pat and more as scratch variables.
function remdiv(text, tag) {
    pat = (tag != "") ? (".*<div id=." tag ".[^>]*>[ \t\n]*") : ".*<div[^>]*>[ \t\n]*"
    sub(pat, "", text)

    for (;;)
    {
        if (text ~ "/?div")
            break
        if ((getline more) <= 0)
            break
        text = text "\n" more
    }

    sub("[ \t\n]*</div>.*", "", text)
    debug(3, "Div:\n" text)
    return text
}
1027
# Return the content of a <span>: everything up to and including the
# opening tag is removed, further input lines are appended until the
# text mentions "span", and everything from "</span>" on is cut off.
#   text : the input line holding the opening tag
#   tag  : optional id the span must carry
# Uses the globals pat and more as scratch variables.
function remspan(text, tag) {
    pat = (tag != "") ? (".*<span id=." tag ".[^>]*>[ \t\n]*") : ".*<span[^>]*>[ \t\n]*"
    sub(pat, "", text)

    for (;;)
    {
        if (text ~ "/?span")
            break
        if ((getline more) <= 0)
            break
        text = text "\n" more
    }

    sub("[ \t\n]*</span>.*", "", text)
    debug(3, "Span:\n" text)
    return text
}
1044
# Return the content of a <span> that may itself contain nested spans
# and may stretch over many input lines.
#   text : the input line holding the opening tag
#   tag  : optional id the span must carry; "CacheLogs" additionally
#          strips <table> tags from the result
# Uses the globals pat, more, i (nesting level) and j (debug counter).
function remspanlong(text, tag) {
    if (tag != "")
        pat = ".*<span id=." tag ".[^>]*>[ \t\n]*"
    else
        pat = ".*<span[^>]*>[ \t\n]*"
    # drop everything up to and including the opening tag
    sub(pat, "", text)
    # i = "span level"
    i = 1; j = 0
    debug(2, length(text) "\t" i "   " j++ "   " text)
    # input is in text
    while (i != 0)
    {
        # emergency exit
        if (length(text) > 500000)
        {
            debug(0, "Warning: logs exceeded 500,000 bytes!")
            break
        }
        # cleanup: remove </*span...>, adjust "span level"
        # Each pass removes at most one closing and one opening tag;
        # the tags are counted in whatever order the two tests find
        # them, not in their order of appearance in the text.
        while (text ~ "</*span.*>")
        {
            if (text ~ "</span>")
            {
                --i; sub("</span>", "", text)
            }
            if (text ~ "<span.*>")
            {
                ++i; sub("<span[^>]*>", "", text)
            }
        }
        debug(2, "=" length(text) "\t" i "   " j++ "   " text)
        # if "span level" down to zero, closing tag reached
        if (i == 0) break
        # get more input
        if (getline more <= 0)
            break
        text = text "\n" more
        debug(2, "+" length(more) "\t" i "   " j++ "   " more)
    }
    debug(1, length(text) "\t" i "   " j++)
    # only has an effect when a "</span>" is still present, i.e. when
    # the loop above was left before consuming all span tags
    sub("[ \t\n]*</span>.*", "", text)
    gsub("&nbsp;", " ", text)
    if (tag == "CacheLogs")
        gsub("</?table[^>]*>", "", text)
    debug(3, "SpanLong:\n" text)
    return text
}
1092
# Collect the following input lines, joined by blanks, until the text
# contains "</table>" or the "No additional waypoints to display"
# notice (or input ends). The complete table content is returned;
# callers split it at </tr>. Uses the globals text and more.
function remwaypoints() {
    text = ""
    for (;;)
    {
        if (text ~ "</table>" || text ~ "No additional waypoints to display")
            break
        if ((getline more) <= 0)
            break
        text = text " " more
    }

    gsub("&nbsp;", " ", text)
    gsub("\n[ \t]*", "", text)
    debug(3, "Waypoints:\n" text "\nEnd Waypoints")
    return text
}
1108
# Turn the "Additional Waypoints" table into one text record per
# waypoint.
#   waypoints : table content as returned by remwaypoints()
#   returns   : for every main row a line
#                   \nlat="..." lon="..."|prefix|lookup|name|url
#               with "|note" appended for a following note row
# The first table row (header) is skipped. Rows with 8 cells (layout
# before 2010/07) and 9 cells (layout since 2010/07) carry the same
# data, shifted by one column, so both are handled through j-relative
# indices. Rows with 4 cells are note rows; anything else is dropped.
# Links are kept for http as well as https (https links used to be
# discarded).
# Uses the globals text, wps, i, wp, url and j.
function splitwaypoints(waypoints,
            line, fld, prefix, lookup, wpname, x, y, lat, lon) {
    text=""
    # separate lines
    i = split(waypoints, wps, "</tr>")
    wp = 1 # skip header line
    while (wp < i)
    {
        ++wp
        # get URL from full table line
        url = wps[wp]
        gsub(".*href=.", "", url)
        gsub("\".*", "", url)
        if (url !~ "^https?:")
            url = ""
        else
            debug(1, "url: " url)

        # individual fields without leading/trailing blanks, remove HTML tags
        j = split(wps[wp], line, "</td>")
        for (fld in line)
        {
            debug(2, "Before Line[" fld "]: " line[fld])
            gsub("[ \t]*<[^>]*>", "", line[fld])
            gsub("^[ \t]*", "", line[fld])
            gsub("[ \t]*$", "", line[fld])
            debug(2, "after Line[" fld "]: " line[fld])
        }

        if (j == 8 || j == 9)
        {
            # main information line; columns counted from the row end:
            # j-5 prefix, j-4 lookup, j-3 name, j-2 coordinates
            if (!line[j-5]) continue
            prefix = substr(line[j-5] "00", 1, 2)
            lookup = line[j-4]
            wpname = line[j-3]

            # latitude: keep the part before the E/W half
            lat = toupper(line[j-2])
            gsub(" *[EW].*", "", lat)
            split(lat, y)
            lat = y[2] + y[3]/60.0
            if (y[1] == "S")
                lat = -lat

            # longitude: drop the N/S half, then stray characters
            lon = toupper(line[j-2])
            gsub("[NS] *[0-9]*.. *[0-9.]* ", "", lon)
            gsub("[^ 0-9.NESW-]", "", lon)
            split(lon, x)
            lon = x[2] + x[3]/60.0
            if (x[1] == "W")
                lon = -lon

            text = text sprintf("\nlat=\"%.6f\" lon=\"%.6f\"|%s|%s|%s|%s",
                                lat, lon, prefix, lookup, wpname, url)
        }
        else if (j == 4)
        {
            if (line[1] ~ "Note:")
            {
                # continuation line, old style
                text = text sprintf("|%s", line[2])
            }
            else if (line[2] ~ "Note:")
            {
                # continuation line, new style
                text = text sprintf("|%s", line[3])
            }
        }
    }
    debug(3, "Split WPs\n" text)
    return text
}
1213
# Simplify the Additional Waypoints table into HTML text of the form
#     prefixedname - name<br />coordfield<br />note<br />
#   waypoints : table content as returned by remwaypoints()
# The first row (header) is skipped. Rows with 8 cells (old layout) and
# 9 cells (new layout) hold the same data shifted by one column; rows
# with 4 cells are note rows; everything else is dropped.
# Uses the globals text, wps, i, wp, j and gcid.
function wpclean(waypoints,     line, fld, prefix, lookup, wpname, coords) {
    text = ""
    i = split(waypoints, wps, "</tr>")
    wp = 1
    while (wp < i)
    {
        ++wp
        j = split(wps[wp], line, "</td>")
        for (fld in line)
        {
            gsub("[ \t]*<[^>]*>", "", line[fld])
            gsub("^[ \t]*", "", line[fld])
            gsub("[ \t]*$", "", line[fld])
        }

        if (j == 8 || j == 9)
        {
            # main information line; columns counted from the row end
            if (!line[j-5]) continue
            prefix = substr(line[j-5] "00", 1, 2) substr(gcid, 3)
            lookup = line[j-4]
            wpname = line[j-3]
            gsub(" \\(.*\\).*", "", wpname)
            coords = toupper(line[j-2])
            text = text sprintf("%s - %s<br />%s<br />", prefix, wpname, coords)
        }
        else if (j == 4)
        {
            # continuation line: note text sits after the "Note:" cell
            if (line[1] ~ "Note:")
                text = text sprintf("%s<br />", line[2])
            else if (line[2] ~ "Note:")
                text = text sprintf("%s<br />", line[3])
        }
    }
    debug(3, "Clean WPs\n" text)
    return text
}
1279
# Convert a string of hexadecimal digits to its numeric value.
#   x : hex digits, upper or lower case ("" gives 0)
# Lower-case digits used to be counted as -1 each; the input is now
# upper-cased first. Characters that are not hex digits still
# contribute -1 per position.
function hex2dec(x,   val) {
    x = toupper(x)
    for (val = 0; length(x); x = substr(x, 2))
        val = 16*val + index("0123456789ABCDEF", substr(x, 1, 1)) - 1
    return val
}
1285
# Convert a waypoint code (with or without leading "GC") to a number.
# Codes of at most four characters that sort below "G000" are read as
# hexadecimal; all others are read as base-31 digits and reduced by
# 411120. Uses the globals set and pos in the base-31 case.
function wp2id(wp,    val) {
    sub("^GC", "", wp)
    debug(5, "wp2id: " wp " ...")

    if (length(wp) > 4 || wp >= "G000")
    {
        # new style, base-31, can have 4 or more places!
        set = "0123456789ABCDEFGHJKMNPQRTVWXYZ"
        val = 0
        pos = 1
        while (pos <= length(wp))
        {
            val = val * 31 + index(set, substr(wp, pos, 1)) - 1
            ++pos
        }
        val -= 411120
        debug(5, "wp2id id: " val " ...")
        return val
    }

    # old hex style
    val = hex2dec(wp)
    debug(5, "wp2id hex: " val " ...")
    return val
}
1309
# Apply rot13 to the letters of string (used for hints); every other
# character is copied unchanged. See http://lorance.freeshell.org/rot13/
# Uses the globals ROTFROM, ROTTO, retstr, pos, char and rotpos.
function rot13 (string) {
    ROTFROM = "nopqrstuvwxyzabcdefghijklmNOPQRSTUVWXYZABCDEFGHIJKLM"
    ROTTO   = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
    retstr = ""
    pos = 0
    while (pos < length(string))
    {
        char = substr(string, pos + 1, 1)
        rotpos = index(ROTFROM, char)
        # letters are swapped for the one at the same slot in ROTTO
        char = (rotpos > 0) ? substr(ROTTO, rotpos, 1) : char
        retstr = retstr char
        pos++
    }
    return retstr
}
1325
# Print an opening XML tag on its own line, indented two blanks per
# level; parms, when not empty, is placed after the tag name.
function tagstart(lvl, tag, parms) {
    if (parms == "")
        printf "%*s<%s>\n", lvl*2, "", tag
    else
        printf "%*s<%s %s>\n", lvl*2, "", tag, parms
}
1333
# Print a closing XML tag on its own line, indented two blanks per level.
function tagend(lvl, tag) {
    printf "%*s</%s>\n", lvl*2, "", tag
}
1338
# Escape the XML special characters &, < and > in text. The ampersand
# is handled first so the entities added afterwards stay intact.
function ee(text) {
    gsub("&", "\\&amp;", text)
    gsub("<", "\\&lt;", text)
    gsub(">", "\\&gt;", text)
    return text
}
1345
# Print <tag>text</tag> on one line, indented two blanks per level;
# the text is XML-escaped with ee() first.
function tagtext(lvl, tag, text) {
    printf "%*s<%s>%s</%s>\n", lvl*2, "", tag, ee(text), tag
}
1351
# Print <tag parms>text</tag> on one line, indented two blanks per
# level; the text is XML-escaped with ee() first.
function tagptext(lvl, tag, parms, text) {
    printf "%*s<%s %s>%s</%s>\n", lvl*2, "", tag, parms, ee(text), tag
}
1357
# Register one attribute: gif is the icon name used as key, id the
# numeric attribute id and text its description.
# The former extra debug line read attr_id["slealth"] (a misspelt key);
# merely reading it created an empty array element, so it was removed.
function attr_begin1(gif, id, text) {
    debug(1, "attr_begin1: " gif " " id " \"" text "\"")
    attr_id[gif] = id; attr_text[gif] = text
    debug(1, "attr_id: " attr_id[gif])
}
# Fill the global tables attr_id[] and attr_text[], keyed by the
# attribute icon name, with the numeric id and the description written
# into <groundspeak:attribute>.
# Id 16 is not assigned here, so "poisonoak" starts at 17. The table
# used to continue at 16, which made every later id one too low
# (e.g. "Stealth required" came out as 39 instead of 40).
function attr_begin() {
    attr_id["dog"] = 1; attr_text["dog"] = "Dogs"
    attr_id["dogs"] = 1; attr_text["dogs"] = "Dogs"
    attr_id["fee"] = 2; attr_text["fee"] = "Access or parking fee"
    attr_id["rappelling"] = 3; attr_text["rappelling"] = "Climbing gear"
    attr_id["boat"] = 4; attr_text["boat"] = "Boat"
    attr_id["scuba"] = 5; attr_text["scuba"] = "Scuba gear"
    attr_id["kids"] = 6; attr_text["kids"] = "Recommended for kids"
    attr_id["onehour"] = 7; attr_text["onehour"] = "Takes less than an hour"
    attr_id["scenic"] = 8; attr_text["scenic"] = "Scenic view"
    attr_id["hiking"] = 9; attr_text["hiking"] = "Significant hike"

    attr_id["climbing"] = 10; attr_text["climbing"] = "Difficult climbing"
    attr_id["wading"] = 11; attr_text["wading"] = "May require wading"
    attr_id["swimming"] = 12; attr_text["swimming"] = "May require swimming"
    attr_id["available"] = 13; attr_text["available"] = "Available at all times"
    attr_id["night"] = 14; attr_text["night"] = "Recommended at night"
    attr_id["winter"] = 15; attr_text["winter"] = "Available during winter"
    # id 16 intentionally left out
    attr_id["poisonoak"] = 17; attr_text["poisonoak"] = "Poison plants"
    attr_id["dangerousanimals"] = 18; attr_text["dangerousanimals"] = "Dangerous Animals"
    attr_id["ticks"] = 19; attr_text["ticks"] = "Ticks"

    attr_id["mines"] = 20; attr_text["mines"] = "Abandoned mines"
    attr_id["cliff"] = 21; attr_text["cliff"] = "Cliff / falling rocks"
    attr_id["hunting"] = 22; attr_text["hunting"] = "Hunting"
    attr_id["danger"] = 23; attr_text["danger"] = "Dangerous area"
    attr_id["wheelchair"] = 24; attr_text["wheelchair"] ="Wheelchair accessible"
    attr_id["parking"] = 25; attr_text["parking"] = "Parking available"
    attr_id["public"] = 26; attr_text["public"] = "Public transportation"
    attr_id["water"] = 27; attr_text["water"] = "Drinking water nearby"
    attr_id["restrooms"] = 28; attr_text["restrooms"] ="Public restrooms nearby"
    attr_id["phone"] = 29; attr_text["phone"] = "Telephone nearby"

    attr_id["picnic"] = 30; attr_text["picnic"] = "Picnic tables nearby"
    attr_id["camping"] = 31; attr_text["camping"] = "Camping available"
    attr_id["bicycles"] = 32; attr_text["bicycles"] = "Bicycles"
    attr_id["motorcycles"] = 33; attr_text["motorcycles"] = "Motorcycles"
    attr_id["quads"] = 34; attr_text["quads"] = "Quads"
    attr_id["jeeps"] = 35; attr_text["jeeps"] = "Off-road vehicles"
    attr_id["snowmobiles"] = 36; attr_text["snowmobiles"] = "Snowmobiles"
    attr_id["horses"] = 37; attr_text["horses"] = "Horses"
    attr_id["campfires"] = 38; attr_text["campfires"] = "Campfires"
    attr_id["thorns"] = 39; attr_text["thorns"] = "Thorns"

    attr_id["stealth"] = 40; attr_text["stealth"] = "Stealth required"
    attr_id["stroller"] = 41; attr_text["stroller"] = "Stroller accessible"
    attr_id["firstaid"] = 42; attr_text["firstaid"] = "Needs maintenance"
    attr_id["cow"] = 43; attr_text["cow"] = "Watch for livestock"
    attr_id["flashlight"] = 44; attr_text["flashlight"] = "Flashlight required"
    attr_id["landf"] = 45; attr_text["landf"] = "Lost And Found Tour"
    attr_id["rv"] = 46; attr_text["rv"] = "Recreational Vehicle"
    attr_id["field"] = 47; attr_text["field"] = "Field Puzzle"
    attr_id["UV"] = 48; attr_text["UV"] = "UV Light Required"
    attr_id["snowshoes"] = 49; attr_text["snowshoes"] = "Snowshoes"

    attr_id["skiis"] = 50; attr_text["skiis"] = "Cross Country Skis"
    attr_id["s-tool"] = 51; attr_text["s-tool"] = "Special Tool Required"
    attr_id["nightcache"] = 52; attr_text["nightcache"] = "Night Cache"
    attr_id["parkngrab"] = 53; attr_text["parkngrab"] = "Park and Grab"
    attr_id["AbandonedBuilding"] = 54; attr_text["AbandonedBuilding"] = "Abandoned Structure"
    attr_id["hike_short"] = 55; attr_text["hike_short"] = "Short hike (less than 1km)"
    attr_id["hike_med"] = 56; attr_text["hike_med"] = "Medium hike (1km-10km)"
    attr_id["hike_long"] = 57; attr_text["hike_long"] = "Long hike (+10km)"
    attr_id["fuel"] = 58; attr_text["fuel"] = "Fuel Nearby"
    attr_id["food"] = 59; attr_text["food"] = "Food Nearby"

    attr_id["wirelessbeacon"] = 60; attr_text["wirelessbeacon"] = "Wireless Beacon"
    attr_id["partnership"] = 61; attr_text["partnership"] = "Partnership"
    attr_id["seasonal"] = 62; attr_text["seasonal"] = "Seasonal Access"
    attr_id["tourist"] = 63; attr_text["tourist"] = "Tourist Friendly"
    attr_id["treeclimbing"] = 64; attr_text["treeclimbing"] = "Tree Climbing"
    attr_id["frontyard"] = 65; attr_text["frontyard"] = "Front Yard (Private Residence)"
    attr_id["teamwork"] = 66; attr_text["teamwork"] = "Teamwork Required"
}
1439
# Print one <groundspeak:attribute> line, indented two blanks per
# level, for the attribute icon name kind. yesno becomes the inc value.
# Names whose attr_id entry compares equal to 0 produce no output.
function tagattr(lvl, kind, yesno) {
    kind = kind ""      # force a string subscript
    if (attr_id[kind] == 0)
        return
    printf "%*s<groundspeak:attribute id=\"%d\" inc=\"%d\">%s</groundspeak:attribute>\n",
        lvl*2, "", attr_id[kind], yesno, attr_text[kind]
}
1450
# Cache type: taken from the alt text of the image on a line mentioning
# cache_types.aspx; the trailing width=/title= attributes are cut off.
/cache_types.aspx/ {    # gc 02/01/11
    gs_type = $0
    sub(/.* alt=./, "", gs_type)
    sub(/. width=.*/, "", gs_type)
    sub(/. title=.*/, "", gs_type)
    debug(1, "gs_type: " gs_type)
}
# Cache name span. When the type is already known only the name is
# extracted and "next" skips all later rules for this record.
# Otherwise the type is taken from the alt text on this same line and
# processing falls through to the rules below, where the second
# ctl00_ContentBody_CacheName rule extracts the name.
/<span id="ctl00_ContentBody_CacheName">/ {
    if (gs_type)
    {
        gs_name = remspan($0, "ctl00_ContentBody_CacheName")
        next
    }
    gs_type = $0
    sub(/.* alt=./, "", gs_type)
    sub(/. width=.*/, "", gs_type)
    debug(1, "type: " gs_type)
}
# Cache name, span id without the ctl00 prefix
/<span id="CacheName">/         { gs_name = remspan($0, "CacheName") }
# Cache name; only reached when the rule above did not execute "next"
/<span id="ctl00_ContentBody_CacheName">/ {
    gs_name = remspan($0, "ctl00_ContentBody_CacheName")
}
# Waypoint id (gcid): content of a span whose id contains WaypointName
/<span id=".*WaypointName".*>/ {
    gcid = remspan($0)
}
# new way, yech!  Waypoint id taken from a ";wp=GC..." link parameter:
# keep what follows "wp=" up to the next double quote.
/;wp=GC.*" / {
    gcid = $0
    sub(/.*wp=/, "", gcid)
    sub(/".*/, "", gcid)
}
# Short description: content of the ShortDescription span
/<span id=".*ShortDescription">/ {
    gs_short_description = remspan($0)
}
# Long description (span id without ctl00 prefix); the waypoint table
# text is cleared at the same time.
/<span id="LongDescription">/ {
    waypoints = ""
    gs_long_description = remspanlong($0, "LongDescription")
}
# Long description (span id with ctl00 prefix); the waypoint table text
# is cleared at the same time.
/<span id="ctl00_ContentBody_LongDescription">/ {
    waypoints = ""
    gs_long_description = remspanlong($0, "ctl00_ContentBody_LongDescription")
}
# Hints inside <div id="div_hint">: newlines become blanks, leading
# blanks go, <br> becomes a newline; rot13 is applied when DECODE is set.
/<div id="div_hint"/ {
    hints = remdiv($0)
    gsub(/\n/, " ", hints)
    gsub(/^ */, "", hints)
    gsub(/<br>/, "\n", hints)
    if (DECODE)
        hints = rot13(hints)
}
# Hints inside <span id="Hints">: cleaned with htmlclean(), optionally
# run through rot13, then flattened onto one line.
/<span id="Hints"/ {
    hints = htmlclean(remspan($0))
    if (DECODE)
        hints = rot13(hints)
    gsub(/\n/, " ", hints)
}
# Hints inside <span id="ctl00_ContentBody_Hints">, taken from this one
# input line: text after "displayMe" + one character + ">" up to
# "</span>", with <br> turned into newlines.
/<span id="ctl00_ContentBody_Hints"/ {
    hints = $0
    sub(/.*displayMe.>/, "", hints)
    sub(/<\/span>.*/, "", hints)
    gsub(/<br>/, "\n", hints)
    if (DECODE)
        hints = rot13(hints)
}
# Additional Waypoints table, heading marked up with <b>: read the
# table text and convert it to the wplist records.
/<b>Additional Waypoints/ {
    wplist = splitwaypoints(waypoints = remwaypoints())
}
# Same, heading marked up with <strong>
/<strong>Additional Waypoints/ {
    wplist = splitwaypoints(waypoints = remwaypoints())
}
# 03/01/2011: same, table recognized by ContentBody_WaypointsInfo
/ContentBody_WaypointsInfo/ {
    wplist = splitwaypoints(waypoints = remwaypoints())
}
# Log collection. logs_section is zero outside the logs table; the two
# rules below switch it to 1 on the line carrying the table class.
/class="LogsTable Table"/ {     # old
    logs_section = 1
}
/class="LogsTable"/ {           # new 06/28/11
    logs_section = 1
}
# While the counter is positive every input line (including the one
# that started the section) is appended to logs without a separator.
(logs_section > 0) {
    logs = logs $0
}
# Nesting bookkeeping: a line containing "<table" raises the counter, a
# line containing "</table>" lowers it. Both rules can fire for the
# same line, and each counts a line once however many tags it holds.
# NOTE(review): if the line that sets logs_section = 1 also contains
# "<table", the counter starts at 2 and the matching "</table>" only
# brings it back to 1, so collection would continue - confirm against
# real pages.
(logs_section > 0) && /<table/ {
    logs_section += 1
}
(logs_section > 0) && /<\/table>/ {
    logs_section -= 1
}

# Logs delivered inside <span id="CacheLogs">; this replaces whatever
# logs held before.
/<span id="CacheLogs">/         {
    logs = remspanlong($0, "CacheLogs")
    # remove header which does not exist >2010-01-12
    sub(".*td class=.containerHeader.>Cache Logs</td></tr>", "", logs)
}
# Logs delivered inside <span id="ctl00_ContentBody_CacheLogs">
/<span id="ctl00_ContentBody_CacheLogs">/               {
    logs = remspanlong($0, "ctl00_ContentBody_CacheLogs")
}
# Cache statistics: content of the CacheStats span
/<span id=".*CacheStats">/ {
    stats = remspan($0)
}
# Number of visits: content of the NumVisits span, echoed at debug
# level 1
/<span id=".*NumVisits">/ {
    debug(1, numvisits = remspan($0))
}
1555
# Cache id (gid) from the print-friendly link: the value after "ID="
# when present (printable page has ID number), otherwise the value
# after "guid=" (non-printable page has guid number). In both cases the
# value ends at the next "&".
/lnkPrintFriendly/ {
    gid = $0
    if (gid ~ /ID=/)
        sub(/^.*ID=/, "", gid)
    else
        sub(/^.*guid=/, "", gid)
    sub(/&.*/, "", gid)
}
# Owner line "by <a href..." with optional leading "A cache "
# (08/21/2012): the owner name is the link text after "ds=2", the guid
# is the value after "guid=" up to the next "&".
/^ *(A cache )*by <a href/ {
    gs_guid = $0
    sub(/.*guid=/, "", gs_guid)
    sub(/&.*/, "", gs_guid)

    gs_owner = $0
    sub(/.*ds=2.>/, "", gs_owner)
    sub(/<.*/, "", gs_owner)
    debug(1, "owner: " gs_owner)
}
# Fake gs_guid is user 03/01/2011: value after the last ": " up to "}"
/userInfo = {ID:/ {
    gs_guid = $0
    sub(/.*: /, "", gs_guid); sub(/}.*/, "", gs_guid)
    debug(1, "guid " gs_guid)
}
# Container size: text after "Size: " in the image alt attribute, cut
# one character before the closing " />".
/.* alt=.Size/ {
    gs_size = $0
    sub(/.*Size: /, "", gs_size)
    sub(/. \/>.*/, "", gs_size)
}
# Span id="CacheOwner": type, owner, size and guid all come from the
# content of this one span.
/<span id="CacheOwner"/ {
    text = remspan($0)
    debug(1, "Owner text " text)
    # type: everything before the first tag
    gs_type = text; sub(/<.*/, "", gs_type)
    # owner: text after "<br>by ", up to " [", with link tags removed
    gs_owner = text
        debug(1, gs_owner)
        sub(/.*<br>by /, "", gs_owner); sub(/ [[].*/, "", gs_owner)
        debug(1, gs_owner)
        sub(/<a[^>]*>/, "", gs_owner)
        sub(/<.a[^>]*>/, "", gs_owner)
        sub(/.*<br .>/, "", gs_owner)
        sub(/^by /, "", gs_owner)
        debug(1, "owner " gs_owner)
    # size: text after "Size: " up to the next tag
    gs_size = text; sub(/.*Size: /, "", gs_size); sub(/<.*/, "", gs_size)
    # guid: value after "guid=" up to the next "&"
    gs_guid = text; sub(/.*guid=/, "", gs_guid)
    sub(/&.*/, "", gs_guid)
    debug(1, "guid " gs_guid)
}
# Span id="ctl00_ContentBody_CacheOwner": same fields, but taken from
# the raw input line instead of the span content.
/<span id="ctl00_ContentBody_CacheOwner"/ {
    text = $0
    debug(2, "Owner text: " text)
    # type: text between the last ">" and the first "<br "
    gs_type = text
    sub(/<br .*/, "", gs_type)
    sub(/.*>/, "", gs_type)
    debug(1, "gs_type: " gs_type)

    # owner: link text following "ds=2"
    gs_owner = text
    sub(/.*ds=2.>/, "", gs_owner); sub(/<.*/, "", gs_owner)
    debug(1, "gs_owner: " gs_owner)

    gs_size = text; sub(/.*Size: /, "", gs_size); sub(/<.*/, "", gs_size)
    # guid: value after "guid=", cut at "&" and before a title= attribute
    gs_guid = text; sub(/.*guid=/, "", gs_guid)
    sub(/&.*/, "", gs_guid)
    sub(/. title=.*/, "", gs_guid)
    debug(1, "guid: " gs_guid)
}
# Status from <span id="ErrorText">, judged on the current input line:
# "unavailable" clears available, "been archived" sets archived.
/<span id="ErrorText"/ {
    if (/unavailable/)
        available = "False"
    if (/been archived/)
        archived = "True"
}
# Status from <span id="ctl00_ContentBody_ErrorText">, judged on the
# span content.
/<span id="ctl00_ContentBody_ErrorText"/ {
    errortext = remspan($0, "ctl00_ContentBody_ErrorText")
    if (errortext ~ /unavailable/)
        available = "False"
    if (errortext ~ /been archived/)
        archived = "True"
    debug(1, "available: " available "; archived: " archived)
}
# Coordinates from the LargeMapPrint span: values of the latitude= and
# longitude= parameters of the link inside it.
/<span id="LargeMapPrint"/ {
    text = remspan($0)

    lat = text
    sub(/.*latitude=/, "", lat)
    sub(/&.*/, "", lat)

    lon = text
    sub(/.*longitude=/, "", lon)
    sub(/".*/, "", lon)
    sub(/&.*/, "", lon)
}
# Coordinates from a script line "var lat=...; ... lng=...;", used only
# while lat is still empty.
/var lat=[-0-9]/ && lat == "" {
    lat = $0
    sub(/.*lat=/, "", lat)
    sub(/;.*/, "", lat)

    lon = $0
    sub(/.*lng=/, "", lon)
    sub(/;.*/, "", lon)
}
# State and country from the Location span ("In state, country").
/<span id=".*Location"/ {
    gs_state = gs_country = text = remspan($0)

    sub(/In */, "", gs_state)
    sub(/,.*/, "", gs_state)

    sub(/.*, /, "", gs_country)
    sub(/ <.*/, "", gs_country)
    sub(/^In /, "", gs_country)
}
# Coordinates from a "lat=...; lng=...; guid=" line, used only while
# lat is still empty.
/lat=.*; lng=.*; guid=/ && lat == "" {
    lat = $0
    sub(/.*lat=/, "", lat)
    sub(/;.*/, "", lat)

    lon = $0
    sub(/.*lng=/, "", lon)
    sub(/;.*/, "", lon)
}
# Hidden date, layout of 2/1/11: the date sits on the second line after
# the marker and is read as MM/DD/YYYY, stored in time as YYYY-MM-DD.
/<span class="minorCacheDetails">Hidden/ {      # gc 2/1/11
    getline time
    getline time
    sub(/^ */, "", time)
    sub(/<.*/, "", time)
    split(time, fld, "/")
    time = sprintf("%d-%02d-%02d", fld[3], fld[1], fld[2])
    debug(1, "time: " time)
}
# Hidden date, layout of 6/28/11: the date sits on the third line after
# the marker. Dashes are turned into slashes first, so the rc == 1
# retry with "-" as separator cannot find anything new.
# Field order: DATEFMT == 1 means DD/MM/YYYY; otherwise a first field
# of 1000 or more means YYYY/MM/DD, else MM/DD/YYYY.
/> <span class="minorCacheDetails">/ {  # gc 6/28/11
    getline time
    getline time
    getline time
    sub(/^ */, "", time)
    sub(/<.*/, "", time)
    gsub(/-/, "/", time)
    rc = split(time, fld, "/")
    if (rc == 1)
        rc = split(time, fld, "-")
    debug(1, "timerc: " rc)
    if (DATEFMT == 1)
        time = sprintf("%d-%02d-%02d", fld[3], fld[2], fld[1])
    else if (fld[1] >= 1000)
        time = sprintf("%d-%02d-%02d", fld[1], fld[2], fld[3])
    else
        time = sprintf("%d-%02d-%02d", fld[3], fld[1], fld[2])
    debug(1, "time: " time)
}
# Hidden date in <span id="DateHidden">, read as MM/DD/YYYY. The
# getline consumes the following input line into text, which this rule
# does not use afterwards.
/<span id="DateHidden">/ {
    getline text
    time = remspan($0)
    split(time, fld, "/")
    time = sprintf("%d-%02d-%02d", fld[3], fld[1], fld[2])
}
# Hidden date in <span id="ctl00_ContentBody_DateHidden">. Two shapes
# are accepted: three "/"-separated fields (MM/DD/YYYY), or three
# ","-separated fields whose second holds "Monthname DD" and whose
# third holds the year. A recognized date ends with "next", skipping
# the remaining rules for this record; anything else leaves time empty.
/<span id="ctl00_ContentBody_DateHidden">/ {
    time = remspan($0, "ctl00_ContentBody_DateHidden")
    rc = split(time, fld, "/")
    if (rc == 3)
    {
        time = sprintf("%d-%02d-%02d", fld[3], fld[1], fld[2])
        debug(1, "time: " time)
        next
    }
    rc = split(time, fld, ",")
    if (rc == 3)
    {
        yyyy = fld[3];
        # fld is reused: it now holds the words of the middle field
        split(fld[2], fld, " ")
        mm = Month[ fld[1] ]
        dd = fld[2]
        time = sprintf("%d-%02d-%02d", yyyy, mm, dd)
        debug(1, "time: " time)
        next
    }
    time = ""
}
1729 /ctl00_ContentBody_uxLegendScale/ {
1730     text = $0
1731     sub(/.*alt=./, "", text); sub(/ .*/, "", text)
1732     gs_diff = text
1733     debug(1 , "gs_diff: " gs_diff)
1734 }
1735 /ctl00_ContentBody_Localize/ {
1736     text = $0
1737     sub(/.*alt=./, "", text); sub(/ .*/, "", text)
1738     gs_terr = text
1739     debug(1 , "gs_terr: " gs_terr)
1740 }
1741 /^ *Difficulty:<.strong>/ {
1742     getline text
1743     sub(/.*alt=./, "", text); sub(/ .*/, "", text)
1744     gs_diff = text
1745     debug(1 , "gs_diff: " gs_diff)
1746 }
1747 /^ *Difficulty:/ {      # gc 2/1/11
1748     getline text
1749     getline text
1750     getline text
1751     sub(/.*alt=./, "", text); sub(/ .*/, "", text)
1752     gs_diff = text
1753     debug(1 , "gs_diff: " gs_diff)
1754 }
1755 /<span id="Difficulty">/ {
1756     text = remspan($0)
1757     sub(/.*alt=./, "", text); sub(/ .*/, "", text)
1758     gs_diff = text
1759 }
1760 /<span id="ctl00_ContentBody_Difficulty">/ {
         # Same extraction as the bare "Difficulty" span rule, for the span id
         # with the ctl00_ContentBody_ prefix; the id is also handed to
         # remspan() as its second argument.
1761     text = remspan($0, "ctl00_ContentBody_Difficulty")
1762     sub(/.*alt=./, "", text); sub(/ .*/, "", text)
1763     debug(1, "difficulty " text)
1764     gs_diff = text
1765 }
1766 /^ *Terrain:<.strong>/ {
         # Layout where the "Terrain:" label is directly followed by a closing
         # strong tag: the value is taken from the alt= text on the next
         # input line.
         # NOTE(review): getline into a variable leaves $0 unchanged, so the
         # broader /^ *Terrain:/ rule that follows also fires on this same
         # line, reads three more lines and overwrites gs_terr. Confirm that
         # this is intended.
1767     getline text
1768     sub(/.*alt=./, "", text); sub(/ .*/, "", text)
1769     gs_terr = text
1770     debug(1 , "gs_terr: " gs_terr)
1771 }
1772 /^ *Terrain:/ {         # gc 2/1/11
         # Layout tagged "gc 2/1/11" by the original author: the first two
         # lines after the label are read and discarded (each getline
         # overwrites text); the value is parsed from the third line.
1773     getline text
1774     getline text
1775     getline text
         # keep the single word that follows the last "alt=" and one character
1776     sub(/.*alt=./, "", text); sub(/ .*/, "", text)
1777     gs_terr = text
1778     debug(1 , "gs_terr: " gs_terr)
1779 }
1780 /<span id="Terrain">/ {
         # Layout with a bare "Terrain" span: the current line is passed
         # through remspan() (defined elsewhere), then the word after the last
         # "alt=" and one character is kept as gs_terr. No debug output here.
1781     text = remspan($0)
1782     sub(/.*alt=./, "", text); sub(/ .*/, "", text)
1783     gs_terr = text
1784 }
1785 /<span id="ctl00_ContentBody_Terrain">/ {
         # Same extraction as the bare "Terrain" span rule, for the span id
         # with the ctl00_ContentBody_ prefix; the id is also handed to
         # remspan() as its second argument.
1786     text = remspan($0, "ctl00_ContentBody_Terrain")
1787     sub(/.*alt=./, "", text); sub(/ .*/, "", text)
1788     debug(1, "terrain " text)
1789     gs_terr = text
1790 }
1791 /title=.What are Attributes?/ {
         # Attribute icons: reduce the line to a blank-separated list of icon
         # base names, then sort them into attr_yes[] / attr_no[] by their
         # -yes / -no suffix. The </html> rule writes both lists out.
         # Note: the "?" in the pattern makes the final "s" optional; it does
         # not match a literal question mark.
1792     text = $0
1793     debug(5, "Attr " text)
         # strip the image tag prefix up to and including the attributes path
1794     gsub("<img src=./images/attributes/", "", text)
1795     # before 06/03/10
1796         gsub(/alt="[^"]*" width="30" height="30" .>/, "", text)
1797     # after 06/03/10
1798         gsub(/alt="[^"]*" title="[^"]*" width="30" height="30" .>/, "", text)
         # drop the rest of the line from the NoSpacing paragraph onwards
1799     gsub("<p class=.NoSpacing.*", "", text)
1800     gsub(/^ */, "", text)
         # remove ".gif" together with the two characters that follow it
1801     gsub(/\.gif../, "", text)
1802     gsub(/attribute-blank/, "", text)
1803
         # yes-list: delete every name ending in -no, then drop the -yes suffix
1804     attrs_yes = text
1805     gsub(/[a-z0-9A-Z]*-no/, "", attrs_yes)
1806     gsub(/-yes/, "", attrs_yes)
1807
         # no-list: delete every name ending in -yes, then drop the -no suffix
1808     attrs_no = text
1809     gsub(/[a-z0-9A-Z]*-yes/, "", attrs_no)
1810     gsub(/-no/, "", attrs_no)
1811
1812     debug(1, "attrs_yes: " attrs_yes)
1813     debug(1, "attrs_no: " attrs_no)
         # splitting on " " uses default field splitting, so runs of blanks
         # left behind by the deletions do not create empty entries
1814     nattr_yes = split(attrs_yes, attr_yes, " ")
1815     nattr_no = split(attrs_no, attr_no, " ")
1816     debug(1, "nattr_yes: " nattr_yes)
1817     debug(1, "nattr_no: " nattr_no)
1818 }
1819 /^{.status.:.success/ {
         # A line that starts with {, any character, status, any character,
         # a colon, any character, success (the dots stand where quote marks
         # would be) is handed to ParseJSON(), which is defined elsewhere and
         # fills json_logs. The flag makes the </html> rule take its log
         # entries from json_logs instead of the HTML log table.
1820     ParseJSON($0, json_logs)
1821     json_log_bool = 1
1822 }
1823
1824 BEGIN {
         # One-time setup before any input is read.
         # Month[]: full English month name -> month number, used by the date
         # parsing rules.
1825     Month["January"] = 1
1826     Month["February"] = 2
1827     Month["March"] = 3
1828     Month["April"] = 4
1829     Month["May"] = 5
1830     Month["June"] = 6
1831     Month["July"] = 7
1832     Month["August"] = 8
1833     Month["September"] = 9
1834     Month["October"] = 10
1835     Month["November"] = 11
1836     Month["December"] = 12
         # prefix of the url tag; the </html> rule appends "?wp=" and the code
1837     BaseURL = "http://www.geocaching.com/seek/cache_details.aspx"
         # attr_begin() is defined elsewhere; presumably sets up the attribute
         # tables used by tagattr() -- confirm
1838     attr_begin()
1839
         # first stays 1 until the GPX header has been written once
1840     first = 1
1841
         # wpt_init() is defined elsewhere; it is called again at the end of
         # every </html> rule, presumably to reset the per-cache state -- confirm
1842     wpt_init()
1843 }
1844 /<\/html>/ {
         # End of one cache page: write everything collected by the earlier
         # rules as one GPX wpt element with a groundspeak:cache extension
         # (description, attributes, hints, logs), followed by one wpt per
         # additional waypoint, then reset the state through wpt_init().
         #
         # Reads the option variables INCR, NOHTML, NOWPTS, NOZERO, NUMLOGS,
         # USERNAME, DATEFMT, DATE and YR, which are set outside this rule.
         # All variables used here are awk globals.
1845     if ((lat == "") || (lon == ""))
1846     {
             # only a diagnostic: the "next" below is disabled, so the
             # waypoint is still written with empty coordinates
1847         debug(0, "Waypoint coordinates not found for " gcid ", no output!")
1848         #next
1849     }
1850
1851     # too long a block to be indented
         # XML and GPX header: once per run, and not at all with -i (INCR)
1852     if (!INCR && first)
1853     {
1854         print "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
1855         tagstart(0, "gpx")
1856         tagtext(1, "desc", "Geocache file generated by geo-html2gpx")
1857         tagtext(1, "author", "geo-html2gpx")
1858         "date +%Y-%m-%dT%H:%M:%S" | getline date
1859         tagtext(1, "time", date)
1860         first = 0
1861     }
1862
1863     gs_name = umlauts(gs_name)
1864     gs_owner = umlauts(gs_owner)
1865
1866     tagstart(1, "wpt", "lat=\"" lat "\" lon=\"" lon "\"")
         # the hidden date carries no time of day; a fixed one is appended
1867     if (time != "")
1868         tagtext(2, "time", time "T07:00:00Z")
1869     tagtext(2, "name", gcid)
1870     tagtext(2, "desc", gs_name " by " gs_owner ", " \
1871                     gs_type " (" gs_diff "/" gs_terr ")")
1872
1873     # alternate URL... tagtext(2, "url", BaseURL "?wp=" gcid)
1874     # alternate URL... tagtext(2, "url", BaseURL "?id=" gid)
1875     tagtext(2, "url", BaseURL "?wp=" gcid)
1876     tagtext(2, "urlname", gs_name)
1877
1878     # we do this last... tagtext(2, "sym", sym)
1879
1880     tagtext(2, "type", "Geocache|" gs_type)
1881
1882     # FIXME? GC-written GPX files contain numeric, non-UUID,
1883     # cache/owner/finder ids
1884     # Oregon needs numeric cache id, or behaves erratically!
1885     gid = wp2id(gcid)
1886     tagstart(2, "groundspeak:cache",
1887             "id=\"" gid "\" available=\"" available \
1888             "\" archived=\"" archived "\"" \
1889             " xmlns:groundspeak=\"http://www.groundspeak.com/cache/1/0/1\"")
1890     tagtext(3, "groundspeak:name", gs_name)
1891     tagtext(3, "groundspeak:placed_by", gs_owner)
1892     tagptext(3,"groundspeak:owner", "id=\"" gs_guid "\"", gs_owner)
1893     tagtext(3, "groundspeak:type", gs_type)
1894     tagtext(3, "groundspeak:container", gs_size)
1895
         # attribute lists come from the "What are Attributes" rule
1896     if (nattr_yes != 0 || nattr_no != 0)
1897     {
1898         tagstart(3, "groundspeak:attributes")
1899         for (i = 1; i <= nattr_yes; ++i)
1900             tagattr(4, attr_yes[i], 1)
1901         for (i = 1; i <= nattr_no; ++i)
1902             tagattr(4, attr_no[i], 0)
1903         tagend(3, "groundspeak:attributes")
1904     }
1905
1906     tagtext(3, "groundspeak:difficulty", gs_diff)
1907     tagtext(3, "groundspeak:terrain", gs_terr)
1908     tagtext(3, "groundspeak:country", gs_country)
1909     tagtext(3, "groundspeak:state", gs_state)
1910     if (!NOHTML)
1911     {
1912         tagptext(3, "groundspeak:short_description", "html=\"True\"",
1913                                                         gs_short_description)
1914         if (!NOWPTS && waypoints)
1915         {
1916             # reproduce "simplified table" by GC PQ
1917             # prefixed_gcid - wpname<br />original_style_coord<br />note<br />
1918             waypoints = wpclean(waypoints)
1919             # include "zero" waypoints here!
1920             gs_long_description = gs_long_description \
1921                                 "<p>Additional Waypoints</p>" waypoints
1922         }
1923         tagptext(3, "groundspeak:long_description", "html=\"True\"",
1924                                                         gs_long_description)
1925     }
1926     else
1927     {
1928         gs_short_description = htmlclean(gs_short_description)
1929         tagptext(3, "groundspeak:short_description", "html=\"False\"",
1930                                                         gs_short_description)
1931         gs_long_description = htmlclean(gs_long_description)
             # honour -w (NOWPTS) here as well, like the HTML branch above
             # and the wpt output at the end of this rule
1932         if (!NOWPTS && waypoints)
1933             gs_long_description = gs_long_description \
1934                     "\n\nAdditional Waypoints\n" tableclean(waypoints)
1935         tagptext(3, "groundspeak:long_description", "html=\"False\"",
1936                                                         gs_long_description)
1937     }
1938     tagtext(3, "groundspeak:encoded_hints", hints)
1939
1940     if (json_log_bool)
1941     {
             # Logs delivered as JSON (see the status/success rule).
             # NOTE(review): the loop runs for i = 1 .. nlogs-1, mirroring the
             # HTML branch below; whether that skips the last JSON entry
             # depends on JSONArrayLength()/ParseJSON(), defined elsewhere.
1942         nlogs = JSONArrayLength(json_logs, "data")
1943         if (nlogs > NUMLOGS+1)
1944             nlogs = NUMLOGS+1
1945         debug(1, "New Logs: " nlogs)
             # with no entries the logs tag is opened with "/" as its extra
             # argument, presumably producing an empty element -- confirm
1946         if (nlogs > 1)
1947             tagstart(3, "groundspeak:logs")
1948         else
1949             tagstart(3, "groundspeak:logs", "/")
1950
1951         for (i = 1; i < nlogs; ++i)
1952         {
                 # the log type is derived from the name of its icon
1953             ltype = json_logs["data" SUBSEP i SUBSEP "LogTypeImage"]
1954             if (ltype ~ /smile/) ltype = "Found it"
1955             else if (ltype ~ /happy/) ltype = "Found it"
1956             else if (ltype ~ /note/) ltype = "Write note"
1957             else if (ltype ~ /sad/) ltype = "Didn'"'"'t Find it"
1958             else if (ltype ~ /attended/) ltype = "Attended"
1959             else if (ltype ~ /rsvp/) ltype = "Will Attend"
1960             else if (ltype ~ /greenlight/) ltype = "Green"
1961             else if (ltype ~ /traffic_cone/) ltype = "Archive"
1962             else if (ltype ~ /disabled/) ltype = "Temporarily Disable Listing"
1963             else if (ltype ~ /coord_update/) ltype = "Update Coordinates"
1964             else ltype = "Unknown"
1965
1966             ldate = json_logs["data" SUBSEP i SUBSEP "Visited"]
                 # "/" and "-" both count as separators, as in the HTML log
                 # parser below, so 2011-07-13 is parsed like 2011/07/13
1967             n = split(ldate, fld, "[/-]")
1968             if (n == 3)
1969             {
1970                 #new format: 08/18/2011
                     # a first field above 31 can only be a year: YYYY/MM/DD
                     if (fld[1] + 0 > 31)
                         ldate = sprintf("%d-%02d-%02dT20:00:00Z",
                             fld[1], fld[2], fld[3])
1971                 else if (DATEFMT == 1)
1972                     ldate = sprintf("%d-%02d-%02dT20:00:00Z",
1973                         fld[3], fld[2], fld[1])
1974                 else
1975                     ldate = sprintf("%d-%02d-%02dT20:00:00Z",
1976                         fld[3], fld[1], fld[2])
1977                 debug(2, "logdate: " ldate)
1978             }
1979             lfinder = json_logs["data" SUBSEP i SUBSEP "UserName"]
1980             lfinder = umlauts(lfinder)
1981             logid = json_logs["data" SUBSEP i SUBSEP "LogID"]
1982             guid = json_logs["data" SUBSEP i SUBSEP "AccountID"]
1983             ltext = json_logs["data" SUBSEP i SUBSEP "LogText"]
1984             ltext = htmlclean(ltext)
1985             ltext = umlauts(ltext)
1986
                 # a find or attendance logged by USERNAME changes the symbol
                 # written for the cache at the end of this rule
1987             if (lfinder == USERNAME && ltype == "Found it")
1988                 sym = "Geocache Found"
1989             if (lfinder == USERNAME && ltype == "Attended")
1990                 sym = "Geocache Found"
1991             tagstart(4, "groundspeak:log", "id=\"" logid "\"")
1992             tagtext(5, "groundspeak:date", ldate)
1993             tagtext(5, "groundspeak:type", ltype)
1994             tagptext(5, "groundspeak:finder", "id=\"" guid "\"", lfinder)
1995             tagptext(5, "groundspeak:text", "encoded=\"" "False" "\"", ltext)
1996             tagend(4, "groundspeak:log")
1997         }
1998
1999         if (nlogs > 1)
2000             tagend(3, "groundspeak:logs")
2001     }
2002     else
2003     {
             # Logs scraped from the HTML log table: one entry per table row.
             # The piece after the last row separator is never processed
             # (the loop stops at nlogs-1).
2004         # nlogs = split(logs, entry, "</tr>")
2005         nlogs = split(logs, entry, "</tr><tr>")
2006         if (nlogs > NUMLOGS+1)
2007             nlogs = NUMLOGS+1
2008
2009         if (nlogs > 1)
2010             tagstart(3, "groundspeak:logs")
2011         else
2012             tagstart(3, "groundspeak:logs", "/")
2013
2014         for (i = 1; i < nlogs; ++i)
2015         {
2016             sub("<tr><td[^>]*>", "", entry[i])
2017             sub("</td>", "", entry[i])
2018             if (!entry[i]) continue
2019             # old split location
2020             sub(/.*<[Ss][Tt][Rr][Oo][Nn][Gg]><img src=./, "", entry[i])
2021
2022             ltype = entry[i]
2023             #debug(1, "log: " ltype)
2024             sub(/>.*/, "", ltype) # leaves the URL of the smiley
2025             if (ltype ~ /smile/) ltype = "Found it"
2026             else if (ltype ~ /happy/) ltype = "Found it"
2027             else if (ltype ~ /note/) ltype = "Write note"
2028             else if (ltype ~ /sad/) ltype = "Didn'"'"'t Find it"
2029             else if (ltype ~ /attended/) ltype = "Attended"
2030             else if (ltype ~ /rsvp/) ltype = "Will Attend"
2031             else if (ltype ~ /greenlight/) ltype = "Green"
2032             else if (ltype ~ /traffic_cone/) ltype = "Archive"
2033             else if (ltype ~ /disabled/) ltype = "Temporarily Disable Listing"
2034             else if (ltype ~ /coord_update/) ltype = "Update Coordinates"
2035             else ltype = "Unknown"
2036
2037             ldate = entry[i]
2038             # split off &nbsp;/blank
2039             sub(/^[^>]*>[^ ;]*[ ;]/, "", ldate)
2040             sub(/ by <.*/, "", ldate)
2041             sub(/ by /, "", ldate)
2042             sub(/.*LogDate.>about /, "", ldate)
2043             sub(/.*LogDate.>/, "", ldate)
2044             sub(/<.*/, "", ldate)
2045             gsub(/-/, "/", ldate)
2046             debug(1, "logdate: " ldate)
2047             if (ldate ~ /ago/)
2048             {
                     # relative date ("... ago"): resolved by the date command.
                     # ldate comes from the fetched page and is pasted into a
                     # shell command line, so keep only letters, digits and
                     # blanks to rule out quoting tricks
                     gsub(/[^A-Za-z0-9 ]/, "", ldate)
2049                 cmd = sprintf("%s -d \"12am %s\" +%%Y-%%m-%%dT07:00:00Z",
2050                     DATE, ldate)
2051                 cmd | getline ldate; close(cmd)
2052             }
2053             else
2054             {
2055                 n = split(ldate, fld, " ")
2056                 if (n >= 2)
2057                 {
2058                     #old format: August 18
2059                     mm = Month[fld[1]]
2060                     dd = fld[2] + 0
                         # start without a year so that an entry lacking one
                         # falls back to YR instead of reusing the year of an
                         # earlier log entry
                         yy = ""
2061                     if (n >= 3)
2062                         yy = fld[3]
2063                     if (yy+0 == 0)
2064                         yy = YR
2065                     ldate = sprintf("%d-%02d-%02dT07:00:00", yy, mm, dd)
2066                 }
2067                 n = split(ldate, fld, "/")
2068                 if (n == 3)
2069                 {
2070                     #new format: 08/18/2011
                         # a first field above 31 can only be a year:
                         # YYYY/MM/DD, or YYYY-MM-DD after the gsub above
                         if (fld[1] + 0 > 31)
                             ldate = sprintf("%d-%02d-%02dT07:00:00",
                                 fld[1], fld[2], fld[3])
2071                     else if (DATEFMT == 1)
2072                         ldate = sprintf("%d-%02d-%02dT07:00:00",
2073                             fld[3], fld[2], fld[1])
2074                     else
2075                         ldate = sprintf("%d-%02d-%02dT07:00:00",
2076                             fld[3], fld[1], fld[2])
2077                     debug(1, "logdate: " ldate)
2078                 }
2079             }
2080
2081             lfinder = entry[i]
2082             sub(/[^<]*</, "", lfinder)  # Delete all before <A NAME...
2083
2084             logid = lfinder
2085             sub(/[^"]*"/, "", logid)
2086             sub(/.* id="/, "", logid)
2087             sub(/.*LUID=/, "", logid)
2088             sub(/\".*/, "", logid)
2089             debug(1, "logid: " logid)
2090
2091             guid = lfinder
2092             debug(1, "guid: " guid)
2093             #sub(/[^>]*>/, "", guid)            # Delete all before <A HREF...
2094             #sub(/>.*/, "", guid)               # Delete all after <A HREF...
2095             sub(/.*guid=/, "", guid)
2096             sub(/\".*/, "", guid)
2097             sub(/\&.*/, "", guid)
2098             sub(/. id=.*/, "", guid)
2099             debug(1, "guid: " guid)
2100
2101             #debug(1, "lfinder: " lfinder)
2102             sub(/[^>]*>/, "", lfinder)          # Delete all before <A HREF...
2103             #debug(1, "lfinder: " lfinder)
2104             #sub(/[^>]*>/, "", lfinder) # Delete all before name
2105             sub(/<.*/, "", lfinder)             # Delete all after name
2106             lfinder = umlauts(lfinder)
2107             debug(1, "lfinder: " lfinder)
2108
2109             ltext = entry[i]
2110             sub(/.*found\)<br .>/, "", ltext)
2111             sub("</font>.*", "", ltext)
2112             sub("<a href=.log.aspx[^>]*>[^<]*</a>", "", ltext)
2113             sub("<a href=.upload.aspx[^>]*>[^<]*</a>", "", ltext)
2114             # remove remaining HTML tags from log text. Seems to be a good
2115             # idea in any case, independent of NOHTML setting!
2116             ltext = htmlclean(ltext)
2117             ltext = umlauts(ltext)
2118
2119             if (lfinder == USERNAME && ltype == "Found it")
2120                 sym = "Geocache Found"
2121             if (lfinder == USERNAME && ltype == "Attended")
2122                 sym = "Geocache Found"
2123             tagstart(4, "groundspeak:log", "id=\"" logid "\"")
2124             tagtext(5, "groundspeak:date", ldate)
2125             tagtext(5, "groundspeak:type", ltype)
2126             tagptext(5, "groundspeak:finder", "id=\"" guid "\"", lfinder)
2127             tagptext(5, "groundspeak:text", "encoded=\"" "False" "\"", ltext)
2128             tagend(4, "groundspeak:log")
2129         }
2130         if (nlogs > 1)
2131             tagend(3, "groundspeak:logs")
2132     }
2133
2134     tagstart(3, "groundspeak:travelbugs", "/")
2135
2136     tagend(2, "groundspeak:cache")
         # written last because the log loops above may have changed sym
2137     tagtext(2, "sym", sym)
2138     tagend(1, "wpt")
2139
2140     # add Additional Waypoints in wpt form
2141     if (!NOWPTS && wplist)
2142     {
             # one waypoint record per line; split() returns how many
2143         i = split(wplist, wps, "\n")
2147         wp = 0
2148         while (wp < i)
2149         {
2150             ++wp
2151             # lat lon|prefix|lookup|wpname|url|note
2152             # i.e.: lat="44.888267" lon="-93.159233"|PC|PARK|http://...
2153             #           |GCPMG6-Parking (Parking Area)|.31 miles from cache.
2154             debug(1, "wps: " wps[wp])
2155             split(wps[wp], line, "|")
                 # skip empty records and, with -z (NOZERO), records whose
                 # coordinates are all zero
2156             if (line[1] &&
2157                 (!NOZERO || (line[1] !~ "lat=\"0.000000\" lon=\"0.000000\"") ) )
2158             {
2159                 # line format: coords|prefix|lookup|wpname|note
2160                 tagstart(1, "wpt", line[1])
2161                 #tagtext(2, "time", "...")
                     # name: waypoint prefix + cache code without its first
                     # two characters
2162                 tagtext(2, "name", line[2] substr(gcid,3))
2163                 tagtext(2, "cmt", line[6] ? line[6] : "")
                     # NOTE(review): statname is computed but not used in this
                     # rule; kept in case code outside this rule reads it
2164                 statname = line[4]
2165                 gsub(" \\(.*\\).*", "", statname)
2166
                     # description: waypoint name without the "(type)" part
2167                 desc = line[4]
2168                 sub(" \\(.*", "", desc)
2169                 tagtext(2, "desc", desc)
2170
2171                 tagtext(2, "url", line[5])
2172
2173                 urlname = desc
2174                 tagtext(2, "urlname", urlname)
2175
                     # type: the text between the parentheses of the name
2176                 stattype = line[4]
2177                 gsub(".*\\(", "", stattype)
2178                 gsub("\\).*", "", stattype)
2179                 tagtext(2, "sym", stattype)
2180                 tagtext(2, "type", "Waypoint|" stattype)
2181                 tagend(1, "wpt")
2182             }
2183         }
2184     }
2185     wpt_init()
2186 }
2187 END {
         # Close the gpx element only when a header was actually written:
         # first is cleared by the </html> rule when it emits the header, and
         # with -i (INCR) no header is produced at all.
2188     if (!INCR && !first)
2189         tagend(0, "gpx")
2190 }
2191 ' | $POSTPROC