tools/geo-html2gpx

   1 #!/bin/sh
   2
   3 PROGNAME="$0"
   4
   5 usage() {
   6         cat <<EOF
   7 NAME
   8         `basename $PROGNAME`- Convert gc.com *printable* web pages into GPX
   9
  10 SYNOPSIS
  11         `basename $PROGNAME` [options] [gc-com.html]...
  12
  13 DESCRIPTION
  14         Convert gc.com *printable* web pages into GPX, including
  15         cache description and all logs.
  16
  17         The *printable* web pages can be fetched using geo-nearest,
  18         geo-newest, geo-placed, geo-found, or geo-gid with the -H option.
  19
  20 OPTIONS
  21         -b              Normalize output by postprocessing with gpsbabel
  22         -e              Encode hints with rot13 (e.g. NORTH = ABEGU)
  23         -i              Incremental, no XML and GPX headers
  24         -l number       Maximum number of log entries to be exported [unlimited]
  25         -n              No HTML in descriptions (experimental)
  26         -o FMT          Output FMT instead of GPX by using gpsbabel
  27         -u username     Indicate found status for username [$USERNAME]
  28         -w              Do not add "Additional Waypoints" to the GPX output
  29         -z              Do not output waypoints with "zero" coordinates
  30         -E var=val      Set environment "var" to "val"
  31                         i.e. DATEFMT=0|1
  32         -D lvl          Debug level
  33
  34 DEFAULTS
  35         Defaults can also be set with variables in file \$HOME/.georc:
  36
  37             DATEFMT=[0|1];
  38
  39 DATE FORMATS
  40         Geocaching.com date formats that are compatible:
  41
  42             GC Format   Example     Compatible
  43             YYYY-MM-DD  2011-07-13  yes
  44             YYYY/MM/DD  2011/07/13  yes
  45             MM/DD/YYYY  07/13/2011  yes
  46             DD/MM/YYYY  13/07/2011  yes if DATEFMT=1 in \$HOME/.georc
  47             DD/Mmm/YYYY 13/Jul/2001 no
  48             Mmm/DD/YYYY Jul/13/2011 no
  49             DD Mmm YY   13 Jul 11   yes (english only)
  50
  51 EXAMPLES
  52         Convert into GPX:
  53
  54             geo-found -n9999 -H. > /dev/null
  55             geo-html2gpx *.html > found.gpx
  56 EOF
  57
  58         exit 1
  59 }
  60
  61 #
  62 #       Report an error and exit
  63 #
  64 error() {
  65         echo "`basename $PROGNAME`: $1" >&2
  66         exit 1
  67 }
  68
  69 debug() {
  70         if [ $DEBUG -ge $1 ]; then
  71             echo "`basename $PROGNAME`: $2" >&2
  72         fi
  73 }
  74
  75 if [ `uname` = 'Darwin' ]; then
  76     awk=gawk
  77     date=gdate
  78 else
  79     awk=awk
  80     date=date
  81 fi
  82
  83 #
  84 #       Read RC file, if there is one
  85 #
  86 USERNAME=
  87 if [ -f $HOME/.georc ]; then
  88         . $HOME/.georc
  89         # N.B. must switch to read_rc_file if LAT/LON is ever needed here
  90 fi
  91 #
  92
  93 #       Process the options
  94 #
  95 POSTPROC="cat"
  96 DEBUG=0
  97 INCR=0
  98 NOWPTS=0
  99 NOZERO=0
 100 NOHTML=0
 101 DECODE=1
 102 NUMLOGS=1000000
 103 while getopts "beE:iwzl:no:u:D:h?" opt
 104 do
 105         case $opt in
 106         b)      POSTPROC="gpsbabel -igpx -f- -ogpx -F-";;
 107         e)      DECODE=0;;
 108         E)      eval "$OPTARG";;
 109         i)      INCR=1;;
 110         l)      NUMLOGS="$OPTARG";;
 111         o)      POSTPROC="gpsbabel -igpx -f- -o$OPTARG -F-";;
 112         n)      NOHTML=1;;
 113         u)      USERNAME="$OPTARG";;
 114         w)      NOWPTS=1;;
 115         z)      NOZERO=1;;
 116         D)      DEBUG="$OPTARG";;
 117         h|\?)   usage;;
 118         esac
 119 done
 120 shift `expr $OPTIND - 1`
 121
 122 #
 123 #       Main Program
 124 #
 125 YR=`date +"%Y"`
 126
 127 cat "$@" | tr -d '\001\002\003\004\005\006\007\015\022\026\030' \
 128 | sed 's/<A /\
 129 <A /g' |
 130 $awk -vDEBUG=$DEBUG -vINCR=$INCR \
 131     -vNOWPTS=$NOWPTS -vNOZERO=$NOZERO \
 132     -vNOHTML=$NOHTML \
 133     -vDECODE=$DECODE \
 134     -vUSERNAME="$USERNAME" \
 135     -vDATE="$date" \
 136     -vDATEFMT="$DATEFMT" \
 137     -vYR="$YR" -vNUMLOGS=$NUMLOGS \
 138 '
 139 # Copyright (c) 2010 Dan Saar
 140 #
 141 # Permission is hereby granted, free of charge, to any person obtaining a copy
 142 # of this software and associated documentation files (the "Software"), to deal
 143 # in the Software without restriction, including without limitation the rights
 144 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 145 # copies of the Software, and to permit persons to whom the Software is
 146 # furnished to do so, subject to the following conditions:
 147 #
 148 # The above copyright notice and this permission notice shall be included in
 149 # all copies or substantial portions of the Software.
 150 #
 151 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 152 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 153 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 154 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 155 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 156 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 157 # THE SOFTWARE.
 158
 159 function prsJSON_hex2num(s,     rv, ii, len, k)
 160 {
 161    rv = 0
 162    s = tolower(s)
 163    len = length(s)
 164
 165    for (ii = 1; ii <= len; ii++)
 166    {
 167       k = index("0123456789abcdef", substr(s, ii, 1))
 168       if (k > 0)
 169          rv = rv * 16 + (k-1)
 170       else
 171          break;
 172    }
 173
 174    return rv
 175 }
 176
 177 function prsJSON_EncodeAsUTF8( v,      s, p1, p2, p3, p4, cs )
 178 {
 179    cs = "\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377"
 180
 181    if ( v < 128 )
 182       s = sprintf("%c", v )
 183
 184    else if ( v < 2048 ) # 110xxxxx 10xxxxxx
 185    {
 186       p1 = int(v/64) % 32
 187       p2 = v % 64
 188       s = substr(cs, 65+p1, 1) substr(cs, p2+1, 1)
 189    }
 190
 191    else if ( v < 65536 ) # 1110xxxx 10xxxxxx 10xxxxxx
 192    {
 193       p1 = int(v/4096) % 16
 194       p2 = int(v/64) % 64
 195       p3 = v % 64
 196       s = substr(cs, 97+p1, 1) substr(cs, p2+1, 1) substr(cs, p3+1, 1)
 197    }
 198
 199    else if ( v < 1114112 ) # 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 200    {
 201       p1 = int(v/262144) % 8
 202       p2 = int(v/4096) % 64
 203       p3 = int(v/64) % 64
 204       p4 = v % 64
 205       s = substr(cs, 113+p1, 1) substr(cs, p2+1, 1) substr(cs, p3+1, 1) substr(cs, p4+1, 1)
 206    }
 207
 208    else
 209       s = ""
 210
 211    return s;
 212 }
 213
 214 function prsJSON_UnescapeString(jsonString,     matchedString, matchedValue)
 215 {
 216    if (jsonString == "\"\"")
 217       return ""
 218
 219    if (jsonString ~ /^".+"$/)
 220       jsonString = substr(jsonString,2,length(jsonString)-2)
 221
 222    gsub(/\\\\/, "\\u005C", jsonString)
 223    gsub(/\\"/, "\"", jsonString)
 224    gsub(/\\\//, "/", jsonString)
 225    gsub(/\\b/, "\b", jsonString)
 226    gsub(/\\f/, "\f", jsonString)
 227    gsub(/\\n/, "\n", jsonString)
 228    gsub(/\\r/, "\r", jsonString)
 229    gsub(/\\t/, "\t", jsonString)
 230
 231    if (match(jsonString, /\\[^u]/))
 232       return "ParseJSON Error: Invalid String at " jsonString
 233
 234    # handle encoded UTF-16 surrogates
 235    while (match(jsonString, /\\uD[89AaBb][0123456789AaBbCcDdEeFf][0123456789AaBbCcDdEeFf]\\uD[CcDdEeFf][0123456789AaBbCcDdEeFf][0123456789AaBbCcDdEeFf]/))
 236    {
 237       matchedValue = (prsJSON_hex2num(substr(jsonString, RSTART+2, 4)) % 1024) * 1024 + prsJSON_hex2num(substr(jsonString, RSTART+8, 4)) % 1024 + 65536
 238       #print matchedValue, substr(jsonString, RSTART+2, 4), substr(jsonString, RSTART+8, 4)
 239       matchedString = prsJSON_EncodeAsUTF8( matchedValue )
 240       sub(/\\uD[89AaBb][0123456789AaBbCcDdEeFf][0123456789AaBbCcDdEeFf]\\uD[CcDdEeFf][0123456789AaBbCcDdEeFf][0123456789AaBbCcDdEeFf]/, matchedString, jsonString)
 241    }
 242
 243    while (match(jsonString, /\\u[0123456789AaBbCcDdEeFf][0123456789AaBbCcDdEeFf][0123456789AaBbCcDdEeFf][0123456789AaBbCcDdEeFf]/))
 244    {
 245       matchedValue = prsJSON_hex2num(substr(jsonString, RSTART+2, 4))
 246       matchedString = prsJSON_EncodeAsUTF8( matchedValue )
 247       sub(/\\u[0123456789AaBbCcDdEeFf][0123456789AaBbCcDdEeFf][0123456789AaBbCcDdEeFf][0123456789AaBbCcDdEeFf]/, matchedString, jsonString)
 248    }
 249
 250    return jsonString;
 251 }
 252
 253 function prsJSON_ValidString(jsonString)
 254 {
 255    return jsonString !~ /^ParseJSON Error: Invalid String at /
 256 }
 257
 258 function prsJSON_SetDataValue(jsonData, prefix, value)
 259 {
 260    jsonData[prefix] = value
 261 }
 262
 263 function prsJSON_Error(jsonStringArr, cnt, idx, jsonData, message)
 264 {
 265    split("", jsonData)
 266    prsJSON_SetDataValue(jsonData, "1", sprintf("ParseJSON Error: %s at ", message) (idx <= cnt ? jsonStringArr[idx] : ""))
 267    split("", jsonStringArr)
 268    return cnt + 1
 269 }
 270
 271 function prsJSON_CopyError(jsonData, tv)
 272 {
 273    split("", jsonData)
 274    prsJSON_SetDataValue(jsonData, "1", tv[1])
 275 }
 276
 277 function prsJSON_ParseNumber(jsonStringArr, cnt, idx, jsonData, prefix)
 278 {
 279    if (idx <= cnt)
 280    {
 281       if (match(jsonStringArr[idx], /^(\-?)(0|[123456789][0123456789]*)(\.[0123456789]+)?([eE][+-]?[0123456789]+)?/))
 282       {
 283          prsJSON_SetDataValue(jsonData, prefix, substr(jsonStringArr[idx], 1, RLENGTH))
 284          jsonStringArr[idx] = length(jsonStringArr[idx]) >= RLENGTH+1 ? substr(jsonStringArr[idx], RLENGTH+1) : ""
 285       }
 286       else
 287          idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Number not found") # starts like a number, but doesnt match the REGEX
 288    }
 289
 290    return idx
 291 }
 292
# Parse a JSON string literal that starts at the front of chunk idx.
#
# The JSON text was split on commas by ParseJSON, so a string that contains
# commas is spread over several chunks. The loop below glues following
# chunks back on (with the comma) until an unescaped closing quote is found.
#
#    jsonStringArr : comma-separated chunks of the JSON text
#    cnt : number of chunks
#    idx : chunk to start in; it must begin with a double quote
#    jsonData : array receiving the unescaped string
#    prefix : key under which the string is stored
#
#    returns : index of the chunk that holds the text after the closing
#              quote, or the stop value from prsJSON_Error on failure
#
function prsJSON_ParseString(jsonStringArr, cnt, idx, jsonData, prefix,      jsonString, idxn, idxs, idxq, t)
{
   if (idx <= cnt && length(jsonStringArr[idx]) > 0 && substr(jsonStringArr[idx], 1, 1) == "\"")
   {
      # idxn is the position in jsonString where scanning continues;
      # position 1 is the opening quote
      idxn = 2
      jsonString = jsonStringArr[idx]

      do
      {
         # t is the part not scanned yet; idxs and idxq are the positions
         # of the first backslash and first quote in t (0 if absent)
         t = length(jsonString) >= idxn ? substr(jsonString, idxn) : ""
         idxs = index(t, "\\")
         idxq = index(t, "\"")

         # no valid close quote found
         if (idxq == 0)
         {
            # out of chunks: leave the loop with idxq == 0 (unterminated)
            if (idx == cnt)
               break;

            idx++
            jsonString = jsonString "," jsonStringArr[idx]
         }

         # a valid close quote was found - not before a slash
         if (idxq != 0 && (idxs == 0 || (idxs != 0 && idxq < idxs)))
            break;

         if (idxs != 0 && idxq == idxs + 1) # slash quote
            idxn = idxn + idxq

         else
            idxn = idxn + idxs + 1

      } while (1)

      if (idxq > 0)
      {
         # t is now the whole literal including both quotes
         t = substr(jsonString, 1, idxn+idxq-1)
         # raw control characters are not accepted inside a string
         if (match(t, /[\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037]/) == 0)
         {
            t = prsJSON_UnescapeString(t)
            if ( prsJSON_ValidString(t) )
            {
               prsJSON_SetDataValue(jsonData, prefix, t)
               # the current chunk keeps only the text after the closing quote
               jsonStringArr[idx] = length(jsonString) >= idxn+idxq ? substr(jsonString,idxn+idxq) : ""
            }
            else
               idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Invalid string")
         }
         else
            idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Invalid character in string")
      }
      else
         idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Unterminated string")
   }
   else
      idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "String expected")

   return idx
}
 353
# Parse a JSON object that starts at the front of chunk idx.
#
# Each property value is stored under "prefix SUBSEP name", or under just
# "name" when prefix is empty. Because the text was split on commas, running
# out of text in a chunk while more chunks remain means a comma separator
# was consumed, and another property must follow.
#
#    jsonStringArr : comma-separated chunks of the JSON text
#    cnt : number of chunks
#    idx : chunk to start in; it begins with the opening brace
#    jsonData : array receiving the parsed values
#    prefix : key of the object itself ("" for the top level)
#
#    returns : index of the chunk holding the text after the closing brace,
#              or the stop value from prsJSON_Error on failure
#
function prsJSON_ParseObject(jsonStringArr, cnt, idx, jsonData, prefix,     tv )
{
   if (idx <= cnt)
   {
      sub(/^\{[ \t\r\n\f]*/, "", jsonStringArr[idx]) #skip open { and skipwhite

      while (idx <= cnt && length(jsonStringArr[idx]) > 0 && substr(jsonStringArr[idx], 1, 1) != "}")
      {
         # the property name is parsed into the scratch array tv, not jsonData
         idx = prsJSON_ParseString(jsonStringArr, cnt, idx, tv, "1")

         if (idx <= cnt && length(tv[1]) == 0)
             idx = prsJSON_Error(jsonStringArr, cnt, idx, tv, "Empty string used for property name")

         if (idx <= cnt)
         {
            sub(/^[ \t\r\n\f]+/, "", jsonStringArr[idx]) #skipwhite

            if ( length(jsonStringArr[idx]) > 0 && substr(jsonStringArr[idx], 1, 1) == ":" )
            {
               sub(/^:[ \t\r\n\f]*/, "", jsonStringArr[idx]) #skip colon and skipwhite

               if ( length(jsonStringArr[idx]) > 0 )
               {
                  idx = prsJSON_ParseJSONInt(jsonStringArr, cnt, idx, jsonData, prefix != "" ? prefix SUBSEP tv[1] : tv[1])
                  if (idx <= cnt)
                  {
                     sub(/^[ \t\r\n\f]+/, "", jsonStringArr[idx]) #skipwhite

                     # chunk used up and more chunks follow: a comma was consumed
                     if (length(jsonStringArr[idx]) == 0 && idx < cnt)
                     {
                        idx++
                        sub(/^[ \t\r\n\f]+/, "", jsonStringArr[idx]) #skipwhite
                        if (length(jsonStringArr[idx]) == 0 || substr(jsonStringArr[idx], 1, 1) == "}")
                           idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected object property")
                     }

                     else if (length(jsonStringArr[idx]) == 0 || substr(jsonStringArr[idx], 1, 1) != "}")
                        idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected object property or closing brace")
                  }
               }
               else
                  idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected JSON value (1)")
            }
            else
               idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected colon")
         }
         else
            # the name failed to parse: move the message from tv to jsonData
            prsJSON_CopyError(jsonData, tv)
      }

      if (idx <= cnt && (length(jsonStringArr[idx]) == 0 || substr(jsonStringArr[idx], 1, 1) != "}"))
         idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected closing brace")

      if (idx <= cnt && length(jsonStringArr[idx]) > 0 && substr(jsonStringArr[idx], 1, 1) == "}")
         sub(/^\}[ \t\r\n\f]*/, "", jsonStringArr[idx]) #skip close } and skipwhite
   }

   return idx
}
 413
# Parse a JSON array that starts at the front of chunk idx.
#
# Elements are numbered from 1 and stored under "prefix SUBSEP number", or
# under just the number when prefix is empty. As in prsJSON_ParseObject, an
# exhausted chunk with more chunks remaining stands for a consumed comma.
#
#    jsonStringArr : comma-separated chunks of the JSON text
#    cnt : number of chunks
#    idx : chunk to start in; it begins with the opening bracket
#    jsonData : array receiving the parsed values
#    prefix : key of the array itself ("" for the top level)
#
#    returns : index of the chunk holding the text after the closing
#              bracket, or the stop value from prsJSON_Error on failure
#
function prsJSON_ParseArray(jsonStringArr, cnt, idx, jsonData, prefix,     ii)
{
   if (idx <= cnt)
   {
      sub(/^\[[ \t\r\n\f]*/, "", jsonStringArr[idx]) #skip open bracket and skipwhite
      ii = 1   # number of the next element

      while (idx <= cnt && length(jsonStringArr[idx]) > 0 && substr(jsonStringArr[idx], 1, 1) != "]")
      {
         idx = prsJSON_ParseJSONInt(jsonStringArr, cnt, idx, jsonData, prefix != "" ? prefix SUBSEP ii : ii )
         ii++

         if (idx <= cnt)
         {
            sub(/^[ \t\r\n\f]+/, "", jsonStringArr[idx]) #skipwhite

            # chunk used up and more chunks follow: a comma was consumed
            if (length(jsonStringArr[idx]) == 0 && idx < cnt)
            {
               idx++;
               sub(/^[ \t\r\n\f]+/, "", jsonStringArr[idx]) #skipwhite
               if (length(jsonStringArr[idx]) == 0 || substr(jsonStringArr[idx], 1, 1) == "]")
                  idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected array value")
            }

            else if (length(jsonStringArr[idx]) == 0 || substr(jsonStringArr[idx], 1, 1) != "]")
               idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected array value or closing bracket")
         }
      }

      if (idx <= cnt && (length(jsonStringArr[idx]) == 0 || substr(jsonStringArr[idx], 1, 1) != "]"))
         idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected closing bracket")

      if (idx <= cnt && length(jsonStringArr[idx]) > 0 && substr(jsonStringArr[idx], 1, 1) == "]")
         sub(/^\][ \t\r\n\f]*/, "", jsonStringArr[idx]) #skip close bracket and skipwhite
   }

   return idx
}
 452
# Parse one JSON value at the front of chunk idx and store it under prefix.
#
# The first character selects the value parser. Strings, numbers and the
# literals true, false and null are only accepted when prefix is not empty,
# so the top-level call (prefix "") accepts only an object or an array.
# The literals are stored as the marker strings "<<true>>", "<<false>>" and
# "<<null>>". For the top-level call, any text left after the value is
# reported as an error.
#
#    jsonStringArr : comma-separated chunks of the JSON text
#    cnt : number of chunks
#    idx : chunk to start in
#    jsonData : array receiving the parsed values
#    prefix : key under which the value is stored ("" for the top level)
#
#    returns : index of the chunk where parsing continues, or the stop
#              value from prsJSON_Error on failure
#
function prsJSON_ParseJSONInt(jsonStringArr, cnt, idx, jsonData, prefix,     tk)
{
   if (idx <= cnt)
   {
      sub(/^[ \t\r\n\f]+/, "", jsonStringArr[idx]) #skipwhite

      if (length(jsonStringArr[idx]) > 0)
      {
         # tk is the first character of the value
         tk = substr(jsonStringArr[idx], 1, 1)
         if (tk == "\"" && prefix != "")
            idx = prsJSON_ParseString(jsonStringArr, cnt, idx, jsonData, prefix)
         else if (tk ~ /^[0123456789-]/ && prefix != "")
            idx = prsJSON_ParseNumber(jsonStringArr, cnt, idx, jsonData, prefix)
         else if (jsonStringArr[idx] ~ /^true/ && prefix != "")
         {
            prsJSON_SetDataValue(jsonData, prefix, "<<true>>")
            jsonStringArr[idx] = length(jsonStringArr[idx]) <= 4 ? "" : substr(jsonStringArr[idx],5)
         }
         else if (jsonStringArr[idx] ~ /^false/ && prefix != "")
         {
            prsJSON_SetDataValue(jsonData, prefix, "<<false>>")
            jsonStringArr[idx] = length(jsonStringArr[idx]) <= 5 ? "" : substr(jsonStringArr[idx],6)
         }
         else if (jsonStringArr[idx] ~ /^null/ && prefix != "")
         {
            prsJSON_SetDataValue(jsonData, prefix, "<<null>>")
            jsonStringArr[idx] = length(jsonStringArr[idx]) <= 4 ? "" : substr(jsonStringArr[idx],5)
         }
         else if (tk == "{")
            idx = prsJSON_ParseObject(jsonStringArr, cnt, idx, jsonData, prefix)
         else if (tk == "[")
            idx = prsJSON_ParseArray(jsonStringArr, cnt, idx, jsonData, prefix)
         else
            idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected JSON value (2)")

         if (idx <= cnt)
            sub(/^[ \t\r\n\f]+/, "", jsonStringArr[idx]) #skipwhite
      }

      # top level only: nothing may follow the value, neither in the
      # current chunk nor in a later one
      if (prefix == "" && idx <= cnt && length(jsonStringArr[idx]) != 0)
         idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected end of JSON text")
      else if (prefix == "" && idx+1 <= cnt)
      {
         idx++
         idx = prsJSON_Error(jsonStringArr, cnt, idx, jsonData, "Expected end of JSON text (2)")
      }

   }

   return idx
}
 504
 505 #
 506 # JSON Formatting Routines
 507 #
 508
 509 function useJSON_ArrayCount( possibleArray,     a, min, max, cnt, rv)
 510 {
 511    cnt = 0
 512
 513    for ( a in possibleArray )
 514    {
 515       if (possibleArray[a] "" !~ /^[0123456789][0123456789]*$/)
 516          return -1
 517
 518       if ( cnt == 0 )
 519       {
 520          min = possibleArray[a]
 521          max = possibleArray[a]
 522       }
 523       else
 524       {
 525          if (min == possibleArray[a] || max == possibleArray[a])
 526             return -1
 527
 528          if (possibleArray[a] < min)
 529             min = possibleArray[a]
 530
 531          if (max < possibleArray[a])
 532             max = possibleArray[a]
 533       }
 534
 535       cnt++
 536    }
 537
 538    if (min == 1 && max == cnt)
 539       return cnt
 540
 541    return -1
 542 }
 543
 544 function useJSON_GetObjectMembers(jsonSchema, prefix)
 545 {
 546    if (prefix == "") prefix = "<<novalue>>"
 547    return prefix in jsonSchema ? jsonSchema[prefix] : ""
 548 }
 549
 550 # quick sort array arr
 551 function utlJSON_qsortArray(arr, left, right,   i, last, t)
 552 {
 553    if (left >= right)   # do nothing if array has less than 2 elements
 554       return
 555    i = left + int((right-left+1)*rand())
 556    t = arr[left];
 557    arr[left] = arr[i];
 558    arr[i] = t
 559    last = left  # arr[left] is now partition element
 560    for (i = left+1; i <= right; i++)
 561    {
 562       if (arr[i] < arr[left])
 563       {
 564          last++
 565          t = arr[last];
 566          arr[last] = arr[i];
 567          arr[i] = t
 568       }
 569    }
 570    t = arr[left];
 571    arr[left] = arr[last];
 572    arr[last] = t
 573    utlJSON_qsortArray(arr, left, last-1)
 574    utlJSON_qsortArray(arr, last+1, right)
 575 }
 576
 577 function useJSON_GetSchema(jsonData, jsonSchema,    a, tidx, tv, sv, idx)
 578 {
 579    split("", jsonSchema)
 580    for (a in jsonData)
 581    {
 582       while (match(a, SUBSEP "[^" SUBSEP "]+$"))
 583       {
 584          tidx = substr(a,1,RSTART-1)
 585          tv = substr(a,RSTART+1)
 586          sv = (tidx in jsonSchema) ? jsonSchema[tidx] : ""
 587          # if ( sv != tv && sv !~ "^" tv SUBSEP && sv !~ SUBSEP tv "$" && sv !~ SUBSEP tv SUBSEP )
 588          # Rephrase this using index so object member names with regex characters work
 589          if ( sv != tv && index(sv, tv SUBSEP) != 1 && (length(sv) <= length(tv)+1 || substr(sv, length(sv)-length(tv)) != SUBSEP tv) && index(sv, SUBSEP tv SUBSEP) == 0 )
 590             jsonSchema[tidx] = sv (sv == "" ? "" : SUBSEP)  tv
 591          a = tidx
 592       }
 593
 594       tidx = "<<novalue>>"
 595       tv = a
 596       sv = (tidx in jsonSchema) ? jsonSchema[tidx] : ""
 597       if ( sv != tv && sv !~ "^" tv SUBSEP && sv !~ SUBSEP tv "$" && sv !~ SUBSEP tv SUBSEP )
 598          jsonSchema[tidx] = sv (sv == "" ? "" : SUBSEP)  tv
 599    }
 600 }
 601
# Return s as a quoted JSON string literal.
#
# Backslashes become \u005C, double quotes and the control characters
# backspace, formfeed, newline, return and tab get their short escapes, the
# remaining control characters become \u00XX, and byte sequences that look
# like UTF-8 multi-byte characters become \uXXXX escapes (a UTF-16 surrogate
# pair for four-byte sequences).
#
# cs holds the bytes 0x80..0xFF in order; index(cs, byte) therefore gives
# 1..64 for a continuation byte, 65..96 for a two-byte lead, 97..112 for a
# three-byte lead and 113..120 for a four-byte lead.
#
# The string is edited in place while it is scanned: every replacement is
# followed by an adjustment of ii so that the loop continues right after
# the inserted escape.
#
function useJSON_EscapeString(s,     ii, c, t, t2, t3, t4, cs)
{
   cs = "\200\201\202\203\204\205\206\207\210\211\212\213\214\215\216\217\220\221\222\223\224\225\226\227\230\231\232\233\234\235\236\237\240\241\242\243\244\245\246\247\250\251\252\253\254\255\256\257\260\261\262\263\264\265\266\267\270\271\272\273\274\275\276\277\300\301\302\303\304\305\306\307\310\311\312\313\314\315\316\317\320\321\322\323\324\325\326\327\330\331\332\333\334\335\336\337\340\341\342\343\344\345\346\347\350\351\352\353\354\355\356\357\360\361\362\363\364\365\366\367\370\371\372\373\374\375\376\377"
   gsub(/\\/, "\\u005C", s)
   gsub(/"/, "\\\"", s)
   #gsub(/\//, "\\/", s) # required to decode, but not to encode
   gsub(/\b/, "\\b", s)
   gsub(/\f/, "\\f", s)
   gsub(/\n/, "\\n", s)
   gsub(/\r/, "\\r", s)
   gsub(/\t/, "\\t", s)

   for ( ii = 1 ; ii <= length(s) ; ii++ )
   {
      t = substr(s,ii,1)

      if (t == "\000") # having \000 in list below doesnt work in all awks
      {
         c = 0
         s = (ii > 1 ? substr(s, 1, ii-1) : "") sprintf("\\u%04X", c) (ii==length(s) ? "" : substr(s, ii+1))
         ii += 5
      }
      else
      {
         # c is the control character code 1..31, or -1 for any other character
         c = index("\001\002\003\004\005\006\007\010\011\012\013\014\015\016\017\020\021\022\023\024\025\026\027\030\031\032\033\034\035\036\037", t)
         c = c == 0 ? -1 : c

         if ( c >= 0 )
         {
            # one character replaced by six: skip the five extra ones
            s = (ii > 1 ? substr(s, 1, ii-1) : "") sprintf("\\u%04X", c) (ii==length(s) ? "" : substr(s, ii+1))
            ii += 5
         }
      }

      # positions of this byte and the next three in cs (0 = not a high byte)
      t = index(cs, t)
      t2 = ii+1 <= length(s) ? index(cs, substr(s,ii+1,1)) : 0
      t3 = ii+2 <= length(s) ? index(cs, substr(s,ii+2,1)) : 0
      t4 = ii+3 <= length(s) ? index(cs, substr(s,ii+3,1)) : 0

      if ( c < 0 && t > 64 && t <= 96 && ii+1 <= length(s) && t2 > 0 && t2 <= 64) # two character UTF-8 sequence
      {
         c = (t - 65)*64 + (t2-1)
         s = (ii > 1 ? substr(s, 1, ii-1) : "") sprintf("\\u%04X", c) (ii+1==length(s) ? "" : substr(s, ii+2))
         ii += 5
      }

      else if ( c < 0 && t > 96 && t <= 112 && ii+2 <= length(s) && t2 > 0 && t2 <= 64 && t3 > 0 && t3 <= 64) # three character UTF-8 sequence
      {
         c = (t - 97)*4096 + (t2-1)*64 + (t3-1)
         # with t <= 112 the largest value is 15*4096 + 63*64 + 63 = 65535,
         # so the else branch below is never reached
         if ( c < 65536 )
         {
            s = (ii > 1 ? substr(s, 1, ii-1) : "") sprintf("\\u%04X", c) (ii+2==length(s) ? "" : substr(s, ii+3))
            ii += 5
         }
         else
         {
            # encode in JSON-style with two \u#### UTF-16 surrogates
            # printf("1: %08X\n", c)
            s = (ii > 1 ? substr(s, 1, ii-1) : "") sprintf("\\u%04X\\u%04X", (c/1024)%1024 + 55296, c%1024 + 56320) (ii+3==length(s) ? "" : substr(s, ii+4))
            ii += 11
         }
      }

      # four character UTF-8 sequence, encode in JSON-style with two \u#### UTF-16 surrogates
      else if ( c < 0 && t > 112 && t <= 120 && ii+3 <= length(s) && t2 > 0 && t2 <= 64 && t3 > 0 && t3 <= 64 && t4 > 0 && t4 <=  64)
      {
         c = (t - 113)*262144 + (t2-1)*4096 + (t3-1)*64 + (t4-1)
         # printf("2: %08X, %d, %d, %d, %d\n", c, t, t2, t3, t4)
         # printf("\\u%04X\\u%04X\n", (c/1024)%1024 + 55296, c%1024 + 56320)
         c -= 65536
         # four bytes replaced by twelve characters: skip the eleven extra ones
         s = (ii > 1 ? substr(s, 1, ii-1) : "") sprintf("\\u%04X\\u%04X", (c/1024)%1024 + 55296, c%1024 + 56320) (ii+3==length(s) ? "" : substr(s, ii+4))
         ii += 11
      }
   }

   return "\"" s "\""
}
 679
 680 function useJSON_GetDataValue(jsonData, prefix)
 681 {
 682    return prefix in jsonData ? jsonData[prefix] : "<<novalue>>"
 683 }
 684
 685 function useJSON_PrettyFormat(s, pretty)
 686 {
 687    if (s == "" || pretty <= 0) return s
 688
 689    # dont sprintf the whole thing, some awks have short buffers for sprintf
 690    return sprintf("%*.*s", (pretty-1)*3, (pretty-1)*3, "") s (s == "}" || s == "]" ? "" : "\n")
 691 }
 692
 693 function useJSON_FormatInt(jsonData, jsonSchema, prefix, pretty,     allLines, member, memberArr, memberList, arrCount, a, ii)
 694 {
 695    memberList = useJSON_GetObjectMembers(jsonSchema, prefix)
 696
 697    if ( memberList == "" )
 698    {
 699       a = useJSON_GetDataValue(jsonData, prefix)
 700       if ( a == "<<true>>" ) return "true"
 701       if ( a == "<<false>>" ) return "false"
 702       if ( a == "<<null>>" ) return "null"
 703       if ( a == "<<novalue>>" ) return "" # <<novalue>> is a help for dealing with empty arrays and objects
 704
 705       # if it looks like a number, encode it as such.  Cant tell a string from a number.
 706       if (a "" ~ /^(\-?)(0|[123456789][0123456789]*)(\.[0123456789]+)?([eE][+-]?[0123456789]+)?$/)
 707          return a
 708
 709       return useJSON_EscapeString(a)
 710    }
 711
 712    split(memberList, memberArr, SUBSEP)
 713    arrCount = useJSON_ArrayCount( memberArr )
 714
 715    if ( arrCount >= 0 )
 716    {
 717       allLines = "[" (pretty == 0 ? "" : "\n")
 718
 719       for ( ii = 1 ; ii <= arrCount ; ii++ )
 720          allLines = allLines useJSON_PrettyFormat(useJSON_FormatInt( jsonData, jsonSchema, prefix (prefix == "" ? "" : SUBSEP) ii, (pretty != 0 ? pretty+1 : 0)) (ii < arrCount ? "," : ""), pretty != 0 ? pretty+1 : 0)
 721       allLines = allLines useJSON_PrettyFormat("]", pretty)
 722
 723       return allLines
 724    }
 725
 726    allLines = "{" (pretty == 0 ? "" : "\n")
 727
 728    ii = 0
 729
 730    arrCount = 0
 731    for (a in memberArr)
 732       arrCount++
 733
 734    utlJSON_qsortArray(memberArr, 1, arrCount)
 735
 736    for ( ii = 1 ; ii <= arrCount ; ii++ )
 737       allLines = allLines useJSON_PrettyFormat(useJSON_EscapeString(memberArr[ii]) (pretty == 0 ? ":" : " : ") useJSON_FormatInt(jsonData, jsonSchema, prefix (prefix == "" ? "" : SUBSEP) memberArr[ii], (pretty != 0 ? pretty+1 : 0)) (ii < arrCount ? "," : ""), pretty != 0 ? pretty+1 : 0)
 738
 739    allLines = allLines useJSON_PrettyFormat("}", pretty)
 740
 741    return allLines
 742 }
 743
 744 #
 745 # Entry Points
 746 #
 747
 748 #
 749 # ParseJSON : Parse JSON text into an awk array
 750 #
 751 #    jsonString : JSON text
 752 #    jsonData : array of parsed JSON data
 753 #
 754 #    returns : N/A
 755 #
 756 function ParseJSON(jsonString, jsonData,      jsonStringArr, cnt)
 757 {
 758    # newlines split differently in some awks, replace them with formfeeds (also white space)
 759    # if (split("1\n2\n3", jsonData, ",") == 3) # is this an awk that splits newlines differently?
 760    gsub(/\n/, "\f", jsonString) # always replace literal newlines - allows compatibility when testing
 761
 762    split("", jsonData) # clear the array jsonData
 763    cnt = split(jsonString, jsonStringArr, ",")
 764    prsJSON_ParseJSONInt(jsonStringArr, cnt, 1, jsonData, "")
 765 }
 766
 767 #
 768 # FormatJSON : Format parsed JSON data back into JSON text
 769 #
 770 #    jsonData : array of parsed JSON data
 771 #    pretty : 0 = compact format, non-zero = pretty format
 772 #
 773 #    returns : string with JSON text
 774 #
 775 function FormatJSON(jsonData, pretty,    jsonSchema)
 776 {
 777    useJSON_GetSchema(jsonData, jsonSchema)
 778    return useJSON_FormatInt(jsonData, jsonSchema, "", pretty ? 1 : 0)
 779 }
 780
 781 #
 782 # JSONArrayLength : Find number of members in a JSON array
 783 #
 784 #    jsonData : array of parsed JSON data
 785 #    prefix : array name
 786 #
 787 #    returns : number of entries in the array
 788 #
 789 function JSONArrayLength(jsonData, prefix,     a, cnt, tv)
 790 {
 791    cnt = -1
 792
 793    for (a in jsonData)
 794    {
 795       if (prefix == "" || index(a, prefix) == 1)
 796       {
 797          tv = substr(a, prefix == "" ? 1 : (1+length(prefix)+1))
 798          if ( index(tv, SUBSEP) )
 799             tv = substr(tv, 1, index(tv, SUBSEP)-1)
 800          tv = tv + 0
 801          if ( tv > cnt )
 802             cnt = tv
 803       }
 804    }
 805
 806    return cnt
 807 }
 808
 809 #
 810 # JSONUnescapeString : turn a JSON-escaped string into UTF-8
 811 #
 812 #    jsonString : the escaped JSON string to convert
 813 #
 814 #    returns : the string in UTF-8
 815 #
 816 function JSONUnescapeString(jsonString)
 817 {
 818    return prsJSON_UnescapeString(jsonString)
 819 }
 820
 821 #
 822 # JSONIsTrue : return non-zero if the value is the true value
 823 #
 824 #    jsonValue : the value to test
 825 #
 826 #    returns : true or false
 827 #
 828 function JSONIsTrue(jsonValue)
 829 {
 830    return jsonValue == "<<true>>";
 831 }
 832
 833 #
 834 # JSONIsFalse : return non-zero if the value is the false value
 835 #
 836 #    jsonValue : the value to test
 837 #
 838 #    returns : true or false
 839 #
 840 function JSONIsFalse(jsonValue)
 841 {
 842    return jsonValue == "<<false>>";
 843 }
 844
 845 #
 846 # JSONIsNull : return non-zero if the value is the null value
 847 #
 848 #    jsonValue : the value to test
 849 #
 850 #    returns : true or false
 851 #
 852 function JSONIsNull(jsonValue)
 853 {
 854    return jsonValue == "<<null>>";
 855 }
 856
 857 #
 858 # JSONObjectMembers : get the set of members of an object
 859 #
 860 #    jsonData : array of parsed JSON data
 861 #    prefix : object name
 862 #    memberArr : [out] an array of the names of the object members, if the target was an object or an array
 863 #
 864 #    returns : If the target was actually an array rather than an object, the number of elements in the array
 865 #              Else, zero if the target was an object or a value
 866 #
 867 function JSONObjectMembers(jsonData, prefix, memberArr,     jsonSchema, memberList, rv, a)
 868 {
 869    useJSON_GetSchema(jsonData, jsonSchema)
 870    memberList = useJSON_GetObjectMembers(jsonSchema, prefix)
 871
 872    if ( memberList == "" )
 873    {
 874       split("", memberArr)
 875       return 0
 876    }
 877
 878    split(memberList, memberArr, SUBSEP)
 879    rv = useJSON_ArrayCount( memberArr )
 880    if ( rv == -1 ) # not an array, sort the object member names
 881    {
 882       rv = 0
 883       for (a in memberArr)
 884          rv++
 885
 886       utlJSON_qsortArray(memberArr, 1, rv)
 887       rv = 0
 888    }
 889    return rv
 890 }
 891 # End of Copyright (c) 2010 Dan Saar
 892
 893 function debug(lvl, text) {
 894     if (lvl <= DEBUG)
 895         print text > "/dev/stderr"
 896 }
 897
 898 function wpt_init() {
 899     available = "True"
 900     archived = "False"
 901     sym = "Geocache"
 902     json_log_bool = 0
 903     logs = ""
 904     logs_section = 0
 905     hints = ""
 906     lat = ""
 907     yy = 0
 908     wplist = ""
 909     nattr_yes = 0
 910     nattr_no = 0
 911     gs_type = ""
 912 }
 913
 914 function umlauts(text) {
 915     # Somewhat minimal translation of HTML entities in titles
 916     gsub("&#228;", "\xc3\xa4", text)
 917     gsub("&#246;", "\xc3\xb6", text)
 918     gsub("&#252;", "\xc3\xbc", text)
 919     gsub("&#196;", "\xc3\x84", text)
 920     gsub("&#214;", "\xc3\x96", text)
 921     gsub("&#220;", "\xc3\x9c", text)
 922     gsub("&#223;", "\xc3\x9f", text)
 923     gsub("&#176;", "\xc2\xb0", text)
 924     gsub("&amp;", "\\&", text)
 925     return text
 926 }
 927
 928 function htmlclean(text) {
 929     gsub("&nbsp;", " ", text)
 930     gsub("</?[pP][^>]*>", "\n", text)
 931     gsub("<[bB][rR][^>]*>", "\n", text)
 932     gsub("<[^>]*>", "", text)
 933     # compress whitespace
 934     gsub("\n\n\n*", "\n\n", text)
 935     gsub("[ \t][ \t]*", " ", text)
 936     return text
 937 }
 938
 939 function tableclean(text) {
 940     gsub("\n", "", text)
 941     gsub("&nbsp;", " ", text)
 942     # translate/remove HTML tags
 943     gsub("</?[pP][^>]*>", "\n", text)
 944     gsub("</[bB][rR][^>]*>", "", text)
 945     gsub("</?font[^>]*>", "", text)
 946     gsub("</?table[^>]*>", "", text)
 947     gsub("<t[rdh]>", "", text)
 948     gsub("</tr>", "\n", text)
 949     gsub("</t[dh][^>]*>", " | ", text)
 950     gsub("<[^>]*>", "", text)
 951     # compress whitespace
 952     gsub("[ \t][ \t]*", " ", text)
 953     return text
 954 }
 955
 956 function remdiv(text, tag) {
 957     if (tag != "")
 958         pat = ".*<div id=." tag ".[^>]*>[ \t\n]*"
 959     else
 960         pat = ".*<div[^>]*>[ \t\n]*"
 961     sub(pat, "", text)
 962     while (text !~ "/?div")
 963     {
 964         if (getline more <= 0)
 965             break
 966         text = text "\n" more
 967     }
 968     sub("[ \t\n]*</div>.*", "", text)
 969     debug(3, "Div:\n" text)
 970     return text
 971 }
 972
 973 function remspan(text, tag) {
 974     if (tag != "")
 975         pat = ".*<span id=." tag ".[^>]*>[ \t\n]*"
 976     else
 977         pat = ".*<span[^>]*>[ \t\n]*"
 978     sub(pat, "", text)
 979     while (text !~ "/?span")
 980     {
 981         if (getline more <= 0)
 982             break
 983         text = text "\n" more
 984     }
 985     sub("[ \t\n]*</span>.*", "", text)
 986     debug(3, "Span:\n" text)
 987     return text
 988 }
 989
 990 function remspanlong(text, tag) {
 991     if (tag != "")
 992         pat = ".*<span id=." tag ".[^>]*>[ \t\n]*"
 993     else
 994         pat = ".*<span[^>]*>[ \t\n]*"
 995     sub(pat, "", text)
 996     # i = "span level"
 997     i = 1; j = 0
 998     debug(2, length(text) "\t" i "   " j++ "   " text)
 999     # input is in text
1000     while (i != 0)
1001     {
1002         # emergency exit
1003         if (length(text) > 500000)
1004         {
1005             debug(0, "Warning: logs exceeded 500,000 bytes!")
1006             break
1007         }
1008         # cleanup: remove </*span...>, adjust "span level"
1009         while (text ~ "</*span.*>")
1010         {
1011             if (text ~ "</span>")
1012             {
1013                 --i; sub("</span>", "", text)
1014             }
1015             if (text ~ "<span.*>")
1016             {
1017                 ++i; sub("<span[^>]*>", "", text)
1018             }
1019         }
1020         debug(2, "=" length(text) "\t" i "   " j++ "   " text)
1021         # if "span level" down to zero, closing tag reached
1022         if (i == 0) break
1023         # get more input
1024         if (getline more <= 0)
1025             break
1026         text = text "\n" more
1027         debug(2, "+" length(more) "\t" i "   " j++ "   " more)
1028     }
1029     debug(1, length(text) "\t" i "   " j++)
1030     sub("[ \t\n]*</span>.*", "", text)
1031     gsub("&nbsp;", " ", text)
1032     if (tag == "CacheLogs")
1033         gsub("</?table[^>]*>", "", text)
1034     debug(3, "SpanLong:\n" text)
1035     return text
1036 }
1037
# remwaypoints : collect the "Additional Waypoints" table from the input
#
#    returns : the following input lines joined with blanks, up to and
#              including the line that contains "</table>" or the text
#              "No additional waypoints to display"; &nbsp; is replaced
#              by a blank
#
# Works on the globals text and more.
function remwaypoints() {
    text = ""

    # read on until one of the two end markers has been collected
    while (!(text ~ "</table>" || text ~ "No additional waypoints to display") &&
           (getline more) > 0)
        text = text " " more

    gsub("&nbsp;", " ", text)
    gsub("\n[ \t]*", "", text)
    debug(3, "Waypoints:\n" text "\nEnd Waypoints")
    return text
    # will return complete table contents! split by </tr> instead of
    # <STRONG><img...>
}
1053
# splitwaypoints : turn the Additional Waypoints table into records
#
#    waypoints : table HTML as returned by remwaypoints()
#
#    returns : one record per waypoint; each main table row adds
#                  \nlat="..." lon="..."|prefix|lookup|name|url
#              and each following "Note:" row appends "|note"
#
# Works on the globals text, wps, i, wp, j and url.
function splitwaypoints(waypoints,
            line, fld, prefix, lookup, wpname, x, y, lat, lon) {
    text = ""

    # one element per table row; element 1 is the header row
    i = split(waypoints, wps, "</tr>")

    wp = 1 # skip header line
    while (wp < i)
    {
        ++wp

        # link target: what follows the last href= up to the next quote
        url = wps[wp]
        gsub(".*href=.", "", url)
        gsub("\".*", "", url)
        if (url ~ "^http:")
            debug(1, "url: " url)
        else
            url = ""

        # cells of this row, stripped of tags and surrounding blanks
        j = split(wps[wp], line, "</td>")
        for (fld in line)
        {
            debug(2, "Before Line[" fld "]: " line[fld])
            gsub("[ \t]*<[^>]*>", "", line[fld])
            gsub("^[ \t]*", "", line[fld])
            gsub("[ \t]*$", "", line[fld])
            debug(2, "after Line[" fld "]: " line[fld])
        }

        # 8 cells: main row, old style (pre-2010/07)
        # 9 cells: main row, new style (2010/07), one extra leading cell
        # 4 cells: "Note:" continuation row
        # anything else is dropped
        if (j == 8 || j == 9)
        {
            # counted from the end the layout is the same in both
            # styles: prefix j-5, lookup j-4, name j-3, coords j-2
            if (!line[j - 5]) continue
            prefix = substr(line[j - 5] "00", 1, 2)
            lookup = line[j - 4]
            wpname = line[j - 3]

            # latitude: hemisphere letter, degrees, decimal minutes
            lat = toupper(line[j - 2])
            gsub(" *[EW].*", "", lat)
            split(lat, y)
            lat = y[2] + y[3]/60.0
            if (y[1] == "S")
                lat = -lat

            # longitude: same cell with the latitude part cut off
            lon = toupper(line[j - 2])
            gsub("[NS] *[0-9]*.. *[0-9.]* ", "", lon)
            gsub("[^ 0-9.NESW-]", "", lon)
            split(lon, x)
            lon = x[2] + x[3]/60.0
            if (x[1] == "W")
                lon = -lon

            text = text sprintf("\nlat=\"%.6f\" lon=\"%.6f\"|%s|%s|%s|%s",
                                lat, lon, prefix, lookup, wpname, url)
        }
        else if (j == 4)
        {
            # the note is in the cell after the one holding "Note:"
            if (line[1] ~ "Note:")
                text = text sprintf("|%s", line[2])     # old style
            else if (line[2] ~ "Note:")
                text = text sprintf("|%s", line[3])     # new style
        }
    }
    debug(3, "Split WPs\n" text)
    return text
}
1158
# wpclean : simplify the Additional Waypoints table to short HTML
#
#    waypoints : table HTML as returned by remwaypoints()
#
#    returns : for each main table row
#                  prefixedname - name<br />coordfield<br />
#              followed by "note<br />" for each "Note:" row; the
#              prefixed name is the two-character waypoint prefix plus
#              the global gcid without its first two characters
#
# Works on the globals text, wps, i, wp and j.
function wpclean(waypoints,     line, fld, prefix, lookup, wpname, coords) {
    text = ""

    # one element per table row; element 1 is skipped
    i = split(waypoints, wps, "</tr>")

    wp = 1
    while (wp < i)
    {
        ++wp

        # cells of this row, stripped of tags and surrounding blanks
        j = split(wps[wp], line, "</td>")
        for (fld in line)
        {
            gsub("[ \t]*<[^>]*>", "", line[fld])
            gsub("^[ \t]*", "", line[fld])
            gsub("[ \t]*$", "", line[fld])
        }

        # 8 cells: main row, old style (pre-2010/07)
        # 9 cells: main row, new style (2010/07), one extra leading cell
        # 4 cells: "Note:" continuation row
        # anything else is dropped
        if (j == 8 || j == 9)
        {
            # counted from the end the layout is the same in both
            # styles: prefix j-5, lookup j-4, name j-3, coords j-2
            if (!line[j - 5]) continue
            prefix = substr(line[j - 5] "00", 1, 2) substr(gcid, 3)
            lookup = line[j - 4]
            wpname = line[j - 3]
            # cut the name at the first " (...)" part
            gsub(" \\(.*\\).*", "", wpname)
            coords = toupper(line[j - 2])
            text = text sprintf("%s - %s<br />%s<br />", prefix, wpname, coords)
        }
        else if (j == 4)
        {
            # the note is in the cell after the one holding "Note:"
            if (line[1] ~ "Note:")
                text = text sprintf("%s<br />", line[2])    # old style
            else if (line[2] ~ "Note:")
                text = text sprintf("%s<br />", line[3])    # new style
        }
    }
    debug(3, "Clean WPs\n" text)
    return text
}
1224
# hex2dec : convert a string of upper-case hexadecimal digits
#
#    x : digits 0-9 and A-F, most significant first
#
#    returns : the numeric value; "" gives 0.  Any other character
#              (lower-case letters included) contributes -1 as digit.
function hex2dec(x,   val) {
    val = 0
    # consume the string from the left, one digit per round
    while (length(x))
    {
        val = val * 16 + index("0123456789ABCDEF", substr(x, 1, 1)) - 1
        x = substr(x, 2)
    }
    return val
}
1230
# wp2id : convert a GC waypoint code to its numeric id
#
#    wp : waypoint code; a leading "GC" is removed first
#
#    returns : the id as a number, e.g. GCFFFF -> 65535 (hex form)
#              and GCG000 -> 65536 (base-31 form)
#
# Codes of at most 4 characters that sort before "G000" are read as
# hexadecimal, everything else as base 31 minus 411120.
# Uses the globals set and pos.
function wp2id(wp,    val) {
    sub("^GC", "", wp)
    debug(5, "wp2id: " wp " ...")

    if ((length(wp) <= 4) && (wp < "G000"))
    {
        # old hex style
        val = hex2dec(wp)
        debug(5, "wp2id hex: " val " ...")
        return val
    }

    # new style, base-31, can have 4 or more places!
    set = "0123456789ABCDEFGHJKMNPQRTVWXYZ"
    val = 0
    pos = 1
    while (pos <= length(wp))
    {
        val = val * 31 + index(set, substr(wp, pos, 1)) - 1
        ++pos
    }
    val = val - 411120
    debug(5, "wp2id id: " val " ...")
    return val
}
1254
# rot13 : rotate the ASCII letters of a string by 13 places
# (used for hints; see http://lorance.freeshell.org/rot13/)
#
#    string : text to convert
#
#    returns : string with a-z and A-Z rotated, other characters kept
#
# Uses the globals ROTFROM, ROTTO, retstr, pos, char and rotpos.
function rot13 (string) {
    ROTFROM = "nopqrstuvwxyzabcdefghijklmNOPQRSTUVWXYZABCDEFGHIJKLM"
    ROTTO   = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"

    retstr = ""
    pos = 0
    while (pos < length(string))
    {
        char = substr(string, pos + 1, 1)
        # letters are looked up in ROTFROM and swapped for the
        # character at the same position of ROTTO
        rotpos = index(ROTFROM, char)
        if (rotpos > 0)
            char = substr(ROTTO, rotpos, 1)
        retstr = retstr char
        pos++
    }
    return retstr
}
1270
# tagstart : print an opening XML tag on a line of its own
#
#    lvl   : nesting level; the line is indented by 2*lvl blanks
#    tag   : element name
#    parms : attribute text placed after the name, may be ""
function tagstart(lvl, tag, parms) {
    printf "%*s", lvl*2, ""
    if (parms != "")
    {
        printf "<%s %s>\n", tag, parms
        return
    }
    printf "<%s>\n", tag
}
1278
# tagend : print a closing XML tag on a line of its own
#
#    lvl : nesting level; the line is indented by 2*lvl blanks
#    tag : element name
function tagend(lvl, tag) {
    printf("%*s", lvl * 2, "")
    printf("</%s>\n", tag)
}
1283
# ee : escape text for use as XML character data
#
#    text : raw text
#
#    returns : text with & < > replaced by &amp; &lt; &gt;
function ee(text) {
    # ampersands first, otherwise the entities added below would be
    # escaped a second time
    gsub(/&/, "\\&amp;", text)
    gsub(/>/, "\\&gt;", text)
    gsub(/</, "\\&lt;", text)
    return text
}
1290
# tagtext : print <tag>text</tag> on one line, text escaped with ee()
#
#    lvl  : nesting level; the line is indented by 2*lvl blanks
#    tag  : element name
#    text : element content (unescaped)
function tagtext(lvl, tag, text) {
    printf "%*s", lvl*2, ""
    printf "<%s>%s</%s>\n", tag, ee(text), tag
}
1296
# tagptext : print <tag parms>text</tag> on one line, text escaped
# with ee(); parms is written as given
#
#    lvl   : nesting level; the line is indented by 2*lvl blanks
#    tag   : element name
#    parms : attribute text placed after the name
#    text  : element content (unescaped)
function tagptext(lvl, tag, parms, text) {
    printf "%*s", lvl*2, ""
    printf "<%s %s>%s</%s>\n", tag, parms, ee(text), tag
}
1302
# attr_begin1 : register one attribute in the lookup tables
#
#    gif  : attribute icon base name, used as the table key
#    id   : numeric attribute id stored in attr_id[gif]
#    text : description stored in attr_text[gif]
#
# The former debug probe of the hard-coded, misspelled key "slealth"
# has been dropped: merely reading attr_id["slealth"] created an empty
# bogus entry in the table on every call.
function attr_begin1(gif, id, text) {
    debug(1, "attr_begin1: " gif " " id " \"" text "\"")
    attr_id[gif] = id
    attr_text[gif] = text
    debug(1, "attr_id: " attr_id[gif])
}
# attr_begin : fill the attribute lookup tables
#
# attr_id[name] is the numeric attribute id and attr_text[name] the
# description written to the GPX output by tagattr(); name is the
# base name of the attribute icon found in the web page.
#
# "landf" (Lost And Found Tour) used to be registered with id 44,
# which is the id of "flashlight"; it now carries its own id 45.
function attr_begin() {
    # entries are assigned directly rather than through attr_begin1()
    attr_id["dog"] = 1; attr_text["dog"] = "Dogs"
    attr_id["dogs"] = 1; attr_text["dogs"] = "Dogs allowed"
    attr_id["fee"] = 2; attr_text["fee"] = "Access or parking fee"
    attr_id["rappelling"] = 3; attr_text["rappelling"] = "Climbing gear"
    attr_id["boat"] = 4; attr_text["boat"] = "Boat"
    attr_id["scuba"] = 5; attr_text["scuba"] = "Scuba gear"
    attr_id["kids"] = 6; attr_text["kids"] = "Recommended for kids"
    attr_id["onehour"] = 7; attr_text["onehour"] = "Takes less than an hour"
    attr_id["scenic"] = 8; attr_text["scenic"] = "Scenic view"
    attr_id["hiking"] = 9; attr_text["hiking"] = "Significant hike"

    attr_id["climbing"] = 10; attr_text["climbing"] = "Difficult climbing"
    attr_id["wading"] = 11; attr_text["wading"] = "May require wading"
    attr_id["swimming"] = 12; attr_text["swimming"] = "May require swimming"
    attr_id["available"] = 13; attr_text["available"] = "Available at all times"
    attr_id["night"] = 14; attr_text["night"] = "Recommended at night"
    attr_id["winter"] = 15; attr_text["winter"] = "Available during winter"
    # 16 has no entry
    attr_id["poisonoak"] = 17; attr_text["poisonoak"] = "Poison plants"
    attr_id["dangerousanimals"] = 18; attr_text["dangerousanimals"] = "Dangerous Animals"
    attr_id["ticks"] = 19; attr_text["ticks"] = "Ticks"

    attr_id["mines"] = 20; attr_text["mines"] = "Abandoned mines"
    attr_id["cliff"] = 21; attr_text["cliff"] = "Cliff / falling rocks"
    attr_id["hunting"] = 22; attr_text["hunting"] = "Hunting"
    attr_id["danger"] = 23; attr_text["danger"] = "Dangerous area"
    attr_id["wheelchair"] = 24; attr_text["wheelchair"] ="Wheelchair accessible"
    attr_id["parking"] = 25; attr_text["parking"] = "Parking available"
    attr_id["public"] = 26; attr_text["public"] = "Public transportation"
    attr_id["water"] = 27; attr_text["water"] = "Drinking water nearby"
    attr_id["restrooms"] = 28; attr_text["restrooms"] ="Public restrooms nearby"
    attr_id["phone"] = 29; attr_text["phone"] = "Telephone nearby"

    attr_id["picnic"] = 30; attr_text["picnic"] = "Picnic tables nearby"
    attr_id["camping"] = 31; attr_text["camping"] = "Camping available"
    attr_id["bicycles"] = 32; attr_text["bicycles"] = "Bicycles"
    attr_id["motorcycles"] = 33; attr_text["motorcycles"] = "Motorcycles"
    attr_id["quads"] = 34; attr_text["quads"] = "Quads"
    attr_id["jeeps"] = 35; attr_text["jeeps"] = "Off-road vehicles"
    attr_id["snowmobiles"] = 36; attr_text["snowmobiles"] = "Snowmobiles"
    attr_id["horses"] = 37; attr_text["horses"] = "Horses"
    attr_id["campfires"] = 38; attr_text["campfires"] = "Campfires"
    attr_id["thorns"] = 39; attr_text["thorns"] = "Thorns"

    attr_id["stealth"] = 40; attr_text["stealth"] = "Stealth required"
    attr_id["stroller"] = 41; attr_text["stroller"] = "Stroller accessible"
    attr_id["firstaid"] = 42; attr_text["firstaid"] = "Needs maintenance"
    attr_id["cow"] = 43; attr_text["cow"] = "Watch for livestock"
    attr_id["flashlight"] = 44; attr_text["flashlight"] = "Flashlight required"
    attr_id["landf"] = 45; attr_text["landf"] = "Lost And Found Tour"
    attr_id["rv"] = 46; attr_text["rv"] = "Recreational Vehicle"
    attr_id["field"] = 47; attr_text["field"] = "Field Puzzle"
    attr_id["UV"] = 48; attr_text["UV"] = "UV Light Required"
    attr_id["snowshoes"] = 49; attr_text["snowshoes"] = "Snowshoes"

    attr_id["skiis"] = 50; attr_text["skiis"] = "Cross Country Skis"
    attr_id["s-tool"] = 51; attr_text["s-tool"] = "Special Tool Required"
    attr_id["nightcache"] = 52; attr_text["nightcache"] = "Night Cache"
    attr_id["parkngrab"] = 53; attr_text["parkngrab"] = "Park and Grab"
    attr_id["AbandonedBuilding"] = 54; attr_text["AbandonedBuilding"] = "Abandoned Structure"
    attr_id["hike_short"] = 55; attr_text["hike_short"] = "Short hike (less than 1km)"
    attr_id["hike_med"] = 56; attr_text["hike_med"] = "Medium hike (1km-10km)"
    attr_id["hike_long"] = 57; attr_text["hike_long"] = "Long hike (+10km)"
    attr_id["fuel"] = 58; attr_text["fuel"] = "Fuel Nearby"
    attr_id["food"] = 59; attr_text["food"] = "Food Nearby"

    attr_id["wirelessbeacon"] = 60; attr_text["wirelessbeacon"] = "Wireless Beacon"
    attr_id["partnership"] = 61; attr_text["partnership"] = "Partnership"
    attr_id["seasonal"] = 62; attr_text["seasonal"] = "Seasonal Access"
    attr_id["tourist"] = 63; attr_text["tourist"] = "Tourist Friendly"
    attr_id["treeclimbing"] = 64; attr_text["treeclimbing"] = "Tree Climbing"
    attr_id["frontyard"] = 65; attr_text["frontyard"] = "Front Yard (Private Residence)"
    attr_id["teamwork"] = 66; attr_text["teamwork"] = "Teamwork Required"
}
1385
# tagattr : print one <groundspeak:attribute> element
#
#    lvl   : nesting level; the line is indented by 2*lvl blanks
#    kind  : attribute name, looked up in attr_id / attr_text
#    yesno : value written to the inc="..." attribute
#
# Nothing is printed when attr_id has no non-zero id for kind.
function tagattr(lvl, kind, yesno) {
    # make sure the key is used as a string
    kind = kind ""
    #debug(1, "kind: \"" kind "\"")
    if (attr_id[kind] != 0)
    {
        printf "%*s", lvl*2, ""
        printf "<groundspeak:attribute id=\"%d\" inc=\"%d\">", attr_id[kind], yesno
        printf "%s", attr_text[kind]
        printf "</groundspeak:attribute>\n"
    }
}
1396
# Cache type: the text between alt=<quote> and <quote> width= on the
# line that links to cache_types.aspx          (gc 02/01/11)
/cache_types.aspx/ {
    gs_type = $0
    sub(/.* alt=./, "", gs_type)
    sub(/. width=.*/, "", gs_type)
    debug(1, "type: " gs_type)
}
# Cache name line.  If the type is still unknown it is taken from this
# line and the name is picked up by the last rule of this group;
# otherwise the name is taken here and the remaining rules are skipped.
/<span id="ctl00_ContentBody_CacheName">/ {
    if (!gs_type)
    {
        gs_type = $0
        sub(/.* alt=./, "", gs_type)
        sub(/. width=.*/, "", gs_type)
        debug(1, "type: " gs_type)
    }
    else
    {
        gs_name = remspan($0, "ctl00_ContentBody_CacheName")
        next
    }
}
# Cache name, span id without the ctl00 prefix
/<span id="CacheName">/ {
    gs_name = remspan($0, "CacheName")
}
# Cache name, span id with the ctl00 prefix
/<span id="ctl00_ContentBody_CacheName">/ {
    gs_name = remspan($0, "ctl00_ContentBody_CacheName")
}
# GC code from the contents of a ...WaypointName span
/<span id=".*WaypointName".*>/ {
    gcid = remspan($0)
}
# GC code from a link: the text between "wp=" and the next double quote
/;wp=GC.*" /    {
    # new way, yech!
    gcid = $0
    sub(/.*wp=/, "", gcid)
    sub(/".*/, "", gcid)
}
# Short description: contents of the ...ShortDescription span
/<span id=".*ShortDescription">/ {
    gs_short_description = remspan($0)
}
# Long description (span id without ctl00 prefix); it may contain
# nested spans, hence remspanlong().  The waypoint table is cleared.
/<span id="LongDescription">/ {
    waypoints = ""
    gs_long_description = remspanlong($0, "LongDescription")
}
# Long description (span id with ctl00 prefix)
/<span id="ctl00_ContentBody_LongDescription">/ {
    waypoints = ""
    gs_long_description = remspanlong($0, "ctl00_ContentBody_LongDescription")
}
# Hint inside <div id="div_hint">: joined to one line, leading blanks
# removed, <br> turned into newlines; run through rot13() if DECODE
/<div id="div_hint"/ {
    hints = remdiv($0)
    gsub("\n", " ", hints)
    sub("^ *", "", hints)
    gsub("<br>", "\n", hints)
    if (DECODE)
        hints = rot13(hints)
}
# Hint inside <span id="Hints">: cleaned with htmlclean(), run through
# rot13() if DECODE, then joined to one line
/<span id="Hints"/ {
    hints = htmlclean(remspan($0))
    if (DECODE)
        hints = rot13(hints)
    gsub("\n", " ", hints)
}
# Hint inside <span id="ctl00_ContentBody_Hints">: the text between
# "displayMe<quote>>" and "</span>" on the same line
/<span id="ctl00_ContentBody_Hints"/ {
    hints = $0
    sub(".*displayMe.>", "", hints)
    sub("</span>.*", "", hints)
    gsub("<br>", "\n", hints)
    # debug(1, "Hints: " hints)
    if (DECODE)
        hints = rot13(hints)
}
# "Additional Waypoints" heading in <b>: read the table that follows
# and convert it to the record list
/<b>Additional Waypoints/ {
    waypoints = remwaypoints()
    wplist = splitwaypoints(waypoints)
}
# the same heading in <strong>
/<strong>Additional Waypoints/ {
    waypoints = remwaypoints()
    wplist = splitwaypoints(waypoints)
}
# Start of the logs table, old markup
/class="LogsTable Table"/ {
    logs_section = 1
}
# Start of the logs table, new markup (06/28/11)
/class="LogsTable"/ {
    logs_section = 1
}
# While inside the logs table every line is appended to logs.
# logs_section follows the table nesting: it goes up for a line with
# "<table" and down for a line with "</table>".
(logs_section > 0) {
    logs = logs $0
    if (/<table/)
        logs_section += 1
    if (/<\/table>/)
        logs_section -= 1
}
1481
# Logs inside <span id="CacheLogs">
/<span id="CacheLogs">/ {
    logs = remspanlong($0, "CacheLogs")
    # remove header which does not exist >2010-01-12
    sub(".*td class=.containerHeader.>Cache Logs</td></tr>", "", logs)
}
# Logs inside <span id="ctl00_ContentBody_CacheLogs">
/<span id="ctl00_ContentBody_CacheLogs">/ {
    logs = remspanlong($0, "ctl00_ContentBody_CacheLogs")
}
# Contents of the ...CacheStats span
/<span id=".*CacheStats">/ {
    stats = remspan($0)
}
# Contents of the ...NumVisits span
/<span id=".*NumVisits">/ {
    numvisits = remspan($0)
    debug(1, numvisits)
}
1495
# Cache id from the "print friendly" link: the text after "ID="
# (printable page has ID number) or, without it, after "guid="
# (non-printable page has guid number), cut at the next "&"
/lnkPrintFriendly/ {
    gid = $0
    if (gid ~ /ID=/)
        sub(/^.*ID=/, "", gid)
    else
        sub(/^.*guid=/, "", gid)
    sub(/&.*/, "", gid)
}
# Owner line "by <a href=...>": the owner name is the link text after
# "ds=2<quote>>", the owner guid the text after "guid=" up to "&"
/^ *by <a href/ {
    gs_guid = $0
    sub(/.*guid=/, "", gs_guid)
    sub(/&.*/, "", gs_guid)

    gs_owner = $0
    sub(/.*ds=2.>/, "", gs_owner)
    sub(/<.*/, "", gs_owner)
    debug(1, "owner: " gs_owner)
}
# Container size: the text after "Size: " in an alt attribute, up to
# the character in front of " />"
/.* alt=.Size/ {
    gs_size = $0
    sub(/.*Size: /, "", gs_size)
    sub(". />.*", "", gs_size)
}
# <span id="CacheOwner">: one span carrying type, owner, size and
# owner guid.  text (global) keeps the raw span contents.
/<span id="CacheOwner"/ {
    text = remspan($0)
    debug(1, "Owner text " text)

    # type: everything in front of the first tag
    gs_type = text
    sub(/<.*/, "", gs_type)

    # size: after "Size: " up to the next tag
    gs_size = text
    sub(/.*Size: /, "", gs_size)
    sub(/<.*/, "", gs_size)

    # owner: after "<br>by ", without a trailing " [...", the link
    # tags around the name and any leading "...<br />" / "by "
    gs_owner = text
    debug(1, gs_owner)
    sub(/.*<br>by /, "", gs_owner)
    sub(/ [[].*/, "", gs_owner)
    debug(1, gs_owner)
    sub(/<a[^>]*>/, "", gs_owner)
    sub(/<.a[^>]*>/, "", gs_owner)
    sub(/.*<br .>/, "", gs_owner)
    sub(/^by /, "", gs_owner)
    debug(1, "owner " gs_owner)

    # owner guid: after "guid=" up to the next "&"
    gs_guid = text
    sub(/.*guid=/, "", gs_guid)
    sub(/&.*/, "", gs_guid)
    debug(1, "guid " gs_guid)
}
# <span id="ctl00_ContentBody_CacheOwner">: type, owner, size and
# owner guid are all taken from this single line.
/<span id="ctl00_ContentBody_CacheOwner"/ {
    text = $0
    debug(2, "Owner text: " text)

    # type: the text just in front of the first "<br " tag
    gs_type = text
    sub(/<br .*/, "", gs_type)
    sub(/.*>/, "", gs_type)
    debug(1, "gs_type: " gs_type)

    # owner: the link text after "ds=2<quote>>"
    gs_owner = text
    sub(/.*ds=2.>/, "", gs_owner)
    sub(/<.*/, "", gs_owner)
    debug(1, "gs_owner: " gs_owner)

    # size: after "Size: " up to the next tag
    gs_size = text
    sub(/.*Size: /, "", gs_size)
    sub(/<.*/, "", gs_size)

    # owner guid: after "guid=", cut at "&" or at "<quote> title="
    gs_guid = text
    sub(/.*guid=/, "", gs_guid)
    sub(/&.*/, "", gs_guid)
    sub(/. title=.*/, "", gs_guid)
    debug(1, "guid: " gs_guid)
}
# Status message in <span id="ErrorText">, checked on this line only:
# "unavailable" clears available, "been archived" sets archived
/<span id="ErrorText"/ {
    if (/unavailable/)
        available = "False"
    if (/been archived/)
        archived = "True"
}
# Status message in <span id="ctl00_ContentBody_ErrorText">, checked
# on the extracted span contents
/<span id="ctl00_ContentBody_ErrorText"/ {
    errortext = remspan($0, "ctl00_ContentBody_ErrorText")
    if (errortext ~ /unavailable/)
        available = "False"
    if (errortext ~ /been archived/)
        archived = "True"
    debug(1, "available: " available "; archived: " archived)
}
# Coordinates from the LargeMapPrint span: the values after
# "latitude=" and "longitude=", cut at the next "&" (and quote)
/<span id="LargeMapPrint"/ {
    text = remspan($0)

    lat = text
    sub(/.*latitude=/, "", lat)
    sub(/&.*/, "", lat)

    lon = text
    sub(/.*longitude=/, "", lon)
    sub(/\".*/, "", lon)
    sub(/&.*/, "", lon)
}
# Coordinates from a script line "var lat=...; ... lng=...;", used
# only while no latitude is known yet
/var lat=[-0-9]/ {
    if (lat == "")
    {
        lat = $0
        sub(/.*lat=/, "", lat)
        sub(/;.*/, "", lat)

        lon = $0
        sub(/.*lng=/, "", lon)
        sub(/;.*/, "", lon)
    }
}
# Location span, apparently of the form "In <state>, <country>"
# (TODO confirm against a page).  The state is the part in front of
# the first comma, the country the part after the last ", ".
/<span id=".*Location"/ {
    text = remspan($0)

    gs_country = text
    sub(/.*, /, "", gs_country)
    sub(/ <.*/, "", gs_country)
    # no comma in the text: strip the leading "In " here as well
    sub(/^In /, "", gs_country)

    gs_state = text
    sub(/In */, "", gs_state)
    sub(/,.*/, "", gs_state)
}
# Coordinates from a line "lat=...; lng=...; guid=...", used only
# while no latitude is known yet
/lat=.*; lng=.*; guid=/ {
    if (lat == "")
    {
        lat = $0
        sub(/.*lat=/, "", lat)
        sub(/;.*/, "", lat)

        lon = $0
        sub(/.*lng=/, "", lon)
        sub(/;.*/, "", lon)
    }
}
# Hidden date, page layout of 2/1/11: the date is on the second line
# after the match, in the order MM/DD/YYYY
/<span class="minorCacheDetails">Hidden/ {
    getline time
    getline time
    sub(/^ */, "", time)
    sub(/<.*/, "", time)
    split(time, fld, "/")
    time = sprintf("%d-%02d-%02d", fld[3], fld[1], fld[2])
    debug(1, "time: " time)
}
# Hidden date, page layout of 6/28/11: the date is on the third line
# after the match.  "-" separators are turned into "/" first.
/> <span class="minorCacheDetails">/ {
    getline time
    getline time
    getline time
    sub(/^ */, "", time)
    sub(/<.*/, "", time)
    gsub(/-/, "/", time)
    rc = split(time, fld, "/")
    if (rc == 1)
        rc = split(time, fld, "-")
    debug(1, "timerc: " rc)

    if (DATEFMT == 1)
    {
        # DD/MM/YYYY
        time = sprintf("%d-%02d-%02d", fld[3], fld[2], fld[1])
    }
    else if (fld[1] >= 1000)
    {
        # year first: YYYY/MM/DD
        time = sprintf("%d-%02d-%02d", fld[1], fld[2], fld[3])
    }
    else
    {
        # MM/DD/YYYY
        time = sprintf("%d-%02d-%02d", fld[3], fld[1], fld[2])
    }
    debug(1, "time: " time)
}
# Hidden date in <span id="DateHidden">, taken as MM/DD/YYYY.
# One further input line is read into text before the span of the
# current line is evaluated.
/<span id="DateHidden">/ {
    getline text
    time = remspan($0)
    split(time, fld, "/")
    time = sprintf("%d-%02d-%02d", fld[3], fld[1], fld[2])
}
# Hidden date in <span id="ctl00_ContentBody_DateHidden">.
# Accepted are three parts separated by "/" (taken as MM/DD/YYYY) or
# three parts separated by "," whose second part is "<Month> <day>"
# with an English month name and whose third part is the year; in
# both cases the remaining rules are skipped for this line.
# Anything else leaves time empty.
/<span id="ctl00_ContentBody_DateHidden">/ {
    time = remspan($0, "ctl00_ContentBody_DateHidden")

    rc = split(time, fld, "/")
    if (rc == 3)
    {
        time = sprintf("%d-%02d-%02d", fld[3], fld[1], fld[2])
        debug(1, "time: " time)
        next
    }

    rc = split(time, fld, ",")
    if (rc != 3)
        time = ""
    else
    {
        yyyy = fld[3]
        # fld is reused for the words of the middle part
        split(fld[2], fld, " ")
        mm = Month[ fld[1] ]
        dd = fld[2]
        time = sprintf("%d-%02d-%02d", yyyy, mm, dd)
        debug(1, "time: " time)
        next
    }
}
# Difficulty: first word of the alt text on the uxLegendScale line
/ctl00_ContentBody_uxLegendScale/ {
    text = $0
    sub(/.*alt=./, "", text)
    sub(/ .*/, "", text)
    gs_diff = text
    debug(1 , "gs_diff: " gs_diff)
}
# Terrain: first word of the alt text on the Localize6 line
/ctl00_ContentBody_Localize6/ {
    text = $0
    sub(/.*alt=./, "", text)
    sub(/ .*/, "", text)
    gs_terr = text
    debug(1 , "gs_terr: " gs_terr)
}
# Difficulty followed by a closing strong tag: the rating is the
# first word of the alt text on the next line
/^ *Difficulty:<.strong>/ {
    getline text
    sub(/.*alt=./, "", text)
    sub(/ .*/, "", text)
    gs_diff = text
    debug(1 , "gs_diff: " gs_diff)
}
# Difficulty, page layout of 2/1/11: the rating is on the third line
# read from here
/^ *Difficulty:/ {
    getline text
    getline text
    getline text
    sub(/.*alt=./, "", text)
    sub(/ .*/, "", text)
    gs_diff = text
    debug(1 , "gs_diff: " gs_diff)
}
# Difficulty inside <span id="Difficulty">
/<span id="Difficulty">/ {
    text = remspan($0)
    sub(/.*alt=./, "", text)
    sub(/ .*/, "", text)
    gs_diff = text
}
# Difficulty inside <span id="ctl00_ContentBody_Difficulty">
/<span id="ctl00_ContentBody_Difficulty">/ {
    text = remspan($0, "ctl00_ContentBody_Difficulty")
    sub(/.*alt=./, "", text)
    sub(/ .*/, "", text)
    debug(1, "difficulty " text)
    gs_diff = text
}
# Terrain followed by a closing strong tag: the rating is the first
# word of the alt text on the next line
/^ *Terrain:<.strong>/ {
    getline text
    sub(/.*alt=./, "", text)
    sub(/ .*/, "", text)
    gs_terr = text
    debug(1 , "gs_terr: " gs_terr)
}
# Terrain, page layout of 2/1/11: the rating is on the third line
# read from here
/^ *Terrain:/ {
    getline text
    getline text
    getline text
    sub(/.*alt=./, "", text)
    sub(/ .*/, "", text)
    gs_terr = text
    debug(1 , "gs_terr: " gs_terr)
}
# Terrain inside <span id="Terrain">
/<span id="Terrain">/ {
    text = remspan($0)
    sub(/.*alt=./, "", text)
    sub(/ .*/, "", text)
    gs_terr = text
}
# Terrain inside <span id="ctl00_ContentBody_Terrain">
/<span id="ctl00_ContentBody_Terrain">/ {
    text = remspan($0, "ctl00_ContentBody_Terrain")
    sub(/.*alt=./, "", text)
    sub(/ .*/, "", text)
    debug(1, "terrain " text)
    gs_terr = text
}
# Attribute icons.  The line is reduced to a blank-separated list of
# icon base names ending in -yes or -no, which is then split into the
# arrays attr_yes[1..nattr_yes] and attr_no[1..nattr_no] holding the
# names without that suffix.
/title=.What are Attributes?/ {
    text = $0
    debug(5, "Attr " text)

    # cut each <img> tag down to the icon file name
    gsub("<img src=./images/attributes/", "", text)
    # before 06/03/10
    gsub(/alt="[^"]*" width="30" height="30" .>/, "", text)
    # after 06/03/10
    gsub(/alt="[^"]*" title="[^"]*" width="30" height="30" .>/, "", text)
    gsub("<p class=.NoSpacing.*", "", text)
    gsub(/^ */, "", text)
    gsub(/\.gif../, "", text)
    gsub(/attribute-blank/, "", text)

    # positive list: drop the -no names, then the -yes suffix
    attrs_yes = text
    gsub(/[a-z0-9A-Z]*-no/, "", attrs_yes)
    gsub(/-yes/, "", attrs_yes)

    # negative list: drop the -yes names, then the -no suffix
    attrs_no = text
    gsub(/[a-z0-9A-Z]*-yes/, "", attrs_no)
    gsub(/-no/, "", attrs_no)

    debug(1, "attrs_yes: " attrs_yes)
    debug(1, "attrs_no: " attrs_no)

    nattr_yes = split(attrs_yes, attr_yes, " ")
    nattr_no = split(attrs_no, attr_no, " ")

    debug(1, "nattr_yes: " nattr_yes)
    debug(1, "nattr_no: " nattr_no)
}
# A line starting with {<any char>status<any char>:<any char>success
# is handed to ParseJSON(), which fills json_logs; json_log_bool
# records that such a line was seen.
/^{.status.:.success/ {
    json_log_bool = 1
    ParseJSON($0, json_logs)
}
1755
# One-time setup: month name table, base URL, attribute tables, the
# flag marking the first waypoint, and the per-waypoint defaults.
BEGIN {
    # English month name -> month number
    Month["January"] = 1;   Month["February"] = 2;  Month["March"] = 3
    Month["April"] = 4;     Month["May"] = 5;       Month["June"] = 6
    Month["July"] = 7;      Month["August"] = 8;    Month["September"] = 9
    Month["October"] = 10;  Month["November"] = 11; Month["December"] = 12

    BaseURL = "http://www.geocaching.com/seek/cache_details.aspx"

    attr_begin()

    # still 1 until the file headers have been written
    first = 1

    wpt_init()
}
1776 /<\/html>/ {
         # Runs on the closing html tag of each input page. Everything
         # gathered for the cache so far is written as one GPX wpt element
         # (with its groundspeak:cache extension and logs), followed by one
         # wpt per additional waypoint; wpt_init() is then called again before
         # the next page is read.
         #
         # Reads: lat, lon, time, gcid, sym, gs_* fields, hints, available,
         # archived, nattr_yes/attr_yes, nattr_no/attr_no, logs or json_logs,
         # waypoints, wplist, and the settings INCR, NOHTML, NOWPTS, NOZERO,
         # NUMLOGS, DATEFMT, DATE, YR and USERNAME.
         # The helpers tagstart, tagtext, tagptext, tagend, tagattr, umlauts,
         # htmlclean, wpclean, tableclean, wp2id and debug are defined
         # elsewhere in this program.
1777     if ((lat == "") || (lon == ""))
1778     {
             # With next disabled below, the waypoint is still written (with
             # empty coordinates) in spite of what the message says.
1779         debug(0, "Waypoint coordinates not found for " gcid ", no output!")
1780         #next
1781     }
1782
1783     # too long a block to be indented
         # GPX prolog: written once, before the first waypoint, unless INCR
         # (incremental output) is set.
1784     if (!INCR && first)
1785     {
1786         print "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
1787         tagstart(0, "gpx")
1788         tagtext(1, "desc", "Geocache file generated by geo-html2gpx")
1789         tagtext(1, "author", "geo-html2gpx")
             # creation time comes from the external date command
1790         "date +%Y-%m-%dT%H:%M:%S" | getline date
1791         tagtext(1, "time", date)
1792         first = 0
1793     }
1794
1795     gs_name = umlauts(gs_name)
1796     gs_owner = umlauts(gs_owner)
1797
         # ---- the cache itself ----
1798     tagstart(1, "wpt", "lat=\"" lat "\" lon=\"" lon "\"")
1799     if (time != "")
1800         tagtext(2, "time", time "T00:00:00.0000000-07:00")
1801     tagtext(2, "name", gcid)
1802     tagtext(2, "desc", gs_name " by " gs_owner ", " \
1803                     gs_type " (" gs_diff "/" gs_terr ")")
1804
1805     # alternate URL... tagtext(2, "url", BaseURL "?wp=" gcid)
1806     # alternate URL... tagtext(2, "url", BaseURL "?id=" gid)
1807     tagtext(2, "url", BaseURL "?wp=" gcid)
1808     tagtext(2, "urlname", gs_name)
1809
         # sym may still change while the logs are scanned, so it is written
         # after the groundspeak:cache element instead of here
1810     # we do this last... tagtext(2, "sym", sym)
1811
1812     tagtext(2, "type", "Geocache|" gs_type)
1813
1814     # FIXME? GC-written GPX files contain numeric, non-UUID,
1815     # cache/owner/finder ids
1816     # Oregon needs numeric cache id, or behaves erratically!
1817     gid = wp2id(gcid)
1818     tagstart(2, "groundspeak:cache",
1819             "id=\"" gid "\" available=\"" available \
1820             "\" archived=\"" archived "\"" \
1821             " xmlns:groundspeak=\"http://www.groundspeak.com/cache/1/0/1\"")
1822     tagtext(3, "groundspeak:name", gs_name)
1823     tagtext(3, "groundspeak:placed_by", gs_owner)
1824     tagptext(3,"groundspeak:owner", "id=\"" gs_guid "\"", gs_owner)
1825     tagtext(3, "groundspeak:type", gs_type)
1826
         # attributes element only when at least one attribute was parsed
1827     if (nattr_yes != 0 || nattr_no != 0)
1828     {
1829         tagstart(3, "groundspeak:attributes")
1830         for (i = 1; i <= nattr_yes; ++i)
1831             tagattr(4, attr_yes[i], 1)
1832         for (i = 1; i <= nattr_no; ++i)
1833             tagattr(4, attr_no[i], 0)
1834         tagend(3, "groundspeak:attributes")
1835     }
1836
1837     tagtext(3, "groundspeak:container", gs_size)
1838     tagtext(3, "groundspeak:difficulty", gs_diff)
1839     tagtext(3, "groundspeak:terrain", gs_terr)
1840     tagtext(3, "groundspeak:country", gs_country)
1841     tagtext(3, "groundspeak:state", gs_state)
         # descriptions: HTML kept as is, or reduced to plain text if NOHTML
1842     if (!NOHTML)
1843     {
1844         tagptext(3, "groundspeak:short_description", "html=\"True\"",
1845                                                         gs_short_description)
1846         if (!NOWPTS && waypoints)
1847         {
1848             # reproduce "simplified table" by GC PQ
1849             # prefixed_gcid - wpname<br />original_style_coord<br />note<br />
1850             waypoints = wpclean(waypoints)
1851             # include "zero" waypoints here!
1852             gs_long_description = gs_long_description \
1853                                 "<p>Additional Waypoints</p>" waypoints
1854         }
1855         tagptext(3, "groundspeak:long_description", "html=\"True\"",
1856                                                         gs_long_description)
1857     }
1858     else
1859     {
1860         gs_short_description = htmlclean(gs_short_description)
1861         tagptext(3, "groundspeak:short_description", "html=\"False\"",
1862                                                         gs_short_description)
1863         gs_long_description = htmlclean(gs_long_description)
             # Honor NOWPTS in the plain-text form as well, the same way the
             # HTML form does; before, the waypoint table was appended here
             # even when additional waypoints had been switched off.
1864         if (!NOWPTS && waypoints)
1865             gs_long_description = gs_long_description \
1866                     "\n\nAdditional Waypoints\n" tableclean(waypoints)
1867         tagptext(3, "groundspeak:long_description", "html=\"False\"",
1868                                                         gs_long_description)
1869     }
1870     tagtext(3, "groundspeak:encoded_hints", hints)
1871
         # ---- logs ----
1872     if (json_log_bool)
1873     {
             # Logs were delivered as a JSON record (json_logs).
             # NOTE(review): unlike the HTML branch below, NUMLOGS is not
             # applied here, and the i < nlogs bound leaves the last element
             # unprocessed if the data array is indexed 1..nlogs -- confirm
             # against ParseJSON and JSONArrayLength before changing it.
1874         nlogs = JSONArrayLength(json_logs, "data")
1875         debug(1, "New Logs: " nlogs)
1876         if (nlogs > 1)
1877             tagstart(3, "groundspeak:logs")
1878         else
1879             tagstart(3, "groundspeak:logs", "/")
1880
1881         for (i = 1; i < nlogs; ++i)
1882         {
                 # the log type is derived from the name of the log icon
1883             ltype = json_logs["data" SUBSEP i SUBSEP "LogTypeImage"]
1884             if (ltype ~ /smile/) ltype = "Found it"
1885             else if (ltype ~ /happy/) ltype = "Found it"
1886             else if (ltype ~ /note/) ltype = "Write note"
                 # the quote sequence below leaves the shell single-quoted
                 # string, adds one literal single quote, and re-enters it
1887             else if (ltype ~ /sad/) ltype = "Didn'"'"'t Find it"
1888             else if (ltype ~ /attended/) ltype = "Attended"
1889             else if (ltype ~ /rsvp/) ltype = "Will Attend"
1890             else if (ltype ~ /greenlight/) ltype = "Green"
1891             else if (ltype ~ /traffic_cone/) ltype = "Archive"
1892             else if (ltype ~ /disabled/) ltype = "Temporarily Disable Listing"
1893             else if (ltype ~ /coord_update/) ltype = "Update Coordinates"
1894             else ltype = "Unknown"
1895
1896             ldate = json_logs["data" SUBSEP i SUBSEP "Visited"]
1897             lfinder = json_logs["data" SUBSEP i SUBSEP "UserName"]
1898             logid = json_logs["data" SUBSEP i SUBSEP "LogID"]
1899             guid = json_logs["data" SUBSEP i SUBSEP "LogGuid"]
1900             ltext = json_logs["data" SUBSEP i SUBSEP "LogText"]
1901             ltext = htmlclean(ltext)
1902             ltext = umlauts(ltext)
1903
                 # a find or attendance logged by USERNAME marks the cache found
1904             if (lfinder == USERNAME && ltype == "Found it")
1905                 sym = "Geocache Found"
1906             if (lfinder == USERNAME && ltype == "Attended")
1907                 sym = "Geocache Found"
1908             tagstart(4, "groundspeak:log", "id=\"" logid "\"")
1909             tagtext(5, "groundspeak:date", ldate)
1910             tagtext(5, "groundspeak:type", ltype)
1911             tagptext(5, "groundspeak:finder", "id=\"" guid "\"", lfinder)
1912             tagptext(5, "groundspeak:text", "encoded=\"" "False" "\"", ltext)
1913             tagend(4, "groundspeak:log")
1914         }
1915
1916         if (nlogs > 1)
1917             tagend(3, "groundspeak:logs")
1918     }
1919     else
1920     {
             # Logs were scraped from the HTML log table held in logs. The
             # piece after the last separator is never processed (the loop
             # stops at nlogs-1), so at most NUMLOGS entries are written.
1921         # nlogs = split(logs, entry, "</tr>")
1922         nlogs = split(logs, entry, "</tr><tr>")
1923         if (nlogs > NUMLOGS+1)
1924             nlogs = NUMLOGS+1
1925
1926         if (nlogs > 1)
1927             tagstart(3, "groundspeak:logs")
1928         else
1929             tagstart(3, "groundspeak:logs", "/")
1930
1931         for (i = 1; i < nlogs; ++i)
1932         {
1933             sub("<tr><td[^>]*>", "", entry[i])
1934             sub("</td>", "", entry[i])
1935             if (!entry[i]) continue
1936             # old split location
1937             sub(/.*<[Ss][Tt][Rr][Oo][Nn][Gg]><img src=./, "", entry[i])
1938
                 # log type, derived from the name of the log icon
1939             ltype = entry[i]
1940             #debug(1, "log: " ltype)
1941             sub(/>.*/, "", ltype) # leaves the URL of the smiley
1942             if (ltype ~ /smile/) ltype = "Found it"
1943             else if (ltype ~ /happy/) ltype = "Found it"
1944             else if (ltype ~ /note/) ltype = "Write note"
                 # the quote sequence below leaves the shell single-quoted
                 # string, adds one literal single quote, and re-enters it
1945             else if (ltype ~ /sad/) ltype = "Didn'"'"'t Find it"
1946             else if (ltype ~ /attended/) ltype = "Attended"
1947             else if (ltype ~ /rsvp/) ltype = "Will Attend"
1948             else if (ltype ~ /greenlight/) ltype = "Green"
1949             else if (ltype ~ /traffic_cone/) ltype = "Archive"
1950             else if (ltype ~ /disabled/) ltype = "Temporarily Disable Listing"
1951             else if (ltype ~ /coord_update/) ltype = "Update Coordinates"
1952             else ltype = "Unknown"
1953
                 # log date: isolate the date text, then normalize it
1954             ldate = entry[i]
1955             # split off &nbsp;/blank
1956             sub(/^[^>]*>[^ ;]*[ ;]/, "", ldate)
1957             sub(/ by <.*/, "", ldate)
1958             sub(/ by /, "", ldate)
1959             sub(/.*LogDate.>about /, "", ldate)
1960             sub(/.*LogDate.>/, "", ldate)
1961             sub(/<.*/, "", ldate)
1962             gsub(/-/, "/", ldate)
1963             debug(1, "logdate: " ldate)
1964             if (ldate ~ /ago/)
1965             {
                     # Relative date (N days ago and the like): resolved by
                     # the program named in DATE. The text comes from the web
                     # page and is pasted into a shell command line, so first
                     # remove every character that could close the quoting
                     # or start another command.
                     gsub(/[^A-Za-z0-9 .]/, "", ldate)
1966                 cmd = sprintf("%s -d \"12am %s\" +%%Y-%%m-%%dT07:00:00Z",
1967                     DATE, ldate)
1968                 cmd | getline ldate; close(cmd)
1969             }
1970             else
1971             {
1972                 n = split(ldate, fld, " ")
1973                 if (n >= 2)
1974                 {
1975                     #old format: August 18
1976                     mm = Month[fld[1]]
1977                     dd = fld[2] + 0
                         # awk variables are global: clear yy so that a date
                         # without a year falls back to YR instead of reusing
                         # the year of an earlier log entry
                         yy = ""
1978                     if (n >= 3)
1979                         yy = fld[3]
1980                     if (yy+0 == 0)
1981                         yy = YR
1982                     ldate = sprintf("%d-%02d-%02dT07:00:00", yy, mm, dd)
1983                 }
1984                 n = split(ldate, fld, "/")
1985                 if (n == 3)
1986                 {
1987                     #new format: 08/18/2011
                         # DATEFMT == 1 means day/month/year input
1988                     if (DATEFMT == 1)
1989                         ldate = sprintf("%d-%02d-%02dT07:00:00",
1990                             fld[3], fld[2], fld[1])
1991                     else
1992                         ldate = sprintf("%d-%02d-%02dT07:00:00",
1993                             fld[3], fld[1], fld[2])
1994                     debug(1, "logdate: " ldate)
1995                 }
1996             }
1997
                 # logid, guid and finder name are all cut from the same text
1998             lfinder = entry[i]
1999             sub(/[^<]*</, "", lfinder)  # Delete all before <A NAME...
2000
2001             logid = lfinder
2002             sub(/[^"]*"/, "", logid)
2003             sub(/.* id="/, "", logid)
2004             sub(/.*LUID=/, "", logid)
2005             sub(/\".*/, "", logid)
2006             debug(1, "logid: " logid)
2007
2008             guid = lfinder
2009             debug(1, "guid: " guid)
2010             #sub(/[^>]*>/, "", guid)            # Delete all before <A HREF...
2011             #sub(/>.*/, "", guid)               # Delete all after <A HREF...
2012             sub(/.*guid=/, "", guid)
2013             sub(/\".*/, "", guid)
2014             sub(/\&.*/, "", guid)
2015             sub(/. id=.*/, "", guid)
2016             debug(1, "guid: " guid)
2017
2018             #debug(1, "lfinder: " lfinder)
2019             sub(/[^>]*>/, "", lfinder)          # Delete all before <A HREF...
2020             #debug(1, "lfinder: " lfinder)
2021             #sub(/[^>]*>/, "", lfinder) # Delete all before name
2022             sub(/<.*/, "", lfinder)             # Delete all after name
2023             lfinder = umlauts(lfinder)
2024             debug(1, "lfinder: " lfinder)
2025
                 # log text
2026             ltext = entry[i]
2027             sub(/.*found\)<br .>/, "", ltext)
2028             sub("</font>.*", "", ltext)
2029             sub("<a href=.log.aspx[^>]*>[^<]*</a>", "", ltext)
2030             sub("<a href=.upload.aspx[^>]*>[^<]*</a>", "", ltext)
2031             # remove remaining HTML tags from log text. Seems to be a good
2032             # idea in any case, independent of NOHTML setting!
2033             ltext = htmlclean(ltext)
2034             ltext = umlauts(ltext)
2035
                 # a find or attendance logged by USERNAME marks the cache found
2036             if (lfinder == USERNAME && ltype == "Found it")
2037                 sym = "Geocache Found"
2038             if (lfinder == USERNAME && ltype == "Attended")
2039                 sym = "Geocache Found"
2040             tagstart(4, "groundspeak:log", "id=\"" logid "\"")
2041             tagtext(5, "groundspeak:date", ldate)
2042             tagtext(5, "groundspeak:type", ltype)
2043             tagptext(5, "groundspeak:finder", "id=\"" guid "\"", lfinder)
2044             tagptext(5, "groundspeak:text", "encoded=\"" "False" "\"", ltext)
2045             tagend(4, "groundspeak:log")
2046         }
2047         if (nlogs > 1)
2048             tagend(3, "groundspeak:logs")
2049     }
2050
2051     tagstart(3, "groundspeak:travelbugs", "/")
2052
2053     tagend(2, "groundspeak:cache")
         # sym is written only now because the log scan above may have
         # changed it to Geocache Found
2054     tagtext(2, "sym", sym)
2055     tagend(1, "wpt")
2056
2057     # add Additional Waypoints in wpt form
2058     if (!NOWPTS && wplist)
2059     {
             # wplist holds one waypoint per line; count the entries, then
             # walk them by index so they keep their order
2060         split(wplist, wps, "\n")
2061         i = 0
2062         for (wp in wps)
2063             ++i
2064         wp = 0
2065         while (wp < i)
2066         {
2067             ++wp
2068             # fields as consumed below, separated by |:
2069             #   1 lat/lon attributes, 2 prefix, 3 lookup (not used here),
2070             #   4 wpname (type), 5 url, 6 note
2071             debug(1, "wps: " wps[wp])
2072             split(wps[wp], line, "|")
                 # skip empty entries and, when NOZERO is set, waypoints whose
                 # coordinates are all zero
2073             if (line[1] &&
2074                 (!NOZERO || (line[1] !~ "lat=\"0.000000\" lon=\"0.000000\"") ) )
2075             {
2076                 # line format: coords|prefix|lookup|wpname|url|note
2077                 tagstart(1, "wpt", line[1])
2078                 #tagtext(2, "time", "...")
                     # name: waypoint prefix plus gcid without its first two
                     # characters
2079                 tagtext(2, "name", line[2] substr(gcid,3))
2080                 tagtext(2, "cmt", line[6] ? line[6] : "")
                     # statname is not used again within this rule
2081                 statname = line[4]
2082                 gsub(" \\(.*\\).*", "", statname)
2083
                     # description: wpname with the trailing (type) removed
2084                 desc = line[4]
2085                 sub(" \\(.*", "", desc)
2086                 tagtext(2, "desc", desc)
2087
2088                 tagtext(2, "url", line[5])
2089
2090                 urlname = desc
2091                 tagtext(2, "urlname", urlname)
2092
                     # waypoint type: the text between the parentheses
2093                 stattype = line[4]
2094                 gsub(".*\\(", "", stattype)
2095                 gsub("\\).*", "", stattype)
2096                 tagtext(2, "sym", stattype)
2097                 tagtext(2, "type", "Waypoint|" stattype)
2098                 tagend(1, "wpt")
2099             }
2100         }
2101     }
2102     wpt_init()
2103 }
2104 END {
         # Close the gpx root element. Nothing is written in incremental
         # mode (INCR set) or while first is still nonzero.
2105     if (!(INCR || first))
2106         tagend(0, "gpx")
2107 }
2108 ' | $POSTPROC