#!/usr/pkg/bin/gawk -f
#
# wiki-parser -- gawk script that reads wiki-style markup and prints the
# converted text.  Modes are selected in the option loop of the final BEGIN
# block: -p plain text, -a attribute listing, -b text start, -q table query,
# -s / -r search (pattern and file names are read from standard input),
# -t / -T target frame, -L / -M link listing, -P emit TARGET="_self",
# -l line number, -x export page.  Without a mode, /dev/stdin is processed.
# Requires gawk: uses match() with an array argument, gensub(), asort()
# and IGNORECASE.
#
# Code order:
#   skipws, noctrl            strip leading / trailing whitespace
#   putback, nextline         line reader with one line of push-back; the
#                             string "\001" means "nothing pushed back" and
#                             is also returned at end of input
#   iseob, readblock          group input lines into blocks
#   getlistend, makelistitem  list markup (state in listp / listtype[])
#   makecell, makeformseq     table-cell and form-field markup
#   rewritelink, addlink, gettargetseq, rewritehref   link handling
#   toggle, __so, makepar     inline markup of one paragraph; makepar()
#                             builds its default pattern when called with ""
#   makeheading, makeanchors, __closecommentpar__, __addcomment__, escape
#   process                   per-file driver: fills paragraph[] and prints
#   nextarg, BEGIN            option parsing and dispatch
#
# Behaviour notes for the statements corrected in this copy:
#   - nextline() reads with "getline line < filename"; the redirection had
#     been lost, leaving the filename parameter unused.
#   - makeformseq() prints the parsed value x[2] in its fallback branch
#     (the name "value" is never assigned anywhere).
#   - closing a nested table restores class/title from tstack[..., tdepth],
#     the same index used when the state was pushed.
#   - diversion blocks ("---%"): the counter is incremented once per block,
#     titles of any length are accepted, and the "=" prefix is removed from
#     the class name (the default class is kept when none is given).
#   - option -l passes a description to nextarg() for its error message.
#
# NOTE(review): this copy looks damaged by a markup-stripping extraction and
# cannot run as is -- restore it from a pristine source before use:
#   - physical lines are collapsed, so a "#" comment in the middle of a line
#     swallows the code after it and "\" continuations are not at end of line;
#   - text between "<" and ">" is gone: many string literals are empty or
#     broken across lines (sprintf("") with unused arguments), untag() is
#     called but not defined, and several "for (i=0; i..." loops are cut;
#   - the second 'line == ""' test in iseob() and the '== ""' test in the
#     template loop of process() presumably compared against a marker
#     string -- TODO confirm against the original.
#
BEGIN { pmback = 30; pminc = 70; } function skipws(line) { sub(/^[ \t\r\n]+/, "", line); return (line); } function noctrl(line) { sub(/[ \t\r\n]+$/, "", line); return (line); } function putback(line) { putbackline = line; return (0); } function nextline(filename, line) { if (putbackline != "\001") { line = putbackline; putbackline = "\001"; } else if ((getline line < filename) > 0) line = noctrl(line); else return ("\001"); return (line); } function iseob(line) { if (line == "" || line ~ /^---/ || line ~ /^:[a-z]+:/ || line == "") { return (1); } return (0); } function readblock(filename, line, par) { if ((par = nextline(filename)) == "\001") return ("\001"); if (iseob(par)) return (par); while ((line = nextline(filename)) != "\001") { if (iseob(line)) { putback(line); break; } par = par "\n" line; } return (par); } function getlistend( string) { if (listp == 0) return (""); string = ""; return (string); } function makelistitem(string, level, seq, type, class, closeitem) { seq = ""; if (substr(string, 1, 1) == "." 
|| substr(string, 2, 1) == ".") return (getlistend() "\n"); while (level < listp) seq = seq getlistend() "\n"; class = ""; if (match(string, /%[-a-z0-9]+=/) > 0) { class = substr(string, RSTART+1, RLENGTH-2); string = substr(string, 1, RSTART-1) substr(string, RSTART+RLENGTH); } closeitem = ""; if (listp > 0) { if (listtype[listp] == "OL" || listtype[listp] == "UL") closeitem = ""; } if (level == listp) type = listtype[listp]; else { # level > listp type = substr(string, 1, 1); if (type != ":" && length(string) > 1) type = substr(string, 2, 1); if (type == "#") type = "OL"; else if (type == "i" || type == "r") type = "OL CLASS=\"lowerroman\""; else if (type == "I" || type == "R") type = "OL CLASS=\"upperroman\""; else if (type == "a") type = "OL CLASS=\"loweralpha\""; else if (type == "A") type = "OL CLASS=\"upperalpha\""; else if (type == "-") type = "UL"; else if (type == "c") type = "UL CLASS=\"circle\""; else if (type == "d") type = "UL CLASS=\"disc\""; else if (type == "n") type = "UL CLASS=\"none\""; else if (type == "s") type = "UL CLASS=\"square\""; else if (type == ":") type = "DL"; else type = "UL"; listtype[++listp] = type; if (class != "") type = type " CLASS=\"" class "\""; seq = "<" type ">\n"; } sub(/ .+$/, "", type); if (type == "DL") seq = seq "
"; else if (type == "OL" || type == "UL") { if (class == "") seq = seq "
  • "; else seq = seq sprintf ("
  • ", class); } seq = closeitem seq; return (seq); } function makecell(seq, cell, stylecode) { if (match(seq, /[0-9A-F][0-9A-F]+c/) > 0) { cell = cell sprintf (" BGCOLOR=\"#%s\"", substr(seq, RSTART, RLENGTH-1)); sub(/[0-9A-F][0-9A-F]+c/, "", seq); } if (index(seq, "R") > 0) cell = cell " ALIGN=\"RIGHT\""; else if (index(seq, "C") > 0) cell = cell " ALIGN=\"CENTER\""; else if (index(seq, "L") > 0) cell = cell " ALIGN=\"LEFT\""; if (index(seq, "T") > 0) cell = cell " VALIGN=\"TOP\""; else if (index(seq, "M") > 0) cell = cell " VALIGN=\"MIDDLE\""; else if (index(seq, "B") > 0) cell = cell " VALIGN=\"BOTTOM\""; if (index(seq, "X") > 0) cell = cell " BORDER=\"1\""; if (match(seq, /[0-9]+%?w/) > 0) cell = cell sprintf (" WIDTH=\"%s\"", substr(seq, RSTART, RLENGTH - 1)); if (match(seq, /[0-9]+%?h/) > 0) cell = cell sprintf (" HEIGHT=\"%s\"", substr(seq, RSTART, RLENGTH - 1)); if (match(seq, /\|+/) > 0) cell = cell sprintf (" COLSPAN=\"%d\"", RLENGTH+1); if (match(seq, /:+/) > 0) cell = cell sprintf (" ROWSPAN=\"%d\"", RLENGTH+1); if (stylecode != "") cell = cell sprintf (" STYLE=\"%s\"", stylecode); cell = cell ">"; return (cell); } function makeformseq(seq, scriptcode, type, attr, i, n, x, y, z) { type = toupper(substr(seq, 1, 1)); seq = skipws(substr(seq, 2)); if (match(seq, /^([^"]+)[ \t]*"[ \t]*([^"]*)(".*)?/, x) == 0) return (""); x[2] = noctrl(x[2]); if (x[3] != "") { x[3] = skipws(noctrl(substr(x[3], 2))); } gsub(/"/, "\\"", x[2]); name = noctrl(x[1]); sub(/^.*[ \t]+/, "", name); if (match(x[1], /[0-9]+s/) > 0) attr = sprintf ("%s SIZE=\"%s\"", attr, substr(x[1], RSTART, RLENGTH-1)); if (match(x[1], /[0-9]+h/) > 0) attr = sprintf ("%s LINES=\"%s\"", attr, substr(x[1], RSTART, RLENGTH-1)); if (match(x[1], /=[-a-z0-9]+/) > 0) attr = attr sprintf (" CLASS=\"%s\"", substr(x[1], RSTART+1, RLENGTH-1)); if (type == "I") seq = sprintf ("", name, x[2], attr, scriptcode); else if (type == "P") seq = sprintf ("", name, x[2], attr, scriptcode); else if (type == "H") seq = 
sprintf ("", name, x[2], attr, scriptcode); else if (type == "S") seq = sprintf ("", name, x[2], attr, scriptcode); else if (type == "U") seq = sprintf ("", name, attr, scriptcode); else if (type == "A" || type == "T") { if (index(attr, "CLASS=") > 0) ; else if (match(x[1], /[0-9]+,[0-9]+s/) == 0) attr = "ROWS=\"10\" COLS=\"40\""; else { split(substr(x[1], RSTART, RLENGTH), y, ","); attr = sprintf ("%s ROWS=\"%d\" COLS=\"%d\"", attr, y[1], y[2]); } seq = sprintf ("", name, attr, x[2], scriptcode); } else if (type == "L" || type == "M") { seq = sprintf ("\n"; } else if (type == "C" || type == "R") { n = split(x[3], y, / *, */); for (i=1; i<=n; i++) z[y[i]] = 1; seq = sprintf ("", (type == "C")? "CHECKBOX": "RADIO", name, x[2], (x[2] in z)? " CHECKED": "", scriptcode); } else seq = sprintf ("%s|%s|%s", type, name, x[2]); return (seq); } #ifdef WITH_PRINTLINKTABLE function rewritelink(base, path, i, k, n, x, y) { sub(/\?.*$/, "", path); if (path ~ /:\/\// || path ~ /^[a-z0-9]+:/) return (path); if (substr(path, 1, 1) != "/") path = base "/" path; n = split(path, x, /\/+/); k = 0; for (i=1; i<=n; i++) { if (x[i] == "") { if (i == n) y[k++] = ""; continue; } if (x[i] == ".") continue; else if (x[i] == "..") { if (k >= 1) k--; } else y[k++] = x[i]; } path = (substr(path, 1, 1) == "/"? "/": "") y[0]; for (i=1; i < k; i++) path = path "/" y[i]; return (path); } #endif function addlink(type, target, text, key) { gsub(/<[^>]*>/, "", text); key = type SUBSEP target; if (! 
(key in havelink)) { linklist[++linkcount] = type "\t" target "\t" text; havelink[key] = 1; } return (0); } function gettargetseq(href, framename, seq) { seq = ""; if (framename != "") seq = sprintf (" TARGET=\"%s\"", framename); else if (targetmode == "" || targetframe == "") { if (applyfirefoxpatch != 0) seq = sprintf (" TARGET=\"_self\""); } else if (targetmode == "external") { if (href ~ /^([a-z]+:|\.*\/)/) seq = sprintf (" TARGET=\"%s\"", targetframe); else if (applyfirefoxpatch != 0) seq = sprintf (" TARGET=\"_self\""); } else if (targetmode == "all") seq = sprintf (" TARGET=\"%s\"", targetframe); return (seq); } function rewritehref(href) { if (substr(href, 1, 7) == "mailto:" || substr(href, 1, 5) == "news:") return (href); if (href ~ /\.yawk$/) href = href ".html"; else if (href ~ /(([0-9]+|[a-z]+)-?)?[A-Z]+[a-z0-9]+([A-Z]+[a-z0-9]+)+/) { if (href ~ /\.[[:alnum:]]+$/) sub(/.*\//, "", href); else { sub(/.*\//, "", href); href = tolower(gensub(/([[:lower:]])([[:upper:]])/, "\\1-\\2", "g", href)) ".html"; # href = tolower(gensub(/([a-z])([A-Z])/, "\\1-\\2", "g", href)) ".html"; } } return (href); } function toggle(par, mode, k, seq) { k = mode[par]+0; if (k == 0) { seq = "<" par ">"; k = 1; } else { seq = ""; k = 0; } mode[par] = k; return (seq); } function __so(string) { if (string != "") string = " STYLE=\"" skipws(string) "\""; return (string); } # "|" stylepattern "(^|\n) +:(\\.?[ \t]*|[^\n]+:[ \t]*\n?)" \ function makepar(par, pattern, i, k, n, x, y, modes, pre, seq, z) { if (pattern == "") { stylepattern = scriptpattern = "({{.*}} *)?"; pattern = "__|\\*\\*|==|%%|%\\*|::(\n|$)|\\^\\^|''" \ "|%:([0-F]+|[a-z]+)?(,([0-F]+|[a-z]+))?\\:" \ "|" stylepattern "%([-a-z0-9]+)?[-=]" \ "|''[^']*''" \ "|" stylepattern "(^|\n) +(#[irIRaA]?|-[cdns]?|-%[-a-z0-9]+=|\\.)(\\.?[ \t]*|[ \t]+)" \ "|" stylepattern "(^|\n) +:(\\.?[ \t]*|[^\n]+:\\+?[ \t]*\n?)" \ "|" stylepattern "(^|\n)##(=[-a-z0-9]+)?[-\\+!.#\\*]([aA1])?[ \t]*([^\n]*)($|\n)" \ "|" stylepattern 
"(^|\n)\\.([a-zA-Z][a-zA-Z0-9]*|\\.)([^\n]*)(\n|$)" \ "|" stylepattern "(^|[ \t\n]+)\\|([a-zA-Z0-9%|:,=#]*[-=:$\\|]|([=#*]?[a-zA-Z0-9%]+[ ,]*)*\\+)[ \t]*\n*|\\|\\." \ "|" scriptpattern ":\"( [EFCHILMPRASTU][^\n]*\"\\.|\\.)" \ "|(([0-9]+|[a-z]+)-?)?[A-Z]+[a-z0-9]+([A-Z]+[a-z0-9]+)+" \ "|mailto:[^@ \t]+@([-0-9A-Za-z]+\\.)+[a-z]+|news:[^: \t\n]+" \ "|(https?|gopher|nntp|ftp|telnet|file|[a-z][a-z0-9]*)://[^ \t\n]+" \ "|\\([^#!\n]*[#!][^#]+#[^\\)\n]*\\)" \ "|\\(:[ \t]+[^ \t\n\\)]*[ \t]+:\\)" \ "|" scriptpattern "\\[\\[[^\\]]+\\][^\\]]+\\]"; } delete mode; nowrap = 0; x = ""; while ((k = match(par, pattern)) > 0) { x = x substr(par, 1, RSTART-1); seq = substr(par, RSTART, RLENGTH); par = substr(par, RSTART+RLENGTH); #print "^" x ":" seq ":" par "~"; scriptcode = ""; if (match(seq, /^({{.*}} *)/, z) > 0) { scriptcode = " " skipws(noctrl(substr(z[1], 3, length(z[1]) - 5))); seq = substr(seq, RSTART+RLENGTH); #print "[" scriptcode ":" seq "]"; } else if (match(seq, /^{([^}]*)} */, z) > 0) { scriptcode = " " skipws(noctrl(z[1])); seq = substr(seq, RSTART+RLENGTH); } else if (match(seq, /^.{[^}]*}/) > 0) { scriptcode = " " skipws(noctrl(substr(seq, 3, RLENGTH-3))); seq = substr(seq, 1, 1) substr(seq, RSTART+RLENGTH); } if (seq == "__") x = x toggle("I", modes); else if (seq == "**") x = x toggle("B", modes); else if (seq == "%%") x = x toggle("U", modes); else if (seq == "==") x = x toggle("CODE", modes); else if (seq == "^^") { par = skipws(par); x = noctrl(x); } else if (seq == "%*") x = x toggle("STRIKE", modes); else if (seq ~ /^::/) x = x "
    " substr(seq, 3); else if (seq ~ /^%[^-= ]*[-=]$/) { tag = "SPAN"; if (substr(seq, length(seq), 1) == "=") tag = "DIV"; seq = substr(seq, 2, length(seq) - 2); if (seq == "") x = noctrl(x) sprintf ("", tag); else { x = x sprintf("<%s CLASS=\"%s\"%s>", tag, seq, __so(scriptcode)); if (tag == "DIV") sub(/^[ \t]+\n?/, "", par); else sub(/^[ \t]+/, "", par); } sub(/^\n*/, "", par); } else if (seq ~ /^%:[^ \.]*\:$/) { seq = substr(seq, 3, length(seq) - 3); if (seq == "") x = noctrl(x) ""; else { q = seq; sub(/,.*$/, "", q); if (q != "") { if (q ~ /^[0-9A-F]+$/) q = "#" q; y = sprintf (" color: %s;", q); } sub(/^[^,]*,?/, "", seq); if (seq != "") { if (seq ~ /^[0-9A-F]+$/) seq = "#" seq; y = y sprintf (" background: %s;", seq); } y = skipws(y); x = x sprintf ("", y); } } else if (seq == "''''") x = x "''"; else if (seq ~ /^''[^']+''/) x = x substr(seq, 3, length(seq) - 4); else if (substr(seq, 1, 4) == "\n " || seq ~ / *[-#:$]/) { pre = ""; seq = noctrl(seq); if (substr(seq, 1, 1) == "\n") { seq = substr(seq, 2); pre = "\n"; } match(seq, / +/); x = x pre makelistitem(skipws(seq), RLENGTH - 2); if (seq ~ /:$/) { sub(/ +:/, "", seq); if (listtype[listp] == "DL") { sub(/:$/, "\n
    \n", seq); par = seq par; } } } else if (match(seq, /##(=[-a-z0-9]+)?([-\+!.#\*])([aA1])?[ \t]*([^\n]*)(\n|$)/, z) > 0) { c = z[2]; p = z[3]; text = z[4]; if ((classpar = substr(z[1], 2)) == "") classpar = "nl"; if (c == ".") { if (innumlist != 0) x = x "\n"; } else if (c == "#" || c == "*") { # next item if (innumlist != 0) x = x "\n"; if (numlevel <= 0) { numlevel = 1; nlnumber[numlevel] = 0; } nlnumber[numlevel]++; q = sprintf ("%d", nlnumber[1]); if (numlevel == 1) q = q "."; else { for (i=2; i<=numlevel; i++) q = q "." sprintf ("%d", nlnumber[i]); } x = x sprintf ("
    %s %s
    \n", classpar, q, makepar(text, "")); x = x sprintf ("
    ", classpar); innumlist = 1; } else if (c == "+") { numlevel++; nlnumber[numlevel] = 0; } else if (c == "-") { numlevel--; if (numlevel < 1) numlevel = 1; } else if (c == "!") { if (innumlist != 0) x = x "
    \n"; numlevel = 1; nlnumber[numlevel] = 0; } } else if (match(seq, /(^|\n)\.([a-zA-Z][a-zA-Z0-9]*|\.)(.*)/, z) > 0) { if (z[2] == ".") x = x "\n"; else { x = x sprintf ("
    ", z[2]); if ((p = skipws(z[3])) != "") x = x noctrl(makepar(p, "")) "
    \n"; } } else if (seq ~ /^[ \t\n]*\|/) { seq = skipws(seq); seq = noctrl(substr(seq, 2)); c = substr(seq, length(seq), 1); seq = substr(seq, 1, length(seq) - 1); q = ""; classpar = style = ""; if (match(seq, /=[-a-zA-Z0-9]+/) > 0) { style = substr(seq, RSTART+1, RLENGTH-1); classpar = sprintf (" CLASS=\"%s\"", style); } #printf ("seq= %s\n", seq) >>"/dev/stderr"; if (match(seq, /#[-a-zA-Z0-9]+/) > 0) { classpar = classpar sprintf (" ID=\"%s\"", substr(seq, RSTART+1, RLENGTH-1)); sub(/#[-a-zA-Z0-9]+/, " ", seq); } if (tmode == 0 || c == "+") { tdepth++; tstack["title", tdepth] = ttitle; tstack["mode", tdepth] = tmode; tstack["lines", tdepth] = tlines; tstack["colors", tdepth] = tcolorlist; tstack["colnum", tdepth] = tcolnum; tstack["class", tdepth] = tclasslist; tstack["style", tdepth] = tstyle; tclasslist = ""; tclasscount = split(tclasslist, tclass, /,+/); tmode = 1; tlines = 0; tcolorlist = "CCFFCC,000001"; tcolors = split(tcolorlist, tcolor, ","); tstyle = style; q = classpar; if (match(seq, /\*[a-zA-Z0-9]+/) > 0) q = q " " sprintf ("NAME=\"%s\"", tolower(substr(seq, RSTART+1, RLENGTH-1))); extendedstyle = 0; ttitle = ""; if (c == "+") { if (match(par, /^[ \t]*"[^"]*"/) > 0) { ttitle = skipws(substr(par, RSTART, RLENGTH)); gsub(/^"|"$/, "", ttitle); par = substr(par, RSTART+RLENGTH); if (tstyle == "") tstyle = "table"; } } q = makecell(seq, "%s\n", tstyle, ttitle); q = sprintf ("
    \n", tstyle) q; } if (match(seq, /#[a-zA-Z0-9]+/) > 0) q = sprintf ("", substr(seq, RSTART+1, RLENGTH-1)) q } if (c == ".") { if (tmode == 3) { q = q "\n"; tmode = 2; } if (tmode == 2) q = q "\n"; q = q "
    \n"; if (ttitle != "") q = q ""; if (tdepth <= 0) { tmode = 0; tdepth = 0; } else { tmode = tstack["mode", tdepth]; tlines = tstack["lines", tdepth]; tcolorlist = tstack["colors", tdepth]; tcolnum = tstack["colnum", tdepth]; tclasslist = tstack["class", tdepth]; ttitle = tstack["title", tdepth]; tdepth--; tcolors = split(tcolorlist, tcolor, /,/); tclasscount = split(tclasslist, tclass, /,+/); } } else if (c == "-") { gsub(/[^0-9A-F]+/, ",", seq); tcolorlist = seq; tcolors = split(tcolorlist, tcolor, /,+/); tlines = 1; } else if (c == "$") { gsub(/[^a-zA-Z0-9_,]/, "", seq); tclasslist = seq; tclasscount = split(tclasslist, tclass, /,/); } else if (c == ":" || c == "=") { if (tmode == 3) { q = q "\n"; tmode = 2; } if (tmode == 2) q = q "\n"; if (tlines < 1 || tlines > tcolors) tlines = 1; if (c == ":") { tcolnum = 1; if (tcolor[tlines] != "000001") q = q sprintf ("\n", tcolor[tlines++]); else { q = q "\n"; tlines++; } if (classpar == "") { if (tclasscount >= 1) classpar = tclass[1]; else classpar = "data"; classpar = " CLASS=\"" classpar "\""; } q = q makecell(seq, "", tcolor[tlines++]); } q = q rowcode; tmode = 2; tcolnum = 0; } } else if (c == "|") { tcolnum++; if (tmode == 1) { tlines++; q = q sprintf("", tlines % 2 == 1? "p": "q"); } else if (tmode == 3) q = q "\n"; if (classpar == "") { if (tclasscount > 0) classpar = tclass[tcolnum >= tclasscount? tclasscount: tcolnum]; else classpar = "data"; classpar = " CLASS=\"" classpar "\""; } q = q makecell(seq, "\n"); else { seq = noctrl(skipws(substr(seq, 3, length(seq) - 4))); if ((c = substr(seq, 1, 1)) == "F" || c == "E") { url = skipws(substr(seq, 2)); if (url !~ /[ \t]+/) q = (c == "E")? "POST": "GET"; else { q = url; sub(/[ \t]+.*$/, "", q); sub(/^.*[ \t]+/, "", url); } targetname = ""; if (match(url, /=[^,]*,/) > 0) { targetname = substr(url, RSTART, RLENGTH-2); url = substr(url, RSTART + RLENGTH); } x = x sprintf ("
    ", q, url, (c == "E"? " ENCTYPE=\"multipart/form-data\"": ""), gettargetseq(url, targetname), scriptcode); } else if (seq != "") x = x makeformseq(seq, scriptcode); } } else if (match(seq, /^\([^#!]*[#!]/) > 0) { c = substr(seq, RSTART+RLENGTH-1, 1); classpar = ""; if (match(seq, /^[^ ]*=([-a-zA-Z0-9]+)/, z) > 0) classpar = sprintf (" CLASS=\"%s\"", z[1]); # classpar = sprintf (" CLASS=\"%s\"", substr(seq, RSTART+1, RLENGTH-1)); align = "l"; if (match(seq, /[mlrMLR]/) > 0) { align = substr(seq, RSTART, 1); } width = height = ""; if (match(seq, /[0-9]+%?,[0-9]+%?/) > 0) { split(substr(seq, RSTART, RLENGTH), z, ","); width = z[1]; height = z[2]; } sub(/^[^#!]*[#!][ \t]+/, "", seq); image = seq; sub(/[ \t]+#.*$/, "", image); text = seq; gsub(/^[^#]*#[ \t]*|[ \t]*\)$/, "", text); if (c == "!") { seq = sprintf (" 
    "; x = x seq; } else if (substr(seq, 1, 2) == "(:") { seq = skipws(noctrl(substr(seq, 3, length(seq) - 4))); x = x sprintf ("", seq); } else if (substr(seq, 1, 2) == "[[") { seq = substr(seq, 3); sub(/\]$/, "", seq); k = index(seq, "]"); target = noctrl(skipws(substr(seq, 1, k-1))); # if (match(target, /\+[_a-zA-Z0-9]+/) > 0) { if (match(target, /^\+[_a-zA-Z0-9]+/) > 0) { targetname = substr(target, RSTART+1, RLENGTH-1); target = skipws(substr(target, RSTART + RLENGTH)); } else targetname = ""; if (exportpage != 0) target = rewritehref(target); text = noctrl(skipws(substr(seq, k+1))); if (target == "") x = x makepar(text, ""); else { x = x sprintf("%s", target, gettargetseq(target, targetname), scriptcode, p = makepar(text, "")); addlink("href", target, p); # if (! (target in havelink)) { # linklist[++linkcount] = target "\t" p; # havelink[target] = 1; # } } } else if (seq ~ /:\/\//) x = x sprintf ("%s", seq, gettargetseq(seq), seq); else if (substr(seq, 1, 7) == "mailto:") x = x sprintf ("%s", seq, substr(seq, 8)); else if (substr(seq, 1, 5) == "news:") x = x sprintf ("%s", seq, seq); else { target = seq; if (exportpage != 0) target = rewritehref(target); x = x sprintf ("%s", target, gettargetseq(target), seq); } } x = x par; n = split("B I U CODE STRIKE", z, " "); seq = ""; for (i=1; i<=n; i++) { if (modes[z[i]]+0 > 0) seq = seq toggle(z[i], modes); } x = x seq; return (x); } function makeheading(par, c, tag, x) { if ((c = substr(par, 1, 1)) == "-") x = "
    "; else if (c == "*") { sub(/^[^ \t]+[ \t]+/, "", par) x = sprintf ("

    %s

    ", makepar(par, "")); } else if (index("123456", c) != 0) { sub(/^[^ \t]+[ \t]+/, "", par) x = sprintf ("%s", c, makepar(par, ""), c); } else if (match(par, /^\++/) > 0) { if (RLENGTH == 1) tag = "H3"; else if (RLENGTH == 2) tag = "H4"; else tag = "H5"; sub(/^[^ \t]+[ \t]+/, "", par) x = sprintf("<%s>%s", tag, makepar(par, ""), tag); } return (x); } function makeanchors(par, ctrl, seq, k, x, y) { x = ""; while ((k = match(par, /''[^']*''|\(\.[^\)]+\.\)|\(:[^\)]+:\)/)) > 0) { x = x substr(par, 1, RSTART-1); ctrl = substr(par, RSTART, 2); if (ctrl == "''") { x = x substr(par, RSTART, RLENGTH); par = substr(par, RSTART+RLENGTH); continue; } seq = skipws(noctrl(substr(par, RSTART+2, RLENGTH-4))); par = substr(par, RSTART+RLENGTH); if (ctrl == "(:") x = x sprintf ("", seq); else { seq = html = makepar(seq, ""); gsub(/<[^>]*>/, "", seq); anchor[na] = sprintf ("%s", na+1, seq); x = x sprintf ("%s", na+1, html); na++; } } x = x par; return (x); } function __closecommentpar__() { if (commentpar >= 0) { paragraph[commentpar] = "

    " paragraph[commentpar] "
    "; commentpar = -1; } return (0); } function __addcomment__(text) { if (commentpar < 0) { commentpar = np++; } if (paragraph[commentpar] != "") paragraph[commentpar] = paragraph[commentpar] "
    \n"; paragraph[commentpar] = paragraph[commentpar] text; return (commentpar); } function escape(string) { gsub(/&/, "\\&", string); gsub(/>/, "\\>", string); gsub(/]*>/, "", string); return (string); } function process(filename, pattern, j, k, x) { if (filename == "") return (0); putback("\001"); na = np = nonempty = 0; listp = linkcount = 0; delete havelink; delete(paragraph); commentpar = -1; tocpar = 0; tocstyle = "short"; tdepth = 0; tmode = 0; IGNORECASE = 0; while ((par = readblock(filename)) != "\001") { if (par == "") { paragraph[np++] = par; continue; } else if (par ~ /^:data:/) { $0 = par; p = tolower($2); $1 = $2 = ""; text = skipws($0); if ((k = attrnum[p]) == 0) { k = attrnum[p] = ++attrcount; attrlist[attrcount] = p; } attrtab[attrrownum, k] = text; continue; } else if (par ~ /^:wc:/) { $0 = par; if ($2 == "rel" && $3 !~ /:$/) addlink("rel", $3, $3); continue; } else if (par ~ /^:parser:/) { $0 = par; if ($2 == "toc" || $2 == "toc-style") { if ($2 == "toc") tocpar = np; k = 3; if ($k != "") { if ($k == "styled") { styledtoc = "yes"; k++ } if ($k == "long" || $k == "faq") tocstyle = "long" else if ($k == "list") tocstyle = "list"; else tocstyle = "short"; } } # else if ($2 == "toc-style") # tocstyle = ($3 == "long" || $3 == "faq")? "long": "short"; else if ($2 == "href") showhrefs = ($3 == "no" || $3 == "off")? "no": "yes"; else if ($2 == "enable-comments") { enablecomments = $3; } else if ($2 == "comment-anchor") { commentanchor++; $1 = $2 = ""; if (commentanchor != targetanchor) text = sprintf ("\n", commentanchor, skipws($0)); else { text = ""; text = text "
    \n"; text = text "
    Add Comment:
    \n"; text = text sprintf ("\n"); text = text sprintf ("\n"); text = text sprintf ("\n", targetanchor); text = text sprintf ("
    \n"); text = text sprintf ("\n"); text = text ""; text = text "
    "; } __closecommentpar__(); __addcomment__(text); # paragraph[np++] = text; } else if ($2 == "add-comment") { targetanchor = $3+0; #printf ("target anchor= %d\n", targetanchor) >>"/dev/stderr"; } continue; } else if (par ~ /^:[a-z]/) continue; else if (par ~ /^---{/) { par = ""; while ((line = nextline(filename)) != "\001") { if (line == "---{") break; par = par line "\n"; } paragraph[np++] = ""; continue; } else if (par ~ /^---<<$/) { par = ""; while ((line = nextline(filename)) != "\001") { if (line == "--->>") break; par = par line "\n"; } paragraph[np++] = noctrl(par); continue; } par = escape(par); par = makeanchors(par); if (par == "") { while ((line = nextline(filename)) != "\001") { if (line != "") { putback(line); break; } } paragraph[np] = "

    "; } else if (par ~ /^---%/) { type = substr(par, 4, 1); par = noctrl(skipws(substr(par, 5))); if (par == "") { if (divsp > 0) { class = divpar["class", divsp]; title = divpar["title", divsp]; divsp--; divcount[class]++; gsub(/##/, divcount[class], title); x = sprintf ("

    %s
    \n\n", class, title); } } else { divsp++; divpar["class", divsp] = "diversion"; divpar["title", divsp] = "Text ##"; if (match(par, /(=[-a-z0-9]+)?([ \t]+("[^"]*"))/, z) > 0) { if (z[1] != "") divpar["class", divsp] = substr(z[1], 2); divpar["title", divsp] = substr(z[3], 2, length(z[3]) - 2); } x = sprintf ("
    ", divpar["class", divsp]); } paragraph[np] = x; } else if (match(par, /^---(=|:)#?/) > 0) { type = substr(par, 4, 1); linenumbers = (substr(par, RLENGTH, 1) == "#")? 1: 0; # par = noctrl(skipws(substr(par, 5))); par = noctrl(skipws(substr(par, RLENGTH+1))); extendedstyle = 0; class = "-"; if (match(par, /"[^"]*"/) > 0) { title = skipws(noctrl(substr(par, RSTART+1, RLENGTH-2))); par = substr(par, 1, RSTART-1) substr(par, RSTART+RLENGTH); class = "example"; extendedstyle = 1; } x = "
    ";
    			if (match(par, /^(=[-a-z0-9]+|-)([ \t]+|$)/)) {
    				class = noctrl(substr(par, RSTART+1, RLENGTH-1));
    				par = skipws(substr(par, RSTART+RLENGTH));
    				}
    
    			if (class != ""  &&  class != "-")
    				x = sprintf("
    ", class);
    			else {
    				x = "
    ";
    				extendedstyle = 0;
    				}
    
    			if (extendedstyle != 0  ||  class != "")
    				x = sprintf("
    \n", class) x; seq = "---" type; # if (par != "") # seq = par; #printf ("** seq= %s, type= %s\n", seq, type) >>STDERR while ((line = nextline(filename)) != "\001") { if (line == seq) break; line = escape(line); if (type != "=") { line = makeanchors(line); line = makepar(line, ""); } if (linenumbers != 0) line = sprintf ("%3d ", linenumbers++) line; x = x line "\n"; } x = x "
    "; if (extendedstyle != 0 || class != "") { if (extendedstyle != 0) { listingnumber[class]++; sub(/##/, listingnumber[class], title); x = x sprintf ("\n
    %s
    \n", class, title); } x = x "
    "; } paragraph[np] = x; } else if (par ~ /^---#/) { seq = par; par = ""; while ((line = nextline(filename)) != "\001") { if (line == seq) break; par = par line "\n"; } x = makepar(par, "__|\\*\\*|==|%%|%\\*|::(\n|$)|\\^\\^|''" \ "|(([0-9]+|[a-z]+)-?)?[A-Z]+[a-z0-9]+([A-Z]+[a-z0-9]+)+"); __addcomment__("
    " x "
    "); } else if (substr(par, 1, 3) == "---") paragraph[np] = makeheading(substr(par, 4)); else paragraph[np] = makepar(par, ""); if (skipws(paragraph[np]) != "" && paragraph[np] != "

    ") nonempty++; np++; if (textstart != 0 && nonempty >= startparcount) break; } close(filename); __closecommentpar__(); if (listp > 0) { seq = ""; while (listp > 0) seq = seq "\n" getlistend(); paragraph[np++] = seq; } if (plaintext != 0) { for (i=0; i]*>/, "", line); printf ("%s\n", line); } return (0); } else if (textstart != 0 || listattributes != 0) { text = ""; k = 0; while (k <= np && text == "") { text = skipws(noctrl(untag(paragraph[k++]))); gsub(/[ \t\r\n]+/, " ", text); } if (startparcount > 1) text = text "\002"; for (i=k; i<=np; i++) { if (paragraph[i] != "

    ") p = skipws(noctrl(untag(paragraph[i]))); else { p = (text != "")? "-": ""; if (i < np && paragraph[i+1] == "

    ") i++; } gsub(/[ \t\r\n]+/, " ", p); text = skipws(text " " p); if (length(text) >= startmaxchar) break; } if (length(text) > startmaxchar) { text = substr(text, 1, startmaxchar); sub(/[ \t]*[^ \t]*$/, "", text); } if (startparcount > 1) { text = "" text; sub(/(\002|$)/, "", text); } if (listattributes != 0) { attrtab[attrrownum, attrnum["title"]] = text; return (0); } printf ("%s\t%s\n", filename, text); fflush(); return (0); } else if (tablequery != 0) { if (tablename == "" || tablename == "-") pattern = "]*>"; else pattern = sprintf ("]*NAME=\"%s\"[^>]*>", tablename); for (i=0; i 0) { paragraph[i] = substr(paragraph[i], RSTART+RLENGTH); while (i < np) { line = paragraph[i++]; if (match(line, /<\/TABLE>/) > 0) line = substr(line, 1, RSTART-1); data = data " " line; if (RSTART > 0) break; } row = col = 0; while (match(data, /<(TR|TD)( [^>]*)?>/) > 0) { seq = substr(data, RSTART, RLENGTH); data = substr(data, RSTART+RLENGTH); if (substr(seq, 1, 3) == " 0) printf ("\n"); col = 1; row++; } else { seq = "N/A"; if (match(data, /<\/TD>/) > 0) { seq = substr(data, 1, RSTART-1); data = substr(data, RSTART+RLENGTH); gsub(/<[^>]*>/, "", seq); gsub(/[ \t\r\n]/, " ", seq); } if (col > 1) printf ("\t"); gsub(/</, "<", seq); gsub(/>/, ">", seq); gsub(/&/, "\\&", seq); printf ("%s", noctrl(skipws(seq))); col++; } } if (row > 0) printf ("\n"); break; } } exit (0); } else if (pattern != "") { if (case == 0) IGNORECASE = 1; k = 0; for (i=0; i]*>/, " ", line); n = split(line, z, "\n"); for (j=1; j<=n; j++) { k++; if (z[j] == "" || z[j] == " ") continue; if (match(z[j], pattern) > 0) { line = substr(z[j], RSTART-pmback, RLENGTH+pminc); seq = substr(z[j], ((RSTART - pmback) < 1? 
1: RSTART - pmback) + RLENGTH + pminc); if (line ~ /^[ \t]/) line = skipws(line); else if (RSTART-pmback > 1 && match(substr(z[j], 1, RSTART - pmback - 1), /[^ \t]+$/) > 0) { line = substr(z[j], RSTART, RLENGTH) line; } if (line ~ /[ \t]$/) line = noctrl(line); else if (length(seq) > 0) { if (match(seq, /^[^ \t]+/) > 0) line = line substr(seq, 1, RLENGTH); } line = skipws(noctrl(line)); printf ("%d %s\n", k+1, line); } } } return (0); } else if (printlinksonly != 0) { if (printlinktable != 0) return (0); delete havelink; for (i=1; i <= linkcount; i++) { split(linklist[i], x, "\t"); if (x[1] == "href") { if (x[2] in havelink) continue; printf ("%s\t%s\n", x[2], x[3]); havelink[x[2]] = 1; } } return (0); } # # Remove tags if requested. # if (showhrefs == "no") { for (i=0; i]*>/, "", paragraph[i]); for (i=0; i]*>/, "", anchor[i]); } # # Emit text till the CONTENT-START marker. This text is meant to # be part of a template which does not increase our line counter. # here = 0; while (here < np) { if ((p = paragraph[here++]) == "") { break; } printf ("%s\n", p); } k = 0; for (i=here; i") continue; if (tocpar >= 0 && i >= tocpar) { if (0 && showhrefs == "no") ; else if (na > 0) { if (styledtoc == "yes") { if (tocstyle == "list") printf ("

    \n"); printf ("

    \n"); } if (tocpar == 0) { if (tocstyle == "long") printf ("
    \n"); else printf ("

    \n"); } } tocpar = -1; } n = split(paragraph[i], z, "\n"); for (j=1; j<=n; j++) { k++; if (k == linenumber) { printf ("", k); printf ("%s\n", z[j]); } else printf ("%s\n", z[j]); } } return (0); } function nextarg(par, arg) { if (argi >= ARGC) { printf ("%s: missing argument: %s\n", program, par) >STDERR; exit (1); } arg = ARGV[argi]; ARGV[argi++] = ""; return (arg); } BEGIN { program = "wiki-parser"; STDERR = "/dev/stderr"; case = search = plaintext = linenumber = exportpage = 0; textstart = 0; startparcount = 5; startmaxchar = 200; targetframe = ""; printlinksonly = 0; pattern = ""; argi = 1; while (argi < ARGC && substr(ARGV[argi], 1, 1) == "-") { options = nextarg("option"); if (options == "--") break; for (i = 2; i<=length(options); i++) { c = substr(options, i, 1); if (c == "p") plaintext = 1; else if (c == "a") { if (listattributes == 1) basedirectory = nextarg("base directory"); if (match(basedirectory, /:/) > 0) { sortby = substr(basedirectory, 1, RSTART-1); basedirectory = substr(basedirectory, RSTART+1); } listattributes = 1; } else if (c == "b") { if (textstart == 0) textstart = 1; else { split(nextarg("par and char count"), x, ","); if (x[1]+0 > 0) startparcount = x[1] + 0; if (x[2]+0 > 0) startmaxchar = x[2] + 0; } } else if (c == "q") { tablequery = 1; tablename = tolower(nextarg("tablename")); } else if (c == "s") search = 1; else if (c == "r") search = case = 1; else if (c == "t") { targetmode = "external"; targetframe = nextarg("target frame"); } else if (c == "L") printlinksonly = 1; #ifdef WITH_PRINTLINKTABLE else if (c == "M") { printlinktable = printlinksonly = 1; printlinktype = nextarg("linktype"); wikibasedir = nextarg("wikibasedir"); } #endif else if (c == "T") { targetmode = "all"; targetframe = nextarg("target frame"); } else if (c == "P") applyfirefoxpatch = 1; else if (c == "l") linenumber = nextarg("line number")+0; else if (c == "x") exportpage = 1; else { printf ("%s: unknown option: -%s\n", program, c) >>STDERR; exit (1); } } } if 
(listattributes != 0 && textstart == 0) { attrheaderprinted = 0; textstart = 1; startparcount = 1; startmaxchar = 80; } if (search != 0) { if (getline pattern <= 0) exit (1); pattern = skipws(noctrl(pattern)); if (pattern == "") exit (1); while (getline filename > 0) { process(filename, pattern); printf (". %s\n", filename); fflush(); } exit (0); } else if (listattributes != "") { attrnum["filename"] = ++attrcount; attrlist[1] = "filename"; attrnum["title"] = ++attrcount; attrlist[2] = "title"; sub(/\/+$/, "", basedirectory); bdl = 0; if (basedirectory != "") { basedirectory = basedirectory "/"; bdl = length(basedirectory); } attrrownum = 0; while (getline > 0) { if ($1 == ".") break; if (NF == 1) { p = $1; if (substr(p, 1, bdl) == basedirectory) p = substr(p, bdl+1); attrtab[++attrrownum, attrnum["filename"]] = p; } else { attrtab[++attrrownum, attrnum["filename"]] = $2; } #printf (">> %s\n", $1) >>STDERR; process($1, ""); } for (i=1; i<=attrcount; i++) attrtab[0, i] = attrlist[i]; q = attrnum["title"]; s = attrnum[sortby]; if (s == 0) s = q; k = 0; for (i=0; i<=attrrownum; i++) { line = attrtab[i, 1]; for (j=2; j<=attrcount; j++) { v = attrtab[i, j]; if (j == q && v == "") { v = attrtab[i, 1]; sub(/^.*\//, "", v); } line = line "\t" v; } if (i == 0) row[++k] = "\001" line; else { idx = sprintf ("%s:%06d", tolower(attrtab[i, s]), i); row[++k] = idx "\001" line; } # printf (">> %s\n", line); # fflush(); } n = asort(row); for (i=1; i<=n; i++) { line = row[i]; sub(/^.*\001/, "", line); printf ("%s\n", line); } } else if (textstart != 0) { while (getline filename > 0) process(filename, ""); exit (0); } #ifdef WITH_PRINTLINKTABLE else if (printlinktable != 0) { while (getline filename > 0) { if (substr(filename, 1, 2) == "./") filename = substr(filename, 3); if (substr(filename, 1, length(wikibasedir) + 1) == wikibasedir "/") { p = filename; filename = substr(filename, length(wikibasedir) + 2); } else p = wikibasedir "/" filename; #printf (">> %s\n", p) >>STDERR; 
process(p, ""); basehref = "/" filename; sub(/[^/]*$/, "", basehref); delete havelink; for (i=1; i <= linkcount; i++) { split(linklist[i], x, "\t"); if (printlinktype != "any" && x[1] != printlinktype) continue; if ((link = rewritelink(basehref, x[2])) != "") { if (link in havelink) continue; if (substr(link, 1, 1) == "/") link = substr(link, 2); printf ("%s\t%s\t%s\n", x[1], filename, link); } } } } #endif else { filename = "/dev/stdin"; process(filename, pattern); } exit (0); }