grapho/md2html

283 lines
5.5 KiB
Plaintext
Raw Normal View History

#!/bin/sh
# md2html: convert Markdown read by awk into an HTML page with a generated
# table of contents.  All command-line arguments are handed to awk ahead of
# the program text, so they are awk options (for example -v name=value).
#
# The first awk pass writes the page to a temporary file; the file is then
# read again to build the table of contents.
#
# The temporary file is created with mktemp instead of a name derived from
# $$ and $1: $1 is an awk option here and may contain "/" (which produced a
# path inside a non-existent directory), and a predictable name is unsafe.
tmpfile=$(mktemp "${TMPDIR:-/tmp}/md2html.XXXXXX") || exit 1
# Remove the temporary file on every exit path, including interruption.
trap 'rm -f -- "$tmpfile"' EXIT
trap 'exit 1' HUP INT TERM
awk "$@" '
# First pass: translate the Markdown input into indented HTML.
# Global state set up here:
#   indent  current indentation width used when printing (2 per open element)
#   prefix  element path under which all body content is opened
#   meta    front-matter values; "lang" defaults to "en"
#   toc     0 until the <summary> placeholder has been emitted, then 1
BEGIN {
  indent = 0
  prefix = "html/body/"
  print("<!DOCTYPE html>")
  # Opens <html> and <head>; <head> is filled in by headers() when it closes.
  level("html/head")
  meta["lang"] = "en"
  toc = 0
}
# headers(): print the children of <head> at the current indentation.
# Called by level() just before </head> is written, so front-matter and a
# leading top-level heading have already been seen.
# Reads: indent, meta["title"], meta["author"], stylesheet, javascript.
# stylesheet and javascript are not assigned anywhere in this program;
# presumably they are supplied with -v on the command line.
function headers() {
  printf("%0" indent "s<meta charset=UTF-8>\n", "")
  if (meta["title"]) printf("%0" indent "s<title>%s</title>\n", "", escape(meta["title"]))
  if (meta["author"]) printf("%0" indent "s<meta name=author content=\"%s\">\n", "", escape(meta["author"]))
  # The href is escaped like every other attribute value written here.
  if (stylesheet) printf("%0" indent "s<link href=\"%s\" rel=stylesheet type=text/css media=all>\n", "", escape(stylesheet))
  # Note the script path is emitted with a leading "/", the stylesheet is not.
  if (javascript) printf("%0" indent "s<script src=\"/%s\"></script>\n", "", escape(javascript))
}
# oneliner(tag, str, args): print one indented line holding a complete
# element: <tag args>inline(str)</tag>.  args is appended verbatim to the
# tag name, so it must start with a space when it is not empty.
# pad and body are local scratch variables.
function oneliner(tag, str, args,    pad, body) {
  pad = sprintf("%0" indent "s", "")
  body = inline(str)
  print pad "<" tag args ">" body "</" tag ">"
}
# level(new): change the set of open elements from the path stored in the
# global "old" to the path "new" (components separated by "/").
# Elements of old that are not shared with new are closed, innermost first;
# the remaining elements of new are then opened.  indent moves by 2 per
# element and old is updated to new.
# Uses the globals a, b, common and i as scratch space.
function level(new) {
  split(new, a, "/")
  split(old, b, "/")
  # Find the first index at which the two paths differ.  split() fills the
  # arrays from index 1; reading index 0 below creates an empty element in
  # both arrays, which makes length() one larger than the component count.
  # The loops further down rely on that, so keep the statement order.
  for (common = 0; common < 10; common++) {
    if (common >= length(a)) break
    if (common >= length(b)) break
    if (a[common] != b[common]) break
  }
  # Close what is no longer wanted.  Empty components (index 0 and the
  # trailing one produced by a path ending in "/") are skipped.
  for (i = length(b); i >= common; --i) {
    if (b[i]) {
      # <head> gets its content right before it is closed.
      if (b[i] == "head") headers()
      indent = indent - 2
      if (b[i]=="pre") {
        # Printed at column 0, presumably so that no padding ends up
        # inside the preformatted text.
        printf("</%s>\n", b[i])
      } else {
        printf("%0" indent "s</%s>\n", "", b[i])
      }
    }
  }
  # Open the new elements.  length(a) is evaluated again on every
  # iteration, after a[i] has been read.
  for (i = common; i < length(a); i++) {
    if (a[i]) {
      if (a[i]=="body") {
        # The second pass looks for this lang attribute.
        printf("%0" indent "s<%s lang=%s>\n", "", a[i], meta["lang"])
      } else {
        printf("%0" indent "s<%s>\n", "", a[i])
      }
      indent = indent + 2
    }
  }
  old = new
}
# escape(text): return text with &, <, > and the double quote replaced by
# HTML entities, so the result is safe as element content and inside a
# double-quoted attribute value.
function escape(text) {
  # "&" goes first so the entities produced below are not escaped again.
  # In the replacement, backslash-ampersand stands for a literal "&".
  gsub(/&/, "\\&amp;", text)
  gsub(/</, "\\&lt;", text)
  gsub(/>/, "\\&gt;", text)
  gsub(/"/, "\\&quot;", text)
  return text
}
# inline(text): escape text for HTML and then convert inline Markdown:
#   **x** to <strong>, *x* to <em>, `x` to <code>, [label](url) to <a href>.
# Returns the converted string.  Needs gawk (gensub).
function inline(text) {
  text = escape(text)
  # Bold is handled before emphasis so that "**" is not eaten as two "*".
  text = gensub(/\*\*([^\*]*)\*\*/, "<strong>\\1</strong>", "g", text)
  text = gensub(/\*([^\*]*)\*/, "<em>\\1</em>", "g", text)
  text = gensub(/`([^`]*)`/, "<code>\\1</code>", "g", text)
  # Label stops at the first "]" and url at the first ")".  The former
  # greedy (.*) pattern turned several links on one line into one link.
  # Consequence: a url that itself contains ")" is cut at that character.
  text = gensub(/\[([^]]*)\]\(([^)]*)\)/, "<a href=\"\\2\">\\1</a>", "g", text)
  return text
}
# A line consisting of "---" switches front-matter parsing on or off.
# It is ignored while a fenced code block is open, so a "---" line inside
# a code sample no longer turns the rest of the document into metadata.
/^---$/ && old != (prefix "pre") {
  parse_meta = !parse_meta
  next
}
# While front-matter parsing is on, every line is stored in meta[] as
# "key: value" and is not rendered.  The line is cut at the FIRST ": " so
# a value may itself contain ": " (split() used to drop everything after
# the second separator).  One leading and one trailing double quote are
# removed from the value.
{
  if (parse_meta) {
    sep = index($0, ": ")
    if (sep) {
      key = substr($0, 1, sep - 1)
      val = substr($0, sep + 2)
      gsub(/(^"|"$)/, "", val)
      meta[key] = val
    } else {
      # No separator: keep the previous behaviour of recording the whole
      # line as a key with an empty value.
      meta[$0] = ""
    }
    next
  }
}
# A line starting with three backticks toggles a preformatted block:
# it closes <pre> when one is open and opens one otherwise.
/^```/ {
  level((old == prefix "pre") ? prefix : (prefix "pre"))
  next
}
# Inside <pre> every line is copied through with HTML escaping only:
# no indentation and no inline Markdown.
old == (prefix "pre") {
  print escape($0)
  next
}
# List items: a line starting with "- ", or with optional white space
# followed by "* ".  Going back to the <ul> level first closes the previous
# <li> (or opens the list), then a fresh <li> is opened for this item.
/^- / || /^\s*\* / {
  level(prefix "ul")
  level(prefix "ul/li")
  # Strip the bullet marker; the second form is only tried when the line
  # did not start with "- ".
  if (!gsub(/^- /, "", $0)) gsub(/^\s*\* /, "", $0)
  printf("%0" indent "s%s\n", "", inline($0))
  next
}
# Quoted text: a line starting with "> " goes into a <blockquote>
# ("quote" is not an HTML element).  The marker is removed before the
# inline conversion.
/^> / {
  level(prefix "blockquote")
  gsub(/^> /, "", $0)
  printf("%0" indent "s%s\n", "", inline($0))
  # Without this the line fell through to the later rules and was printed
  # a second time as a paragraph.
  next
}
# A line starting with white space is rendered inside a <blockquote>.
# NOTE(review): as visible here the pattern is identical to the one of the
# following rule, which this rule now shadows; the original white space in
# the two patterns may differ - confirm against the upstream file.
/^ / {
  level(prefix "blockquote")
  printf("%0" indent "s%s\n", "", inline($0))
  # Stop here so the line is not printed again by a later rule.
  next
}
# A line starting with white space continues whatever block is open; a
# paragraph is started only when nothing but the body is open.
/^ / {
  if (old == prefix) {
    level(prefix "p")
  }
  print sprintf("%0" indent "s", "") inline($0)
  next
}
# Headings: one or more "#" followed by a space.  The number of "#" is the
# heading depth d and selects <h1> .. <hN>.
/^##* / {
  d = length($1)
  gsub(/^#* /, "", $0)
  # The first top-level heading doubles as the page title when the
  # front-matter did not provide one.  <head> is still open at this point
  # and is only written when level() closes it below.
  if (d == 1 && !meta["title"]) meta["title"] = $0
  # Before the first heading below the top level, emit an empty
  # <summary></summary> pair once.  The shell code after this program
  # cuts the output at that placeholder and inserts the table of contents.
  if (d != 1 && toc == 0) {
    level(prefix "summary")
    level(prefix)
    toc = 1
  }
  # Anchor id: the heading text reduced to letters and digits, prefixed
  # with the remembered ids of the enclosing headings from depth 2 on.
  # Top-level headings get no id.
  id1 = $0
  gsub(/[^a-zA-Z0-9]/, "", id1)
  hash[d] = id1
  id = ""
  for (i = 2; i <= d; i++) {
    id = id hash[i]
  }
  if (id != "") id = " id=\"" escape(id) "\""
  level(prefix)
  oneliner("h"d, $0, id)
  next
}
# An empty line ends the current block by returning to the body level.
# It is ignored while <head> is still the open element.
/^$/ {
  if (old == "html/head") next
  level(prefix)
  next
}
# A line that consists only of ![caption](file) becomes a <figure> holding
# an <img> and a <figcaption>.
/^!\[.*\]\(.*\)$/ {
  txt = $0
  filenam = $0
  # Caption: drop the leading "![" and everything from the first "]" on.
  gsub(/(^!\[|\].*$)/, "", txt)
  # File: drop everything up to the last "(" and the final ")".
  gsub(/(^!.*\(|\)$)/, "", filenam)
  level(prefix "figure")
  # <img> is a void element and must not get an end tag, so it is printed
  # directly instead of through oneliner().
  printf("%0" indent "s<img src=\"%s\" alt=\"%s\">\n", "", escape(filenam), escape(txt))
  oneliner("figcaption", txt, "")
  next
}
# Fallback for every line no earlier rule consumed: paragraph text.
# level() opens <p> when needed and closes any other open block first.
{
  level(prefix "p")
  print sprintf("%0" indent "s", "") inline($0)
  next
}
# At end of input close every element that is still open.  The output of
# this first pass is collected in the temporary file.
END { level("") }' > "$tmpfile"
# A document without any heading below the top level has no <summary>
# placeholder.  Emit it unchanged and stop: otherwise the whole page would
# be printed here and an empty table of contents appended after it.
if ! grep -q '<summary>' "$tmpfile"; then
  cat "$tmpfile"
  rm -f "$tmpfile"
  exit 0
fi
# Print everything that precedes the placeholder line.
sed -n '/<summary>/!p;//q' "$tmpfile"
awk '
# Second pass: read the HTML produced by the first pass and print the
# table of contents as nested lists inside <summary>.
BEGIN {
  # The placeholder was written by the first pass at this indentation
  # (two elements deep, two columns each).
  indent = 4
  level("summary")
  lang = "en"
  # Heading of the table of contents, keyed by document language.
  tocname["en"] = "Table of contents"
  tocname["de"] = "Inhaltsverzeichnis"
}
# oneliner(tag, str, args): write <tag args>inline(str)</tag> as a single
# line at the current indentation.  args is glued to the tag name as is.
# lead and content are local scratch variables.
function oneliner(tag, str, args,    lead, content) {
  lead = sprintf("%0" indent "s", "")
  content = inline(str)
  print lead "<" tag args ">" content "</" tag ">"
}
# level(new): change the set of open elements from the path stored in the
# global "old" to the path "new" (components separated by "/").  Elements of
# old not shared with new are closed innermost first, then the remaining
# elements of new are opened.  indent moves by 2 per element; old becomes new.
# Uses the globals a, b, common and i as scratch space.
function level(new) {
split(new, a, "/")
split(old, b, "/")
# Find the first index at which the paths differ.  split() fills from index
# 1; reading index 0 here creates an empty element in both arrays, so
# length() becomes one larger than the component count.  The loops below
# depend on that - do not reorder these statements.
for (common = 0; common < 10; common++) {
if (common >= length(a)) break
if (common >= length(b)) break
if (a[common] != b[common]) break
}
# Close elements that are no longer wanted; empty components are skipped.
for (i = length(b); i >= common; --i) {
if (b[i]) {
indent = indent - 2
if (b[i]=="pre") {
# Closing tag for "pre" is printed without indentation.
printf("</%s>\n", b[i])
} else {
printf("%0" indent "s</%s>\n", "", b[i])
}
}
}
# Open the new elements; length(a) is re-evaluated on every iteration,
# after a[i] has been read.
for (i = common; i < length(a); i++) {
if (a[i]) {
printf("%0" indent "s<%s>\n", "", a[i])
indent = indent + 2
}
}
old = new
}
# escape(text): return text with the four HTML-special characters replaced
# by their entities.  The table is walked in order and "&" comes first, so
# the ampersands of the generated entities are not escaped again.
# n, k, from and to are local scratch variables.
function escape(text,    n, k, from, to) {
  n = split("& < > \"", from, " ")
  split("amp lt gt quot", to, " ")
  for (k = 1; k <= n; k++) {
    # Backslash-ampersand in the replacement is a literal "&".
    gsub(from[k], "\\&" to[k] ";", text)
  }
  return text
}
# inline(text): escape text for HTML and then convert inline Markdown:
#   **x** to <strong>, *x* to <em>, `x` to <code>, [label](url) to <a href>.
# Returns the converted string.  Needs gawk (gensub).
function inline(text) {
  text = escape(text)
  # Bold is handled before emphasis so that "**" is not eaten as two "*".
  text = gensub(/\*\*([^\*]*)\*\*/, "<strong>\\1</strong>", "g", text)
  text = gensub(/\*([^\*]*)\*/, "<em>\\1</em>", "g", text)
  text = gensub(/`([^`]*)`/, "<code>\\1</code>", "g", text)
  # Label stops at the first "]" and url at the first ")".  The former
  # greedy (.*) pattern turned several links on one line into one link.
  # Consequence: a url that itself contains ")" is cut at that character.
  text = gensub(/\[([^]]*)\]\(([^)]*)\)/, "<a href=\"\\2\">\\1</a>", "g", text)
  return text
}
# The <body lang=..> line carries the document language.  Take the run of
# lower-case letters after "lang=" (an optional double quote is skipped)
# and print the heading of the table of contents in that language.
/<body lang=/ {
  lang = $0
  gsub(/^.*lang="*/, "", lang)
  gsub(/[^a-z].*$/, "", lang)
  # Languages without a translation fall back to English instead of
  # producing an empty heading.
  if (!(lang in tocname)) lang = "en"
  level("summary")
  oneliner("h2", tocname[lang])
}
# Every heading of depth 2 to 6 that carries an attribute becomes one entry
# of the table of contents: a link to the heading id, nested one <ul> per
# heading depth below 2.
/<h[2-6] / {
  # id: the text between id=" and the closing "> of the start tag.
  id = $0
  gsub(/^.*id="/, "", id)
  gsub(/">.*$/, "", id)
  # d: heading depth, taken from the first field ("<hN").
  d = $1
  gsub(/^<h/, "", d)
  d = d + 0   # compare as a number in the loop below
  # Remove all tags (and the blanks in front of them) to get the title.
  gsub(/ *<[^>]*>/, "", $0)
  trg = "summary/ul"
  for (i = 2; i < d; i++) trg = trg "/li/ul"
  # Going to the <ul> first closes the previous entry, then open a new <li>.
  level(trg)
  level(trg "/li")
  # The title was already escaped and converted by the first pass, so it
  # is printed as is; running it through inline() again turned "&amp;"
  # into "&amp;amp;".
  printf("%0" indent "s<a href=\"#%s\">%s</a>\n", "", id, $0)
}
# Close the lists and <summary> once all input has been read.  The input of
# this second pass is the output of the first one.
END { level("") }
' < "$tmpfile"
# Print the rest of the page: everything after the line that closes the
# placeholder.  Then discard the temporary file.
sed '1,/<\/summary>/d' "$tmpfile"
rm "$tmpfile"