🗃️ Update codebase 🗃️

rootfs/root/docker/setup/06-post.sh
2025-12-15 20:07:24 -05:00 · 2025-10-22 16:15:54 -04:00
parent e390e42502
commit 21fc813a08
1 changed files with 85 additions and 134 deletions
--- a/rootfs/root/docker/setup/06-post.sh
+++ b/rootfs/root/docker/setup/06-post.sh
@@ -26,141 +26,26 @@ set -o pipefail
 exitCode=0
 LIST_INDEX_FILE="/usr/share/httpd/default/list.html"
 RAW_URL="https://raw.githubusercontent.com/alecmuffett/real-world-onion-sites/refs/heads/master/README.md"
 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 # Predefined actions
-md2html() {
+__build_onions_html() {
-	# Emit a lightweight container; you can remove the outer <div> if you don’t want it.
+	# temp file (POSIX-safe mktemp use)
-	emit_open() {
+	tmp_md="$(mktemp 2>/dev/null || printf '/tmp/rwos.%s' "$$")" || {
-		cat <<'EOF'
+		printf '%s\n' "mktemp failed" >&2
-<div class="wrap md-body">
+		return 1
 EOF
 	}
 	emit_close() {
 		cat <<'EOF'
 </div>
 EOF
 	}
 	trap 'rm -f "$tmp_md"' 0 1 2 3 15
-	# AWK Markdown -> HTML (body only). Designed to handle the README’s structure.
+	# 1) Fetch README (UTF-8)
-	awk_prog='
+	if ! curl -fsSL "$RAW_URL" >"$tmp_md"; then
-BEGIN{
+		printf '%s\n' "curl failed to fetch README" >&2
-  in_code=0; in_ul=0; in_ol=0; in_blockquote=0; in_table=0; table_open=0;
+		return 2
-  last_nonblank="";
+	fi
  FS="";
 }
 function htmlesc(s){ gsub(/&/,"&amp;",s); gsub(/</,"&lt;",s); gsub(/>/,"&gt;",s); return s }
-function linkify(s){
+	# 2) Emit full HTML
-  # Images: ![alt](src)
+	{
-  s = gensub(/!\[([^[\]]+)\]\(([^) \t]+)\)/, "<img alt=\"\\1\" src=\"\\2\" />", "g", s)
+		# --- HEAD (your exact head) ---
-  # Links: [text](url)
+		cat <<'HTML_HEAD'
  s = gensub(/\[([^[\]]+)\]\(([^) \t]+)\)/, "<a href=\"\\2\" rel=\"noopener noreferrer\">\\1<\\/a>", "g", s)
  # Inline code
  s = gensub(/`([^`]+)`/, "<code>\\1<\\/code>", "g", s)
  # Bold / italic (simple)
  s = gensub(/\*\*([^*]+)\*\*/, "<strong>\\1<\\/strong>", "g", s)
  s = gensub(/\*([^*]+)\*/, "<em>\\1<\\/em>", "g", s)
  # Autolink http(s)
  s = gensub(/(https?:\/\/[A-Za-z0-9._~:\/?#\\[\\]@!$&'\''()*+,;=%-]+)/, "<a href=\"\\1\" rel=\"noopener noreferrer\">\\1<\\/a>", "g", s)
  # Bare .onion (with optional path/query)
  s = gensub(/([A-Za-z2-7]{16,56}\\.onion([A-Za-z0-9._~:\/?#\\[\\]@!$&'\''()*+,;=%-]*)?)/, "<a href=\"http:\\/\\/\\1\" rel=\"noopener noreferrer\">\\1<\\/a>", "g", s)
  return s
 }
 function flush_p(){ if(pbuf!=""){ printf("<p>%s</p>\n", pbuf); pbuf="" } }
 function close_lists(){ if(in_ul){ print "</ul>"; in_ul=0 } if(in_ol){ print "</ol>"; in_ol=0 } }
 function close_blockquote(){ if(in_blockquote){ print "</blockquote>"; in_blockquote=0 } }
 function table_close_fn(){ if(table_open){ print "</tbody></table>"; table_open=0 } in_table=0 }
 function split_cells(line,  n,i){
  sub(/^ *\|/,"",line); sub(/\| *$/,"",line);
  n=split(line, C, /\|/);
  for(i=1;i<=n;i++){ gsub(/^ +| +$/,"",C[i]) }
  return n
 }
 {
  raw=$0
  if(raw ~ /[^[:space:]]/){ last_nonblank=raw }
  # Fenced code
  if(!in_code && raw ~ /^```/){ flush_p(); close_lists(); close_blockquote(); print "<pre><code>"; in_code=1; next }
  if(in_code){
    if(raw ~ /^```/){ print "</code></pre>"; in_code=0; next }
    print htmlesc(raw); next
  }
  # Blank line
  if(raw ~ /^[[:space:]]*$/){
    if(in_table==0){ flush_p(); close_lists(); close_blockquote() }
    next
  }
  # HR
  if(raw ~ /^ *(-{3,}|\*{3,}|_{3,}) *$/){ flush_p(); close_lists(); close_blockquote(); print "<hr />"; next }
  # Table separator row -> open table with the previous header line
  if(raw ~ /^ *\|? *:?-{3,}:? *(?:\| *:?-{3,}:? *)+\|? *$/){
    hdr=last_nonblank
    split_cells(hdr); print "<table><thead><tr>";
    for(i=1;i in C;i++){ printf("<th>%s</th>", linkify(htmlesc(C[i]))) }
    print "</tr></thead><tbody>"; in_table=1; table_open=1; next
  }
  if(in_table && raw ~ /\|/){
    split_cells(raw); printf("<tr>");
    for(i=1;i in C;i++){ printf("<td>%s</td>", linkify(htmlesc(C[i]))) }
    print "</tr>"; next
  }
  if(in_table){ table_close_fn() }
  # Blockquotes
  if(raw ~ /^ *> */){
    flush_p(); close_lists();
    if(!in_blockquote){ print "<blockquote>"; in_blockquote=1 }
    gsub(/^ *> */,"",raw); print "<p>" linkify(htmlesc(raw)) "</p>"; next
  } else if(in_blockquote){ close_blockquote() }
  # Headings
  if(raw ~ /^#{1,6} /){
    flush_p(); close_lists(); close_blockquote();
    m=match(raw,/^#{1,6}/); level=RLENGTH; text=substr(raw, level+2);
    printf("<h%d>%s</h%d>\n", level, linkify(htmlesc(text)), level); next
  }
  # Lists
  if(raw ~ /^ *([-*]) +/){
    flush_p(); if(!in_ul){ print "<ul>"; in_ul=1 }
    item=raw; sub(/^ *[-*] +/,"",item); print "<li>" linkify(htmlesc(item)) "</li>"; next
  }
  if(raw ~ /^ *[0-9]+\. +/){
    flush_p(); if(!in_ol){ print "<ol>"; in_ol=1 }
    item=raw; sub(/^ *[0-9]+\. +/,"",item); print "<li>" linkify(htmlesc(item)) "</li>"; next
  }
  if(in_ul && raw !~ /^ *([-*]) +/ && raw !~ /^[[:space:]]*$/){ close_lists() }
  if(in_ol && raw !~ /^ *[0-9]+\. +/ && raw !~ /^[[:space:]]*$/){ close_lists() }
  # Paragraph accumulation (soft-wrap)
  line = linkify(htmlesc(raw))
  if(pbuf==""){ pbuf=line } else { pbuf=pbuf " " line }
 }
 END{
  if(in_code){ print "</code></pre>" }
  if(in_table){ table_close_fn() }
  if(in_blockquote){ close_blockquote() }
  flush_p(); close_lists()
 }
 '
 	emit_open
 	curl -q -LSsf "$RAW_URL" | awk "$awk_prog" || return 1
 	emit_close
 }
 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 # Main script
 echo "Creating the onion web sites list"
 cat >"$LIST_INDEX_FILE" <<'HTML'
 <!doctype html>
 <html lang="en">
 <head>
@@ -171,10 +56,76 @@ cat >"$LIST_INDEX_FILE" <<'HTML'
  <title>Welcome to your tor site</title>
 </head>
 <body>
-HTML
+<main>
-md2html >>"$LIST_INDEX_FILE"
+<pre>
-printf '<div><center>%s</center></div>\n' "Last uopdated on $(date +'%A, %B %d, %Y at %H:%M %Z')" >>"$LIST_INDEX_FILE"
+HTML_HEAD
-printf '\n</body>\n</html>\n' >>"$LIST_INDEX_FILE"
+
 		# --- Body content pipeline ---
 		# Step A: replace GitHub :shortcodes: with emoji
 		# Step B: HTML-escape (&, <, >) to preserve literal layout
 		# Step C: auto-link http(s) URLs (done after escaping so <a> survives)
 		awk '
      BEGIN { OFS=""; RS="\n"; ORS="\n" }
      {
        gsub(":white_check_mark:", "✅")
        gsub(":closed_lock_with_key:", "🔐")
        gsub(":small_red_triangle:", "🔺")
        gsub(":key:", "🔑")
        gsub(":question:", "❓")
        gsub(":sos:", "🆘")
        gsub(":alarm_clock:", "⏰")
        gsub(":timer_clock:", "⏲️")
        gsub(":exclamation:", "❗")
        gsub(":eight_spoked_asterisk:", "✳️")
        gsub(":crystal_ball:", "🔮")
        gsub(":lock:", "🔒")
        gsub(":arrow_up:", "⬆️")
        gsub(":wrench:", "🔧")
        gsub(":new:", "🆕")
        print
      }
    ' "$tmp_md" |
 			awk '
        # HTML-escape in correct order
        { gsub(/&/, "&amp;"); gsub(/</, "&lt;"); gsub(/>/, "&gt;"); print }
      ' |
 			awk '
        # Auto-link URLs using POSIX awk (ERE), no gensub needed.
        # Allowed URL chars kept conservative; onion links are fine.
        {
          line = $0
          re = "https?://[A-Za-z0-9._~:/?#@!$&()*+,;=%-]+"
          out = ""
          while (match(line, re)) {
            pre = substr(line, 1, RSTART-1)
            url = substr(line, RSTART, RLENGTH)
            line = substr(line, RSTART+RLENGTH)
            out = out pre "<a href=\"" url "\" rel=\"noopener noreferrer\">" url "</a>"
          }
          $0 = out line
          print
        }
      '
 		# --- TAIL ---
 		cat <<'HTML_TAIL'
 </pre>
 <div><center>%s</center></div>\n' "Last uopdated on $(date +'%A, %B %d, %Y at %H:%M %Z')"
 </main>
 </body>
 </html>
 HTML_TAIL
 	} >"$LIST_INDEX_FILE" || {
 		printf '%s\n' "write failed: $LIST_INDEX_FILE" >&2
 		return 3
 	}
 	printf 'Wrote %s\n' "$LIST_INDEX_FILE" >&2
 }
 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 # Main script
 __build_onions_html
 # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 # Set the exit code
 #exitCode=$?