#!/bin/sh
#
# Export wiki pages to the cryptobox development tree.
#
# For every language in LANGUAGES and every page in PAGES this script
#   1. downloads the rendered wiki page (falling back to the default-language
#      page when the translated page does not exist),
#   2. cuts away everything around the page content and removes the
#      moin-specific link icons,
#   3. downloads the page requisites (images, ...),
#   4. builds a static ("offline") page wrapped in the header/footer include
#      files found below OFFLINE_DIR, and
#   5. stores an integrated page, whose links point to the cryptobox CGI,
#      below DEST_DIR/<language>.
# Finally all downloaded requisites are moved to IMAGE_DIR.
#
# Requires: wget, mktemp and GNU sed ("-i" and "\n" in replacements).
#

set -ue

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# root directory of the cryptobox development environment
ROOT_DIR="$(dirname "$(dirname "$0")")"

# retrieve these pages from the wiki
PAGES="CryptoBox CryptoBoxIntro CryptoBoxDev CryptoBoxDevPreparation CryptoBoxDevCustomBuild CryptoBoxDevWorkFlow CryptoBoxDevValidation CryptoBoxDevCustomConfigure CryptoBoxDevBackground CryptoBoxDevKnownProblems CryptoBoxUser CryptoBoxUserGettingStarted CryptoBoxUserConfiguration CryptoBoxUserDailyUse"

# base URL
WIKI_HOST="https://systemausfall.org"
# the trailing slash is important
WIKI_URL=/toolforge/

# Link prefix of the cryptobox CGI. The "&" is backslash-escaped because the
# value is interpolated into sed expressions, where a bare "&" in the
# replacement would mean "the whole match".
CBOX_CGI="/cryptobox?action=doc\&page="

LANGUAGES="en de"

DEST_DIR="$ROOT_DIR/cbox-tree.d/usr/share/doc/cryptobox/html"
OFFLINE_DIR="$ROOT_DIR/cbox-tree.d/_offline/doc"
IMAGE_DIR="$ROOT_DIR/cbox-tree.d/var/www/cryptobox-img"

HEADER_FILE=doc_header.inc
FOOTER_FILE=doc_footer.inc

if [ ! -e "$DEST_DIR" ]; then
  die "$DEST_DIR does not exist"
fi

# Private, unpredictable working directory; removed on every exit path.
TMP_DIR=$(mktemp -d "${TMPDIR:-/tmp}/$(basename "$0").XXXXXX") \
  || die "Could not create a temporary directory"

cleanup() {
  rm -rf -- "${TMP_DIR:?}"
}
trap cleanup EXIT
# Turn signals into a regular exit so that the EXIT trap runs in every shell.
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 143' TERM

# The loop variable is deliberately NOT called LANG: assigning to LANG would
# change the locale of every wget/sed/grep started below.
# LANGUAGES and PAGES are space separated lists - word splitting is intended.
for doc_lang in $LANGUAGES; do
  for PAGE in $PAGES; do
    PAGE_SRC="$WIKI_HOST$WIKI_URL$PAGE/$doc_lang"
    echo "Importing $PAGE/$doc_lang:"

    # the working copy is named after the page
    TMP_FILE="$TMP_DIR/${PAGE}.html"

    echo "  downloading the page ..."
    wget --quiet --output-document="$TMP_FILE" "$PAGE_SRC" \
      || die "Downloading ($PAGE_SRC) failed!"

    # check if this moin page exists (by looking for the template selection);
    # the "." stands for the "?" of the query string
    if grep -q "$WIKI_URL$PAGE/$doc_lang.action=edit&template=" "$TMP_FILE"; then
      rm -f -- "$TMP_FILE"
      PAGE_SRC="$WIKI_HOST$WIKI_URL$PAGE"
      echo "  trying to download default language page instead"
      wget --quiet --output-document="$TMP_FILE" "$PAGE_SRC" \
        || die "Downloading ($PAGE_SRC) failed!"
      # check if there is not even a default page: a missing default page
      # offers the template selection for the page itself (without language)
      if grep -q "$WIKI_URL$PAGE.action=edit&template=" "$TMP_FILE"; then
        die "This page ($PAGE_SRC) was not found!"
      fi
    fi

    echo "  removing header and footer ..."
    # break lines before start of content
    # NOTE(review): the opening "<div " of this pattern was lost in the copy
    # under review and has been reconstructed - confirm against the wiki HTML.
    sed -i 's#<div [^>]* id="content" [^>]*>#_END_OF_HEADER_\n#' "$TMP_FILE"
    # NOTE(review): the footer pattern was missing completely in the copy
    # under review (an empty regex is an error in sed); '<div id="footer">' is
    # an assumption - TODO confirm which tag follows the page content.
    sed -i 's#<div id="footer">#\n_START_OF_FOOTER_#' "$TMP_FILE"
    # remove all lines before and after "body"
    sed -i '1,/_END_OF_HEADER_/d; /_START_OF_FOOTER_/,$d' "$TMP_FILE"

    echo "  removing link images (moin specific) ..."
    # remove inter-wiki images
    sed -i 's#<[^<]*moin-inter.png[^>]*>##g' "$TMP_FILE"
    # remove moin-www images
    sed -i 's#<[^<]*moin-www.png[^>]*> ##g' "$TMP_FILE"

    echo "  downloading requisites ..."
    wget --quiet --ignore-tags=a --no-clobber --page-requisites \
      --convert-links --no-parent --no-directories \
      --base="$WIKI_HOST$WIKI_URL" --directory-prefix="$TMP_DIR" \
      --html-extension --force-html --input-file="$TMP_FILE" \
      || die "Downloading requisites for ($PAGE_SRC) failed!"

    echo "  adjusting links for requisites ..."
    # every non-html file in the working directory is a requisite; point the
    # links to it at the image directory of the web server
    find "$TMP_DIR" -type f ! -name '*.html' | while IFS= read -r requisite; do
      fname=$(basename "$requisite")
      sed -i "s#=\"[^\"]*/$fname\"#=\"/cryptobox-img/$fname\"#g" "$TMP_FILE"
    done

    echo "  adjusting wiki links ..."
    # redirect wiki links to cryptobox cgi
    # The group must not cross a '"': otherwise one greedy match swallows all
    # links of a line and only the first one gets rewritten (this is why the
    # substitution used to be run twice).
    sed -i "s#=\"${WIKI_URL}\([^\".]*\)\"#=\"${CBOX_CGI}\1\"#g" "$TMP_FILE"
    # remove language specific part of moin link
    for TLANG in $LANGUAGES; do
      sed -i "s#=\"${CBOX_CGI}\([^\"]*\)/${TLANG}\"#=\"${CBOX_CGI}\1\"#g" "$TMP_FILE"
    done

    # build the static pages
    echo "  building static doc page"
    offline_file="$OFFLINE_DIR/$doc_lang/$(basename "$TMP_FILE")"
    mkdir -p "$OFFLINE_DIR/$doc_lang"
    cat "$OFFLINE_DIR/$HEADER_FILE" \
      "$OFFLINE_DIR/$doc_lang/$HEADER_FILE" \
      "$TMP_FILE" \
      "$OFFLINE_DIR/$doc_lang/$FOOTER_FILE" \
      "$OFFLINE_DIR/$FOOTER_FILE" >"$offline_file"
    # cgi links become links to the neighbouring static pages
    sed -i "s#=\"${CBOX_CGI}\([^\"]*\)\"#=\"\1.html\"#g" "$offline_file"
    sed -i "s#=\"/cryptobox-img#=\"../../../var/www/cryptobox-img#g" "$offline_file"
    # strip a language specific suffix from the static links
    for TLANG in $LANGUAGES; do
      sed -i "s#=\"\([^/\"]*\)/${TLANG}\.html\"#=\"\1.html\"#g" "$offline_file"
    done

    # some last changes to the dynamic pages (must be done _after_ the static pages)
    # add weblang for current language to query string (per link, hence the
    # group is bounded by the closing quote)
    sed -i "s#=\"${CBOX_CGI}\([^\"]*\)\"#=\"${CBOX_CGI}\1\&weblang=${doc_lang}\"#g" "$TMP_FILE"

    # move cgi-doc; make sure the target is a directory, otherwise mv would
    # rename the page to a plain file called like the language
    mkdir -p "$DEST_DIR/$doc_lang"
    mv -- "$TMP_FILE" "$DEST_DIR/$doc_lang/"
    echo "  finished!"
  done
done

# whatever is left in the working directory are requisites (images, ...)
for requisite in "$TMP_DIR"/*; do
  # an unmatched glob stays literal - skip it instead of failing
  [ -e "$requisite" ] || continue
  mv -- "$requisite" "$IMAGE_DIR"
done
# the (now empty) working directory is removed by the EXIT trap