cryptonas-archive/deb-live_v0.3.5/scripts/mirror_offline_doc.sh

141 lines
3.8 KiB
Bash
Executable file

#!/bin/bash
# Mirror the online documentation of cryptobox.org into the live-cd tree
# for offline use.
# Bash (not plain sh) is required: the script uses 'function', substring
# expansion (${var:2:6}) and pattern substitution (${var//x/y}).
PROJ_DIR=$(dirname "$0")/..
# canonicalize; '&&' makes a failed cd yield an empty value instead of
# silently falling back to the current working directory
PROJ_DIR=$(cd "$PROJ_DIR" && pwd)
DEST_DIR="$PROJ_DIR/live-cd-tree.d/_offline/doc"
# wiki pages (paths below /wiki/) that get mirrored as offline documentation
WIKI_PAGES="doc/0.3/CryptoBoxUserGettingStarted/en
doc/0.3/CryptoBoxUserDailyUse/en
CryptoBoxUser/en
faq"
function prepare_wiki_page()
{
# Strip a downloaded trac wiki page down to its bare content and rewrite
# links/anchors so the page works as a static offline document.
#   $1 - html file, rewritten in place
#   ($2, the wiki path, is passed by the caller but not used here)
# drop everything up to and including the start of the wiki content
sed -i '1,/<div class="wikipage">/d' "$1"
# remove the "comment" or "edit" stuff
sed -i '/<h2>Comments<\/h2>/,$d' "$1"
sed -i '/<div class="buttons">/,$d' "$1"
# remove the last horizontal line
sed -i '$,$d' "$1"
# add anchor ids to every head line
sed -i 's#<h\([1-4]\)>\(.*\)</h#<h\1 id="\2">\2</h#g' "$1"
# ids may only contain letters: repeatedly cut each id at the first
# character that is neither a letter nor the closing quote
while grep -q '<h[1-4] id="\([a-zA-Z]*\)[^a-zA-Z"]' "$1"
do sed -i 's#<h\([1-4]\) id="\([a-zA-Z]*\)[^a-zA-Z"]#<h\1 id="\2#g' "$1"
done
# convert wiki links: flatten path separators to "_"
# (the substitution is repeated four times to handle up to four
# nested path levels per link)
sed -i 's#="/wiki/\([^"/]*\)/#="/wiki/\1_#g' "$1"
sed -i 's#="/wiki/\([^"/]*\)/#="/wiki/\1_#g' "$1"
sed -i 's#="/wiki/\([^"/]*\)/#="/wiki/\1_#g' "$1"
sed -i 's#="/wiki/\([^"/]*\)/#="/wiki/\1_#g' "$1"
# point the flattened link at the local .html copy (keeps #anchors)
sed -i 's#="/wiki/\([^"#]*\)\([#"]\)#="\1.html\2#g' "$1"
# remove outdated documentation: put each </ol> on its own boundary
# so the range delete below ends exactly at the list end
sed -i 's#</ol>#</ol>\n#g' "$1"
sed -i '/outdated/,/<\/ol>/d' "$1"
# remove "searchable" ids (blue coloring of head lines)
sed -i 's#<div id="searchable">#<div>#g' "$1"
# fix image sources: keep only the bare file name, drop path and query
sed -i 's#src="/file/[^"]*/\([^/\?]*\)["\?]#src="\1"#g' "$1"
}
function wrap_wiki_page()
{
# Surround the prepared page body ($1) with the site-wide header and
# footer. Reads the globals $page_header and $page_footer; rewrites the
# file in place via a temporary ".new" file.
local tmp="${1}.new"
{
printf '%s\n' "$page_header"
printf '%s\n' '<div class="centercontent">'
cat "$1"
printf '%s\n' '</div>'
printf '%s\n' "$page_footer"
} >"$tmp"
mv "$tmp" "$1"
}
function rename_files()
{
# Clean up the file names wget left behind:
#  1. strip the "?format=raw" query suffix from downloaded attachments
#  2. remove duplicate downloads (wget appends ".1", ".2", ... on name
#     collisions)
# Uses shell globs instead of parsing 'ls' output, so names containing
# special characters (these literally contain '?') are handled safely.
local fname
for fname in *'?format=raw'
do
# an unmatched glob stays literal - skip it
test -e "$fname" || continue
mv -- "$fname" "${fname%%\?*}"
done
for fname in *
do
case "$fname" in
*.*)
# delete only when everything after the last dot is digits
# (or empty), matching the old '\.[0-9]*$' regex
case "${fname##*.}" in
*[!0-9]*) ;;
*) rm -- "$fname" ;;
esac
;;
esac
done
}
function redirect_homepage_links()
{
# Turn absolute links to the development wiki into relative links that
# point at the local offline copies.
local fname
for fname in *.html
do
test -e "$fname" || continue
# flatten nested wiki paths: each pass converts one more "/" to "_"
while grep -q '="http://devel.cryptobox.org/wiki/[^/"]*/' "$fname"
do sed -i 's#\(="http://devel.cryptobox.org/wiki/[^/"]*\)/#\1_#g' "$fname"
done
# replace the absolute URL with the local .html file (keeps #anchors);
# the original appended a stray extra quote here, which produced
# broken attributes like href="Page.html""
sed -i 's#="http://devel.cryptobox.org/wiki/\([^"#]*\)\(["#]\)#="\1.html\2#g' "$fname"
done
}
function remove_useless_files()
{
# Drop files that are pointless in an offline mirror.
rm -f -- robots.txt
}
function rename_one_file()
{
# Rename a file and update every reference to it in the mirrored tree
# (skipping subversion metadata).
#   $1 - old file name (relative, without leading "./")
#   $2 - new file name
# NOTE(review): $1 and $2 are spliced verbatim into the sed expression,
# so regex metacharacters in the old name (e.g. ".") can over-match;
# presumably acceptable because the names come from our own wget run.
# The pattern requires a non-alphanumeric character in front of the
# name, so a reference at the very start of a line is likely missed -
# TODO confirm.
find . -type f | grep -v "/\.svn" | while read fname
do sed -i "s#\([^a-zA-Z0-9]\)$1#\1$2#g" "$fname"
done
mv "$1" "$2"
}
function rename_long_files()
{
# this is necessary to avoid problems with the 31-character restriction
# of iso9660 on windows systems
# Long file names are replaced by: six prefix characters (slashes
# flattened to "_") + a collision counter + the (shortened) extension.
find . -type f | grep -v "/\.svn" | while read fname
do if test 14 -lt "${#fname}"
then local neu_prefix=$(echo "${fname:2:6}" | sed 's#/#_#g')
local neu_num=0
# derive the extension from the base name only, so the dot in the
# leading "./" can never leak into the suffix
local base=${fname##*/}
local neu_suffix=""
case $base in
*.*) neu_suffix=.${base##*.} ;;
esac
# truncate long extensions to dot + four characters
# (the original '.${neu_suffix:-4}' was a "default value" expansion
# and merely prepended a second dot instead of truncating)
test 5 -lt "${#neu_suffix}" && neu_suffix=${neu_suffix:0:5}
# find a collision-free counter value
while test -e "$neu_prefix$neu_num$neu_suffix"
do neu_num=$((neu_num+1))
done
rename_one_file "${fname:2}" "$neu_prefix$neu_num$neu_suffix"
fi
done
}
test ! -d "$DEST_DIR" && echo "Destination directory does not exist: $DEST_DIR" && exit 1
# cleanup destination directory (keep subversion metadata);
# 'find -exec ... +' is safe for special file names and, unlike the old
# 'xargs rm', does not fail when nothing matches
test -e "$DEST_DIR/index.html" && find "$DEST_DIR/" -type f ! -path "*/.svn/*" -exec rm -- {} +
cd "$DEST_DIR" || exit 1
# retrieve pages from cryptobox.org
wget --mirror --page-requisites --no-directories --convert-links --quiet http://cryptobox.org
wget --output-document=header.jpg --quiet http://cryptobox.org/gfx/header.jpg
# everything outside these two markers of the homepage is reused as the
# header/footer around every mirrored wiki page
page_header=$(sed -n '1,/<!-- content starts here -->/p' index.html)
page_footer=$(sed -n '/<!-- content ends here -->/,$p' index.html)
for page in $WIKI_PAGES
do filename=${page//\//_}.html
wget --no-directories --page-requisites --quiet "http://devel.cryptobox.org/wiki/$page"
mv "$(basename "$page")" "$filename"
prepare_wiki_page "$filename" "$page"
wrap_wiki_page "$filename"
done
rename_files
redirect_homepage_links
remove_useless_files
rename_long_files