cryptocd/documents/mod_html_files.py

#!/usr/bin/env pythonx
#-*- coding: UTF-8 -*-
'''Post-process an HTML file generated by latex2html: the file named on the
command line is read, adapted for styling via CSS and written back in place.'''
import string
import sys
import re
import os.path

def writeFile(data, filename):
	"""
	write data to the given filename, replacing any previous content
		@param data String : text to write
		@param filename String : name of file to write to
		@return String : "" on success, otherwise the name of the file
			that could not be written
	"""
	try:
		# mode "w" already truncates the file, so one open is enough;
		# "with" closes the handle even if the write itself fails
		with open(filename, "w") as f:
			f.write(data)
	except (IOError, OSError):
		# only I/O problems are reported as "not writeable"; programming
		# errors (e.g. a wrong argument type) are no longer hidden
		print("(WW)[%s]: \"%s\" is not writeable!" % (__name__, filename))
		return filename
	return ""


### start of code
def modifyHtml(content, inputName):
	"""
	adapt the HTML text generated by latex2html so it can be styled via css
		@param content String : complete text of the latex2html output file
		@param inputName String : base name of that file, used to find its
			entry in the progress menu
		@return String : the modified HTML text
	"""
	# recently switched to latex2html, now cutting off the generated footer
	# TODO: this section needs to be reworked!
	# only a footer starting beyond offset 10 is removed
	footerStart = content.find("<HR>\n<ADDRESS>")
	if footerStart > 10:
		footerEnd = content.find("</ADDRESS>", footerStart)
		if footerEnd != -1:
			footer = content[footerStart:footerEnd + len("</ADDRESS>")]
			content = content.replace(footer, "")

	# put whole content into div tags
	content = content.replace("<BR><HR>\n\n</BODY>", '</div>')
	content = content.replace("</HTML>", '<!-- end of main -->')

	# remove empty image subtitles
	content = content.replace("<STRONG>Abbildung:</STRONG>", "")

	# section "author_info": remove the formatting
	# NOTE(review): the replacements below run over the whole document, not
	# only over the author_info section - confirm that this is intended
	authorStart = content.find('<DIV CLASS="author_info"')
	if authorStart > 10 and content.find("</STRONG>", authorStart) != -1:
		content = content.replace('<P ALIGN="CENTER">', "<p>")
		content = content.replace("<STRONG>", "")
		content = content.replace("</STRONG>", "")

	# TODO: format the TOC - the part <A NAME="SECTION00001000000000000000">
	# still has to become more general
	content = content.replace('<BR>\n\n<H2><A NAME="SECTION00001000000000000000">\nInhalt</A>\n</H2>\n<!--Table of Contents-->', '\n<div id="toc">\n<div id="toctitle">Auf dieser Seite:</div>')
	content = content.replace("<!--End of Table of Contents-->", "</div>")

	# replace "./filename.png" references with "filename.png"
	# (this avoids validation warnings)
	content = content.replace('HREF="./', 'HREF="')

	# the stylesheet link emitted by latex2html is not standards compliant
	# a bug report was sent to the l2html mailing list (29.12.2006)
	content = content.replace('<LINK REL="STYLESHEET" HREF=', '<link rel="stylesheet" type="text/css" href=')

	# remove empty paragraphs in table cells
	content = content.replace('\n<P></TD>', '\n</td>')

	# find the footnotes and close their div after the first </DL> that
	# follows the footnote heading
	footnotes = content.find("<BR><HR><H4>Fu&szlig;noten</H4>")
	if footnotes != -1:
		listEnd = content.find("</DL>", footnotes)
		if listEnd != -1:
			content = content[:listEnd] + "</DL>\n</div>" + content[listEnd + len("</DL>"):]

	# the line in front of the footnotes is removed
	# footnotes get an id, so they can be formatted via css
	content = content.replace('<HR><H4>Fu&szlig;noten</H4>', '\n<div id="footnotes">\n<div id="fntitle">Fu&szlig;noten</div>')

	# special characters in footnotes are replaced (hotfix for a latex2html bug)
	# NOTE(review): every search pattern is the same literal, so only the
	# first replacement can ever match; presumably the original non-ASCII
	# characters were lost in an encoding conversion - restore them
	brokenCharacters = (
		('<EFBFBD>', '&auml;'),
		('<EFBFBD>', '&Auml;'),
		('<EFBFBD>', '&ouml;'),
		('<EFBFBD>', '&Ouml;'),
		('<EFBFBD>', '&uuml;'),
		('<EFBFBD>', '&Uuml;'),
		('<EFBFBD>', '&szlig;'),
	)
	for broken, entity in brokenCharacters:
		content = content.replace(broken, entity)

	# external links get a class (applied to the whole document)
	content = content.replace('HREF="http:', 'class="externalLink" href="http:')
	content = content.replace('HREF="https:', 'class="externalLink" href="https:')

	# go to the position of the progress menu,
	# look for a link to the input file of this script,
	# replace the first <li> tag that follows
	progressStart = content.find('<div id="progress">')
	if progressStart != -1:
		namePosition = content.find(inputName, progressStart)
		if namePosition > 0:
			content = content[:namePosition] + content[namePosition:].replace('<li>', '<li class="next">', 1)

	return content


def main(argv):
	"""
	read the file named in argv[1], modify it and write it back in place
		@param argv List : command line, argv[0] is the program name
		@return Integer : exit code, 0 on success and 1 on any failure
	"""
	if len(argv) < 2:
		print("usage: %s <file_to_modify>" % argv[0])
		return 1
	filename = argv[1]

	try:
		with open(filename, "r") as f:
			content = f.read()
	except (IOError, OSError) as error:
		# report what actually went wrong instead of a generic message
		print("(EE)[%s]: could not read \"%s\": %s" % (__name__, filename, error))
		print("usage: %s <file_to_modify>" % argv[0])
		return 1

	content = modifyHtml(content, os.path.basename(filename))

	# all done, writing tuned file; writeFile reports the problem itself and
	# returns the file name on failure, which must not end in exit code 0
	if writeFile(content, filename):
		return 1
	return 0


if __name__ == "__main__":
	sys.exit(main(sys.argv))