2006-08-12 18:28:38 +02:00
#!/usr/bin/env python
2006-08-08 14:33:06 +02:00
#-*- coding: UTF-8 -*-
2006-08-12 18:28:38 +02:00
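''' Post-processes a latex2html-generated HTML file in place: removes the
generated address footer, wraps the table of contents and the footnotes in
css-addressable div tags and marks external links with a css class.
usage: <this_script> <file_to_modify> '''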
import string
import sys
import re
2006-09-19 12:38:25 +02:00
import os . path
2006-08-12 18:28:38 +02:00
def writeFile(data, filename):
    """
    Write data to the given filename, replacing any previous content.

    @param data String : text to store in the file
    @param filename String : name of file to write to
    @return String : " " (a single blank) on success, the filename itself
        if the file could not be opened or written
    """
    try:
        # Mode "w" already truncates on open, so no separate
        # truncate-then-append pass is needed; the with-block closes the
        # handle even when write() fails.
        with open(filename, "w") as f:
            f.write(data)
        return " "
    except (IOError, OSError):
        # Only I/O failures mean "not writeable"; anything else (including
        # KeyboardInterrupt) is left to the caller.
        print(" (WW)[ %s ]: \" %s \" is not writeable! " % (__name__, filename))
        return filename
### start of code
def tuneHtml(content, source_name):
    """
    Apply all latex2html clean-ups to one HTML document.

    The steps run in a fixed order because later patterns rely on the
    output of earlier ones (e.g. the footnote heading is searched before
    it is rewritten).

    @param content String : complete text of the generated HTML file
    @param source_name String : base name of that file; the first <li>
        following its mention in the progress menu gets the class "next"
    @return String : the rewritten HTML text
    """
    # NOTE(review): every pattern below is matched literally, including the
    # blanks inside the literals - confirm they correspond to what
    # latex2html really emits.

    # recently switched to latex2html, now cutting off headers
    # TODO: this section needs to be reworked!
    start = content.find(" <HR> \n <ADDRESS> ")
    end = content.find(" </ADDRESS> ", start) + len(" </ADDRESS> ")
    # find() yields -1 when nothing matches; such results and matches
    # within the first ten characters are ignored.
    if start > 10 and end > start:
        footer = content[start:end]
        content = content.replace(footer, " ")

    # put whole content into div tags
    content = content.replace(" <BR><HR> \n \n </BODY> ", ' </div> ')
    content = content.replace(" </HTML> ", ' <!-- end of main --> ')

    # remove empty image subtitles
    content = content.replace(" <STRONG>Abbildung:</STRONG> ", " ")

    # "author_info" area: remove the formatting
    start = content.find(' <DIV CLASS= " author_info " ')
    end = content.find(" </STRONG> ", start) + len(" </STRONG> ")
    if start > 10 and end > start:
        # NOTE(review): the replacements act on the whole document, not
        # only on the author_info slice - confirm this is intended.
        content = content.replace(' <P ALIGN= " CENTER " > ', " <p> ")
        content = content.replace(" <STRONG> ", " ")
        content = content.replace(" </STRONG> ", " ")

    # TODO: format the TOC - the <A NAME="SECTION00001000000000000000">
    # part still has to become more general
    content = content.replace(
        ' <BR> \n \n <H2><A NAME= " SECTION00001000000000000000 " > \n Inhalt</A> \n </H2> \n <!--Table of Contents--> ',
        ' \n <div id= " toc " > \n <div id= " toctitle " >Auf dieser Seite:</div> ')
    content = content.replace(" <!--End of Table of Contents--> ", " </div> ")

    # replace "./filename.png" references with "filename.png"
    # (this avoids validation warnings)
    content = content.replace(' HREF= " ./ ', ' HREF= " ')

    # the stylesheet inclusion of latex2html does not conform to the
    # standard; a bug report was sent to the l2html mailing list (29.12.2006)
    content = content.replace(
        ' <LINK REL= " STYLESHEET " HREF= ',
        ' <link rel= " stylesheet " type= " text/css " href= ')

    # remove empty paragraphs inside table cells
    content = content.replace(' \n <P></TD> ', ' \n </td> ')

    # find the footnotes and replace the end of their list
    footnotes_at = content.find(" <BR><HR><H4>Fußnoten</H4> ")
    # When the heading is missing, footnotes_at is -1 and the search below
    # only looks at the very last character.
    start = content.find(" </DL> ", footnotes_at)
    end = start + len(" </DL> ")
    if start > 10 and end > 10:
        content = content[:start] + " </DL> \n </div> " + content[end:]

    # the line in front of the footnotes is removed;
    # the footnotes get an id so they can be styled via css
    content = content.replace(
        ' <HR><H4>Fußnoten</H4> ',
        ' \n <div id= " footnotes " > \n <div id= " fntitle " >Fußnoten</div> ')

    # special characters in footnotes are replaced
    # (hotfix for a latex2html bug)
    # NOTE(review): all seven replacements use the same search string, so
    # only the first one (' ä ') can ever match. Presumably each was meant
    # to match a different mis-encoded sequence - restore them from real
    # latex2html output.
    for umlaut in (' ä ', ' Ä ', ' ö ', ' Ö ', ' ü ', ' Ü ', ' ß '):
        content = content.replace(' <EFBFBD> ', umlaut)

    # external links get a css class
    content = content.replace(
        ' HREF= " http: ', ' class= " externalLink " href= " http: ')
    content = content.replace(
        ' HREF= " https: ', ' class= " externalLink " href= " https: ')

    # go to the position of the progress menu, look for a link named like
    # the input file of this script and replace the first <li> tag that
    # follows it
    start = content.find(' <div id= " progress " > ')
    name_at = content.find(source_name, start)
    if name_at > 0:
        content = content[:name_at] + content[name_at:].replace(
            ' <li> ', ' <li class= " next " > ', 1)

    return content


def main(argv):
    """
    Read the file named in argv[1], tune it and write it back in place.

    @param argv List : command line, argv[0] is the program name and
        argv[1] the HTML file to modify
    @return Integer : 0 on success, 1 if anything failed (a usage hint is
        printed in that case)
    """
    try:
        # the with-block closes the input even if reading fails
        with open(argv[1], "r") as f:
            content = f.read()
        content = tuneHtml(content, os.path.basename(argv[1]))
        # all done, writing tuned file
        writeFile(content, argv[1])
    except Exception:  # read/write/whatever failed
        # TODO: unclear error handling
        print(" an defined error occured - but i ' m too lame to fix that ")
        print("%s %s %s" % (" usage: ", argv[0], " <file_to_modify> "))
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))