1. Output the old edition and the new edition to WebHelp (.htm files), each in its own folder
For each .htm file:
2. Remove everything before <!-- Placeholder for topic body. -->
3. Remove everything between < and >
4. Replace &nbsp; by a space
I wrote a VB6 program to do this (see below)
For all .htm files
5. Compare the two folders using the freeware CSDiff. (There is probably better comparison software.)
Result: Only changes in the actual text are displayed, which can then be reported to the end-users.
Code: Select all
' apologies no comments in the code: this is merely to give you an idea of what to do ....
Option Explicit
' HTML split out control information - does it work?
Public Sub main()
    ' Writes a text-only copy of every .htm file found in the input folder to
    ' the output folder, so that two editions can be compared with a diff tool.
    '
    ' For each file:
    '   * every line up to and including the one containing
    '     "<!-- Placeholder for topic body. -->" is discarded;
    '   * everything between "<" and ">" is removed (a tag may span lines);
    '   * each "&nbsp;" entity is replaced by a single space;
    '   * the remaining text of each input line is trimmed and, if not empty,
    '     written as one output line.
    '
    ' A "<" inside a tag or a ">" outside a tag is treated as malformed input:
    ' the offending line is printed to the Immediate window and execution
    ' breaks with Stop.
    Dim din As String, dout As String       ' input / output folders (with trailing "\")
    Dim f As String                         ' current file name returned by Dir$
    Dim l As String                         ' current input line
    Dim m As String                         ' visible text collected from the current line
    Dim c As String                         ' current character
    Dim i As Long
    Dim inTag As Boolean                    ' True while between "<" and ">"
    Dim skipping As Boolean                 ' True until the topic-body marker has been seen
    Dim hIn As Integer, hOut As Integer     ' file numbers obtained from FreeFile

    ' Alternative folder pair (previously assigned and then overwritten);
    ' swap it with the active pair below to process the other edition:
    ' din = "C:\e\www\winsteps\winman\"    ' folder for full WebHelp
    ' dout = "c:\temp\winnew\"             ' folder for text-only WebHelp
    din = "C:\Documents and Settings\Mike\Desktop\winman\"
    dout = "c:\temp\winold\"

    f = Dir$(din & "*.htm")
    Do While Len(f) > 0
        ' Ignore names containing ".html" (any letter case); only .htm topics are wanted.
        If InStr(1, f, ".html", vbTextCompare) = 0 Then
            Debug.Print f

            ' Ask for free file numbers instead of assuming #1 and #2 are unused.
            hIn = FreeFile
            Open din & f For Input As #hIn
            hOut = FreeFile
            Open dout & f For Output As #hOut

            skipping = True
            inTag = False
            Do While Not EOF(hIn)
                Line Input #hIn, l
                If skipping Then
                    ' The marker line itself is dropped; output starts on the next line.
                    If InStr(l, "<!-- Placeholder for topic body. -->") > 0 Then
                        skipping = False
                    End If
                Else
                    m = ""
                    For i = 1 To Len(l)
                        c = Mid$(l, i, 1)
                        If inTag Then
                            If c = ">" Then
                                inTag = False
                            ElseIf c = "<" Then
                                Debug.Print l ' something has gone wrong: "<" inside a tag
                                Stop
                            End If
                        ElseIf c = "<" Then
                            inTag = True
                        Else
                            m = m & c
                            If c = ">" Then
                                Debug.Print l ' something has gone wrong: ">" outside a tag
                                Stop
                            End If
                        End If
                    Next i

                    ' Replace the 6-character "&nbsp;" entity by one space. The posted
                    ' code searched for a plain space here (the entity had been rendered
                    ' away), which loops forever on any line containing a space.
                    m = Trim$(Replace(m, "&nbsp;", " "))
                    If Len(m) > 0 Then
                        Print #hOut, m
                    End If
                End If
            Loop

            Close #hIn
            Close #hOut
        End If
        f = Dir$
    Loop

    Stop ' kept from the original: break when all files have been processed
End Sub