Let's build a transpiler! Part 8
This is the eighth post in a series of building a transpiler. You can find the previous ones here.Last time I said we would be going to put everything we did so far to use.
Believe it or not, but every single code sample in my blog have been painfully done manually.
It takes a lot of time, but now we have everything needed to create a little program to generate an .HTML file out of a source code file with coloring keywords and such.
We'll have to do some little adjustments:
- Our sub Main will be morphed into a function GetToken;
- We need some way to detect there are no more tokens to be read, so we'll add a tkEndOfStream to TokenKind enum and use it when we run out of tokens;
- We'll create a function EncodeHtml to encode some characters to HTML entities;
- Finally, we'll create a new sub Main, where we will not only open the input source code file for reading but another one to output HTML.
Option Explicit
Option Compare Text
Private Const SPAN_STRING = "<span style='color:brown'>"
Private Const SPAN_KEYWORD = "<span style='color:blue'>"
Private Const SPAN_COMMENT = "<span style='color: green;'>"
Public FileHandle_ As Integer
Public Enum TokenKind
tkWhiteSpace
tkComment
tkIdentifier
tkEscapedIdentifier
tkKeyword
tkIntegerNumber
tkFloatNumber
tkSciNumber
tkOctalNumber
tkHexaNumber
tkFileHandle
tkString
tkDateTime
tkOperator
tkLeftParenthesis
tkRightParenthesis
tkHardLineBreak
tkSoftLineBreak
tkListSeparator
tkPrintSeparator
tkDirective
tkEndOfStream
End Enum
Public Sub Main()
Dim Nbsp As Boolean
Dim Index As Integer
Dim HtmlHandler As Integer
Dim Text As String
Dim FilePath As String
Dim Token As Token
Rem Get an available file number.
FileHandle_ = FreeFile
Rem File path for the source code is passed as a command-line argument.
FilePath = Command$
Open FilePath For Binary As #FileHandle_
Rem Output file will have the same name as the input file, but with an .HTML extension.
Index = InStrRev(FilePath, ".")
If Index <> 0 Then FilePath = Left$(FilePath, Index - 1)
FilePath = FilePath & ".html"
HtmlHandler = FreeFile
Open FilePath For Output As #HtmlHandler
Rem Ensuring we close the file in case we have an error.
On Error GoTo CloseIt
If GetCodePoint <> -257 Then UngetChar
Nbsp = True
Do
Set Token = GetToken
Select Case Token.Kind
Case tkWhiteSpace
If Nbsp Then
Print #HtmlHandler, " ";
Else
Print #HtmlHandler, " ";
End If
Case tkComment
Print #HtmlHandler, SPAN_COMMENT;
Print #HtmlHandler, EncodeHtml(Token.Text);
Print #HtmlHandler, "</span><br>"
Case tkIdentifier, tkIntegerNumber, tkFloatNumber, tkSciNumber
Print #HtmlHandler, Token.Text;
Nbsp = False
Case tkEscapedIdentifier
Print #HtmlHandler, "["; Token.Text; "]";
Nbsp = False
Case tkKeyword
Print #HtmlHandler, SPAN_KEYWORD; Token.Text; "</span>";
Nbsp = False
Case tkOctalNumber
Print #HtmlHandler, "&O"; Token.Text;
Case tkHexaNumber
Print #HtmlHandler, "&H"; Token.Text;
Case tkFileHandle
Print #HtmlHandler, "#"; Token.Text;
Case tkString
Text = Token.Text
Text = Replace(Token.Text, """", """""")
Text = EncodeHtml(Text)
Print #HtmlHandler, SPAN_STRING; """"; Text; """"; "</span>";
Case tkDateTime
Print #HtmlHandler, "#"; Token.Text; "#";
Case tkOperator
If StrComp(Token.Text, Token.Text, vbBinaryCompare) = 0 Then
Print #HtmlHandler, EncodeHtml(Token.Text);
Else
Print #HtmlHandler, SPAN_KEYWORD; Token.Text; "</span>";
End If
Case tkLeftParenthesis, tkRightParenthesis, tkSoftLineBreak, tkListSeparator, tkPrintSeparator
Print #HtmlHandler, Token.Text;
Case tkHardLineBreak
Print #HtmlHandler, "<br>"
Nbsp = True
Case tkDirective
Print #HtmlHandler, "#"; Token.Text;
Nbsp = False
Case tkEndOfStream
Exit Do
End Select
If Token.Suffix <> vbNullChar Then Print #HtmlHandler, Token.Suffix;
Loop
CloseIt:
Close #HtmlHandler
Close #FileHandle_
Rem This is equivalent to a Throw in a Catch.
If Err.Number Then Err.Raise Err.Number
End Sub
Private Function EncodeHtml(ByVal Text As String) As String
Text = Replace(Text, "&", "&")
Text = Replace(Text, "<", "<")
Text = Replace(Text, ">", ">")
EncodeHtml = Text
End Function
Private Function GetToken() As Token
Dim Cp As Integer
Dim Ch As String * 1
Dim Token As Token
If EOF(FileHandle_) Then
Set GetToken = NewToken("", tkEndOfStream)
Exit Function
End If
Cp = GetCodePoint
Ch = ToChar(Cp)
Select Case Ch
Case "["
Set Token = ReadEscapedIdentifier
Case """"
Set Token = ReadString
Case "&"
Set Token = ReadAmpersand
Case "#"
Set Token = ReadHash
Case "0" To "9"
Set Token = ReadNumber(Ch)
Case "+", "-", "*", "/", "\", "^", "=", ".", "!"
Set Token = NewToken(Ch, tkOperator)
Case "<"
Set Token = NewToken(Ch, tkOperator)
Ch = GetChar
If Ch = ">" Or Ch = "=" Then
Token.Text = Token.Text & Ch
Else
UngetChar
End If
Case ">"
Set Token = NewToken(Ch, tkOperator)
If GetChar = "=" Then
Token.Text = Token.Text & "="
Else
UngetChar
End If
Case ":"
Set Token = NewToken(Ch, tkSoftLineBreak)
Case vbLf
Set Token = NewToken(Ch, tkHardLineBreak)
Case vbCr
If GetCodePoint <> 10 Then UngetChar
Set Token = NewToken(vbLf, tkHardLineBreak)
Case "'"
Set Token = ReadComment(Ch)
Case ","
Set Token = NewToken(Ch, tkListSeparator)
Case ";"
Set Token = NewToken(Ch, tkPrintSeparator)
Case "("
Set Token = NewToken(Ch, tkLeftParenthesis)
Case ")"
Set Token = NewToken(Ch, tkRightParenthesis)
Case Else
If IsSpace(Cp) Then
If Not EOF(FileHandle_) Then
If GetChar = "_" Then
If EOF(FileHandle_) Then
UngetChar
Else
Cp = GetCodePoint
If Cp <> 10 And Cp <> 13 Then UngetChar 2
End If
Else
UngetChar
End If
End If
Set Token = NewToken(" ", tkWhiteSpace)
ElseIf IsLetter(Cp) Then
Set Token = ReadIdentifier(Cp)
If IsKeyword(Token.Text) And Token.Suffix = vbNullChar Then
Token.Kind = tkKeyword
If Token.Text = "Rem" Then Set Token = ReadComment(Token.Text)
ElseIf IsOperator(Token.Text) And Token.Suffix = vbNullChar Then
Token.Kind = tkOperator
End If
Else
Set Token = New Token
End If
End Select
Set GetToken = Token
End Function
It's not perfect, but it helps a lot.
Next week: We'll discuss some pending points before we move on.
Andrej Biasic
2020-09-02