Metamorphing Machine I rather be this walking metamorphosis
than having that old formed opinion about everything!

Let's build a transpiler! Part 8

This is the eighth post in a series of building a transpiler. You can find the previous ones here.

Last time I said we would be going to put everything we did so far to use.

Believe it or not, but every single code sample in my blog have been painfully done manually.
It takes a lot of time, but now we have everything needed to create a little program to generate an .HTML file out of a source code file with coloring keywords and such.
We'll have to do some little adjustments: Let's go:

Option Explicit
Option Compare Text

Private Const SPAN_STRING = "<span style='color:brown'>"
Private Const SPAN_KEYWORD = "<span style='color:blue'>"
Private Const SPAN_COMMENT = "<span style='color: green;'>"

Public FileHandle_ As Integer

Public Enum TokenKind
tkWhiteSpace
tkComment
tkIdentifier
tkEscapedIdentifier
tkKeyword
tkIntegerNumber
tkFloatNumber
tkSciNumber
tkOctalNumber
tkHexaNumber
tkFileHandle
tkString
tkDateTime
tkOperator
tkLeftParenthesis
tkRightParenthesis
tkHardLineBreak
tkSoftLineBreak
tkListSeparator
tkPrintSeparator
tkDirective
tkEndOfStream
End Enum


Public Sub Main()
Dim Nbsp As Boolean
Dim Index As Integer
Dim HtmlHandler As Integer
Dim Text As String
Dim FilePath As String
Dim Token As Token

Rem Get an available file number.
FileHandle_ = FreeFile
Rem File path for the source code is passed as a command-line argument.
FilePath = Command$
Open FilePath For Binary As #FileHandle_

Rem Output file will have the same name as the input file, but with an .HTML extension.
Index = InStrRev(FilePath, ".")
If Index <> 0 Then FilePath = Left$(FilePath, Index - 1)

FilePath = FilePath & ".html"
HtmlHandler = FreeFile
Open FilePath For Output As #HtmlHandler

Rem Ensuring we close the file in case we have an error.
On Error GoTo CloseIt

If GetCodePoint <> -257 Then UngetChar
Nbsp = True

Do
Set Token = GetToken

Select Case Token.Kind
Case tkWhiteSpace
If Nbsp Then
Print #HtmlHandler, "&nbsp;";
Else
Print #HtmlHandler, " ";
End If

Case tkComment
Print #HtmlHandler, SPAN_COMMENT;
Print #HtmlHandler, EncodeHtml(Token.Text);
Print #HtmlHandler, "</span><br>"

Case tkIdentifier, tkIntegerNumber, tkFloatNumber, tkSciNumber
Print #HtmlHandler, Token.Text;
Nbsp = False

Case tkEscapedIdentifier
Print #HtmlHandler, "["; Token.Text; "]";
Nbsp = False

Case tkKeyword
Print #HtmlHandler, SPAN_KEYWORD; Token.Text; "</span>";
Nbsp = False

Case tkOctalNumber
Print #HtmlHandler, "&amp;O"; Token.Text;

Case tkHexaNumber
Print #HtmlHandler, "&amp;H"; Token.Text;

Case tkFileHandle
Print #HtmlHandler, "#"; Token.Text;

Case tkString
Text = Token.Text
Text = Replace(Token.Text, """", """""")
Text = EncodeHtml(Text)
Print #HtmlHandler, SPAN_STRING; """"; Text; """"; "</span>";

Case tkDateTime
Print #HtmlHandler, "#"; Token.Text; "#";

Case tkOperator
If StrComp(Token.Text, Token.Text, vbBinaryCompare) = 0 Then
Print #HtmlHandler, EncodeHtml(Token.Text);
Else
Print #HtmlHandler, SPAN_KEYWORD; Token.Text; "</span>";
End If

Case tkLeftParenthesis, tkRightParenthesis, tkSoftLineBreak, tkListSeparator, tkPrintSeparator
Print #HtmlHandler, Token.Text;

Case tkHardLineBreak
Print #HtmlHandler, "<br>"
Nbsp = True

Case tkDirective
Print #HtmlHandler, "#"; Token.Text;
Nbsp = False

Case tkEndOfStream
Exit Do
End Select

If Token.Suffix <> vbNullChar Then Print #HtmlHandler, Token.Suffix;
Loop

CloseIt:
Close #HtmlHandler
Close #FileHandle_
Rem This is equivalent to a Throw in a Catch.
If Err.Number Then Err.Raise Err.Number
End Sub


Private Function EncodeHtml(ByVal Text As String) As String
Text = Replace(Text, "&", "&amp;")
Text = Replace(Text, "<", "&lt;")
Text = Replace(Text, ">", "&gt;")
EncodeHtml = Text
End Function


Private Function GetToken() As Token
Dim Cp As Integer
Dim Ch As String * 1
Dim Token As Token

If EOF(FileHandle_) Then
Set GetToken = NewToken("", tkEndOfStream)
Exit Function
End If

Cp = GetCodePoint
Ch = ToChar(Cp)

Select Case Ch
Case "["
Set Token = ReadEscapedIdentifier

Case """"
Set Token = ReadString

Case "&"
Set Token = ReadAmpersand

Case "#"
Set Token = ReadHash

Case "0" To "9"
Set Token = ReadNumber(Ch)

Case "+", "-", "*", "/", "\", "^", "=", ".", "!"
Set Token = NewToken(Ch, tkOperator)

Case "<"
Set Token = NewToken(Ch, tkOperator)
Ch = GetChar

If Ch = ">" Or Ch = "=" Then
Token.Text = Token.Text & Ch
Else
UngetChar
End If

Case ">"
Set Token = NewToken(Ch, tkOperator)

If GetChar = "=" Then
Token.Text = Token.Text & "="
Else
UngetChar
End If

Case ":"
Set Token = NewToken(Ch, tkSoftLineBreak)

Case vbLf
Set Token = NewToken(Ch, tkHardLineBreak)

Case vbCr
If GetCodePoint <> 10 Then UngetChar
Set Token = NewToken(vbLf, tkHardLineBreak)

Case "'"
Set Token = ReadComment(Ch)

Case ","
Set Token = NewToken(Ch, tkListSeparator)

Case ";"
Set Token = NewToken(Ch, tkPrintSeparator)

Case "("
Set Token = NewToken(Ch, tkLeftParenthesis)

Case ")"
Set Token = NewToken(Ch, tkRightParenthesis)

Case Else
If IsSpace(Cp) Then
If Not EOF(FileHandle_) Then
If GetChar = "_" Then
If EOF(FileHandle_) Then
UngetChar
Else
Cp = GetCodePoint
If Cp <> 10 And Cp <> 13 Then UngetChar 2
End If
Else
UngetChar
End If
End If

Set Token = NewToken(" ", tkWhiteSpace)

ElseIf IsLetter(Cp) Then
Set Token = ReadIdentifier(Cp)

If IsKeyword(Token.Text) And Token.Suffix = vbNullChar Then
Token.Kind = tkKeyword
If Token.Text = "Rem" Then Set Token = ReadComment(Token.Text)

ElseIf IsOperator(Token.Text) And Token.Suffix = vbNullChar Then
Token.Kind = tkOperator
End If

Else
Set Token = New Token
End If
End Select

Set GetToken = Token
End Function

It's not perfect, but it helps a lot.
Next week: We'll discuss some pending points before we move on.

Andrej Biasic
2020-09-02