LexAna is a mixed (bottom-up and top-down), nondeterministic, minimalistic parsing system.
Features:
- Based on regular expressions for both lexical and syntactic processing
- The rule syntax is the same for the lexer and the parser: T -> regex (where T is a single character)
- Input is a text string, output is a parsing tree
- Suitable for simple parsing, but parses even some programming languages
- A simple and intuitive (humanlike) approach
- No knowledge is required
- No need for additional information (parsing tables, stacks ...)
- No states (stateless parsing system)
- Imperative as well as descriptive
- Easy error handling
- Parser never fails, always succeeds
- No ambiguity
- Search and reduce parsing technique
- Automatically builds a syntax tree
- Reverse level order tree building
- Constant tree transformation in the syntax analysis
- No need for a grammar
- The parsing process is easy to control
LexAna deals with:
- Formats (INI, HTML, JSON, XML ...)
- Complex expressions with operator precedence ...
- Domain-specific languages (DSLs)
- Programming languages (Basic, Logo, Tiny, PL/0 ...)
How the LexAna parser works:
- After tokenization, every token is represented by a single character, and together the tokens form a string that is then processed by the parser.
- The parser, guided by the parsing rules, searches the token string for syntactic units and replaces each matching group of tokens with a new token.
The lexemes of the replaced tokens are wrapped and assigned to that new token. A substitution can occur anywhere in the string, not necessarily at the beginning.
- The parser stops when all parsing rules have been applied. If the token string then contains only one token, parsing was successful.
The engine module:
Code:
' Module-level settings shared by the LexAna engine routines below:
'   keep - Like-pattern tested against each token character in wrap(); only
'          tokens whose character matches have their lexeme kept in the node.
'   flat - when True, wrap() returns a lone kept element that is already an
'          array as-is instead of nesting it in a one-element array.
'   IC   - copied into re.IgnoreCase by LexAnaScan (case handling while lexing).
'   re   - regular-expression object used through Pattern, IgnoreCase, Global
'          and Execute. It is not created in the code shown here, so the
'          caller presumably assigns a VBScript RegExp instance first
'          (NOTE(review): confirm against the calling module).
Public keep As String, flat As Boolean, IC As Boolean, re ' LexAna Engine ver. 1.0.
Public Function LexAnaScan(target As String, Lexer) As Variant
    ' Tokenizes target with the rules in Lexer and returns the initial tree.
    '
    ' Lexer(0) is a string holding one token character per rule; Lexer(1..n)
    ' are the regular expressions, tried in order at the start of target.
    ' The rule character "." means "use the lower-cased first character of the
    ' lexeme as the token"; "_" means "discard this lexeme".
    '
    ' target is passed ByRef and is consumed as lexemes are matched; whatever
    ' no rule could match is left in it when the function returns.
    '
    ' Returns an array: element 0 is the token string, element i (i >= 1) is
    ' the lexeme (or array of sub-matches) that belongs to the i-th token.
    Dim nodes, lexeme
    Dim tokenStr As String, tokenChar As String
    Dim ruleNo As Integer, hit As Integer
    Dim slot As Long

    re.IgnoreCase = IC
    re.Global = False

    ReDim nodes(Len(target))          ' upper bound: one token per input character
    tokenStr = ""
    slot = 1
    ruleNo = 1

    Do While Len(target) > 0 And ruleNo <= UBound(Lexer)
        hit = match(Lexer(ruleNo), target, lexeme)
        If hit > 0 Then
            tokenChar = Mid(Lexer(0), ruleNo, 1)
            nodes(slot) = lexeme
            If tokenChar = "." Then tokenChar = LCase(Left(lexeme, 1))
            If tokenChar <> "_" Then
                ' Keep the token; a "_" token leaves slot unchanged so its
                ' lexeme is overwritten by the next one.
                tokenStr = tokenStr & tokenChar
                slot = slot + 1
            End If
            ruleNo = 1                ' restart from the first rule
        Else
            ruleNo = ruleNo + 1       ' try the next rule on the same input
        End If
    Loop

    nodes(0) = tokenStr
    ReDim Preserve nodes(slot - 1)
    LexAnaScan = nodes
End Function
Private Function match(ByVal patt As String, target As String, ans) As Integer
    ' Tries to match patt at the very beginning of target.
    '
    ' On success the matched text is removed from target (ByRef), ans receives
    ' the lexeme and the function returns (number of sub-matches + 1):
    '   - no capture groups   -> ans is the whole matched string
    '   - one capture group   -> ans is that group's string
    '   - several groups      -> ans is a zero-based array of the groups
    ' Returns 0 (ans and target untouched) when there is no match, the match
    ' does not start at position 0, or the match is empty.
    Dim found As Object, first As Object
    Dim groups As Integer, g As Integer

    re.Pattern = patt
    Set found = re.Execute(target)
    If found.Count = 0 Then Exit Function            ' result stays 0

    Set first = found(0)
    If first.FirstIndex <> 0 Or first.Length = 0 Then Exit Function

    groups = first.SubMatches.Count
    Select Case groups
        Case 0
            ans = first.Value
        Case 1
            ans = first.SubMatches(0)
        Case Else
            ReDim ans(groups - 1)
            For g = 0 To groups - 1
                ans(g) = first.SubMatches(g)
            Next g
    End Select

    target = Mid(target, first.Length + 1)           ' drop the consumed prefix
    match = groups + 1
End Function
Public Function LexAnaParse(tree, Parser) As Integer
    ' Applies every parsing rule to tree, in order.
    '
    ' Parser is a flat array of pairs: Parser(2k) is the regular expression
    ' searched for in the token string, Parser(2k + 1) the replacement spec
    ' handed to Reduce. tree is modified in place.
    ' Returns the result of the last Reduce call (0 if no rule was applied).
    Dim ruleAt As Integer
    Dim lastRule As Long

    re.IgnoreCase = False
    re.Global = True

    lastRule = UBound(Parser) - 1
    ruleAt = 0
    Do While ruleAt <= lastRule
        LexAnaParse = Reduce(tree, Parser, ruleAt)
        ruleAt = ruleAt + 2
    Loop
End Function
Private Function Reduce(tree, Parser, line As Integer) As Integer
    ' Applies one parsing rule to tree, rewriting it in place.
    '
    ' tree(0) is the token string; tree(i) is the lexeme/subtree of token i.
    ' Parser(line) is the regular expression searched for in the token string.
    ' Parser(line + 1) is the replacement spec: a single character is the new
    ' token; an empty string deletes the matched tokens. If the spec is longer
    ' than one character its last character is a modifier and the rest is the
    ' replacement:
    '   "+"  repeat the rule until the token string stops changing
    '   "@"  parse the tokens between the first and last matched token
    '        recursively with the whole rule set; the match is replaced only
    '        if that yields exactly one token that occurs in the replacement
    '        list, otherwise the matched tokens are left unchanged
    ' Always returns 1.
    '
    ' Fixes over the previous version:
    '   - tokens lying between a zero-length regex match and the next match
    '     were silently dropped; the gap is now copied for every match.
    '   - an "@" match shorter than two tokens no longer raises a run-time
    '     error (negative ReDim); it is left unchanged instead.
    Dim recTree, colMatch, swap As String, rep As String
    Dim nT As String, matched As String, tokens As String, ans As String
    Dim nxt As Long, FI As Long, ND As Long, SZ As Long, lrec As Integer
    Dim copyFrom As Long, n As Long, k As Long, i As Long

    swap = Parser(line + 1)
    rep = ""
    If Len(swap) > 1 Then
        rep = Right(swap, 1)
        swap = Left(swap, Len(swap) - 1)
    End If

    Do
        tokens = tree(0)
        re.Pattern = Parser(line)      ' reset each pass: the recursive call changes it
        Set colMatch = re.Execute(tokens)
        If colMatch.Count = 0 Or tokens = "" Then Exit Do ' No matches or nothing to match

        ' nxt is the next free slot in the compacted tree; everything before
        ' the first match stays where it is.
        nxt = colMatch(0).FirstIndex + 1
        nT = Left(tokens, nxt - 1)

        For n = 0 To colMatch.Count - 1
            FI = colMatch(n).FirstIndex + 1    ' 1-based position of the match
            SZ = colMatch(n).Length
            copyFrom = FI + SZ                 ' first token after the match

            If SZ > 0 Then
                matched = Mid(tokens, FI, SZ)
                If rep = "@" Then
                    lrec = Len(matched) - 2    ' tokens between the delimiters
                    If lrec < 0 Then
                        copyFrom = FI          ' no delimiter pair: keep the match as is
                    Else
                        ReDim recTree(lrec)
                        recTree(0) = Mid(matched, 2, lrec)
                        For k = 1 To lrec
                            recTree(k) = tree(FI + k)
                        Next k
                        Call LexAnaParse(recTree, Parser) ' Recursive call
                        ans = recTree(0)
                        If Len(ans) = 1 And InStr(swap, ans) <> 0 Then ' Must be a letter from the swap list
                            ' Lay out delimiter, reduced subtree, delimiter
                            ' contiguously so wrap can pick them up.
                            tree(FI + 1) = recTree(1)
                            tree(FI + 2) = tree(FI + lrec + 1)
                            tree(nxt) = wrap(tree, FI, Left(matched, 1) & ans & Right(matched, 1))
                            nT = nT & ans
                            nxt = nxt + 1
                        Else
                            copyFrom = FI      ' reduction failed: keep the match as is
                        End If
                    End If
                ElseIf swap <> "" Then
                    tree(nxt) = wrap(tree, FI, matched)
                    nT = nT & swap
                    nxt = nxt + 1
                End If
                ' swap = "" without "@": the matched tokens are simply removed.
            End If

            ' Copy the untouched tokens up to the next match (or to the end).
            ' This must run for zero-length matches too, otherwise the tokens
            ' in the gap would be lost.
            If n < colMatch.Count - 1 Then
                ND = colMatch(n + 1).FirstIndex
            Else
                ND = Len(tokens)
            End If
            For i = copyFrom To ND
                tree(nxt) = tree(i)
                nT = nT & Mid(tokens, i, 1)
                nxt = nxt + 1
            Next i
        Next n

        tree(0) = nT
        ReDim Preserve tree(nxt - 1)
    Loop While nT <> tokens And (rep = "+" Or rep = "@")

    Reduce = 1
End Function
Private Function wrap(shrink, pos As Long, ByVal txt As String) As Variant
    ' Builds the tree node for the tokens txt that start at index pos in shrink.
    '
    ' Only tokens whose character matches the module-level Like-pattern keep
    ' contribute their entry shrink(pos + offset) to the node.
    ' Returns:
    '   - Empty when no token is kept
    '   - the single kept entry itself when txt is one token long, or when
    '     flat is True and that entry is already an array
    '   - otherwise a zero-based array of the kept entries
    Dim kept
    Dim keptCount As Integer
    Dim txtLen As Long, ofs As Long

    txtLen = Len(txt)
    ReDim kept(txtLen)
    keptCount = 0

    For ofs = 0 To txtLen - 1
        If Mid(txt, ofs + 1, 1) Like keep Then
            kept(keptCount) = shrink(pos + ofs)
            keptCount = keptCount + 1
        End If
    Next ofs

    If keptCount = 0 Then Exit Function     ' result stays Empty

    ReDim Preserve kept(keptCount - 1)
    If keptCount > 1 Then
        wrap = kept
    ElseIf txtLen = 1 Then
        wrap = kept(0)
    ElseIf flat And IsArray(kept(0)) Then
        wrap = kept(0)                      ' avoid an extra nesting level
    Else
        wrap = kept
    End If
End Function