type [<RequireQualifiedAccess>] RawCharacterType =
static member FromChar char =
if System.Char.IsWhiteSpace(char) then
RawCharacterType.Whitespace
elif char = '\r' || char = '\n' then
elif System.Char.IsLetterOrDigit(char) then
RawCharacterType.PunctuationOrSymbol
| Whitespace | NewLine | Word -> true
| PunctuationOrSymbol -> false
type [<RequireQualifiedAccess>] Keyword =
type [<RequireQualifiedAccess>] TokenType =
| StringLiteral of string
RawCharacterType: RawCharacterType
if String.IsNullOrWhiteSpace(arg) then None
match arg.Trim().ToLower() with
| "let" -> Some(Keyword.Let)
| "false" -> Some(Keyword.False)
| "true" -> Some(Keyword.True)
if String.IsNullOrWhiteSpace(arg) then None
match Int32.TryParse arg with
| true, result -> Some(result)
let GetTokenType (text: string) rawCharType =
| RawCharacterType.Whitespace -> TokenType.Whitespace
| RawCharacterType.NewLine -> TokenType.NewLine
| RawCharacterType.PunctuationOrSymbol -> TokenType.Punctuation(text.[0])
| RawCharacterType.Word ->
| Keyword(word) -> TokenType.Keyword(word)
| Integer(integer) -> TokenType.Integer
| _ -> TokenType.Identifier
let Tokenize (text: string) lineNumber =
let endWord rawCharType nextChar =
let nextTokenType = RawCharacterType.FromChar x
if nextTokenType = rawCharType && rawCharType.ChainType then false, None
else true, Some(nextTokenType)
let rec addToken rawCharType startColumn endColumn =
let tokenText = text.[startColumn..endColumn]
RawCharacterType = rawCharType
TokenType = (GetTokenType tokenText rawCharType)
let nextColumn = endColumn + 1
let peekChar = if nextColumn >= text.Length then None
else Some(text.[nextColumn])
match endWord rawCharType peekChar with
| false, Some(_) -> failwithf "Word continuation should not return Token Type: \"%s\"" text
| true, Some(nextTokenType) -> createToken() :: (addToken nextTokenType nextColumn nextColumn)
| true, None -> [ createToken() ]
| false, None -> addToken rawCharType startColumn nextColumn
if text.Length = 0 then []
let initialTokenType = RawCharacterType.FromChar text.[0]
addToken initialTokenType 0 0
open System.Text.RegularExpressions
member this.OperatorText =
type [<RequireQualifiedAccess>] ASTBatch =
| Batch of ASTStatement list
and [<RequireQualifiedAccess>] ASTStatement =
| Assignment of ASTIdentifier * ASTExpression
and [<RequireQualifiedAccess>] ASTExpression =
| DecimalLiteral of decimal
and [<RequireQualifiedAccess>] ASTIdentifier =
and [<RequireQualifiedAccess>] ASTOperator =
| Operator of OperatorType
type [<RequireQualifiedAccess>] ParseResult<'T> =
| Success of 'T * Lexer.Token list
type Parser<'T> = ((Lexer.Token list) -> ParseResult<'T>)
let private choose (parsers: Parser<_> list) (tokens: Lexer.Token list) =
let rec combinator (remainingParsers: Parser<_> list) (parserResults: ParseResult<_> list) =
match remainingParsers with
| singleParser :: remain ->
let testResults = singleParser tokens
| ParseResult.Success(_) ->
| ParseResult.Failure(_) ->
combinator remain (testResults :: parserResults)
let Chain (parser1: Parser<'a>) (parser2: Parser<'b>) (tokens: Lexer.Token list) =
let parser1Result = parser1 tokens
| ParseResult.Success(p1R, remainingTokens1) ->
let parser2Result = parser2 remainingTokens1
| ParseResult.Success(p2R, remainingTokens2) ->
ParseResult.Success((p1R, p2R), remainingTokens2)
| ParseResult.Failure(error2) ->
ParseResult.Failure(error2)
| ParseResult.Failure(error1) ->
ParseResult.Failure(error1)
let ChainLeft (parser1: Parser<_>) (parser2: Parser<_>) (tokens: Lexer.Token list) =
let parser1Result = parser1 tokens
| ParseResult.Success(p1R, remainingTokens1) ->
let parser2Result = parser2 remainingTokens1
| ParseResult.Success(_, remainingTokens2) ->
ParseResult.Success(p1R, remainingTokens2)
| ParseResult.Failure(error2) ->
ParseResult.Failure(error2)
| ParseResult.Failure(error1) ->
ParseResult.Failure(error1)
let ChainRight (parser1: Parser<_>) (parser2: Parser<_>) (tokens: Lexer.Token list) =
let parser1Result = parser1 tokens
| ParseResult.Success(_, remainingTokens1) ->
let parser2Result = parser2 remainingTokens1
| ParseResult.Success(p2R, remainingTokens2) ->
ParseResult.Success(p2R, remainingTokens2)
| ParseResult.Failure(error2) ->
ParseResult.Failure(error2)
| ParseResult.Failure(error1) ->
ParseResult.Failure(error1)
let private next (tokenList: Lexer.Token list) =
| x :: items -> Some(x), items
let ParseWhitespace tokenList =
let rec ignoreWhitespace remainingTokens initialWhiteSpace =
match next remainingTokens with
| TokenType.Whitespace ->
ignoreWhitespace l1 false
match initialWhiteSpace with
| true -> ParseResult.Failure("Expected whitespace")
| false -> ParseResult.Success((), remainingTokens)
| _ -> ParseResult.Failure("End of file")
ignoreWhitespace tokenList true
let ParseASTIdentifier tokenList =
match next tokenList with
| TokenType.Identifier ->
match Regex.IsMatch(x1.Text, "^[A-Za-z][A-Za-z0-9]*$") with
| true -> ParseResult.Success(ASTIdentifier.Identifier x1.Text, l1)
| false -> ParseResult.Failure(sprintf "\"%s\" is not identifier." x1.Text)
| _ -> ParseResult.Failure(sprintf "\"%s\" is not identifier." x1.Text)
| _ -> ParseResult.Failure("End of file")
let ParseASTInteger tokenList =
match next tokenList with
match System.Int32.TryParse x1.Text with
| true, value -> ParseResult.Success(ASTExpression.IntegerLiteral value, l1)
| false, _ -> ParseResult.Failure(sprintf "\"%s\" is not integer." x1.Text)
| _ -> ParseResult.Failure(sprintf "\"%s\" is not integer." x1.Text)
| _ -> ParseResult.Failure("End of file")
let ParseASTIsOperator (operator: OperatorType) tokenList =
let operatorText = operator.OperatorText
let rec readOperator remainingTokens remainingOperatorText =
match remainingOperatorText with
ParseResult.Success((), remainingTokens)
match next remainingTokens with
| TokenType.Punctuation(c) when x = c ->
ParseResult.Failure(sprintf "Expected \"%s\"" operatorText)
ParseResult.Failure("End of file")
readOperator tokenList (operatorText.ToCharArray() |> Array.toList)
let ParseASTIsKeyword (keyword: Keyword) tokenList =
match next tokenList with
| TokenType.Keyword(k) when k = keyword -> ParseResult.Success((), l1)
| _ -> ParseResult.Failure(sprintf "\"%s\" is not keyword \"%A\"." x1.Text keyword)
| _ -> ParseResult.Failure("End of file")
let ParseTerminator = (ParseASTIsOperator OperatorType.Terminator)
let inline (<!>) parser1 parser2 = Chain parser1 parser2
let inline (<<) parser1 parser2 = ChainLeft parser1 parser2
let inline (>>) parser1 parser2 = ChainRight parser1 parser2
let Translate<'a,'b> (success: 'a -> 'b) (parser: Parser<'a>) =
| ParseResult.Success(t, remainingTokens) -> ParseResult.Success(success t, remainingTokens)
| ParseResult.Failure(error) -> ParseResult.Failure(error))
let TranslateWithError<'a,'b> (success: 'a -> 'b, error: string -> string) (parser: ParseResult<'a>) =
| ParseResult.Success(t, remainingTokens) -> ParseResult.Success(success t, remainingTokens)
| ParseResult.Failure(error) -> ParseResult.Failure(error)
(((ParseASTIsKeyword Keyword.Let) >> ParseWhitespace) >> ParseASTIdentifier << ParseWhitespace << (ParseASTIsOperator OperatorType.Equals) << ParseWhitespace <!> ParseASTInteger << ParseTerminator)
|> Translate (ASTStatement.Assignment)
printfn "#### Printing Tokens ####"
let tokens = Lexer.Tokenize "let four = 5;" 0
let parseResult = AST.ParseAssignment tokens
printfn "#### Printing Abstract Syntax Tree ####"