Lexer :: struct {
    input          : string;
    cursor         : int;
    start          : int;
    current_line   : int;
    current_column : int;
    result         : *Compile_Result;
    path           : string;
}

Token_Kind :: enum {
    TOKEN_FLOATLITERAL; TOKEN_INTLITERAL;
    TOKEN_LOGICALOR; TOKEN_LOGICALAND;
    TOKEN_ISEQUAL; TOKEN_ISNOTEQUAL;
    TOKEN_PLUSEQUALS; TOKEN_MINUSEQUALS; TOKEN_TIMESEQUALS; TOKEN_DIVEQUALS; TOKEN_MODEQUALS;
    TOKEN_LESSEQUALS; TOKEN_LESS; TOKEN_GREATEREQUALS; TOKEN_GREATER;
    TOKEN_COLON; TOKEN_DOUBLECOLON; TOKEN_ASSIGN; TOKEN_ARROW; TOKEN_AT;
    TOKEN_PLUS; TOKEN_STAR; TOKEN_SLASH; TOKEN_MOD; TOKEN_MINUS;
    TOKEN_LEFTBRACE; TOKEN_RIGHTBRACE; TOKEN_LEFTBRACKET; TOKEN_RIGHTBRACKET;
    TOKEN_LEFTPAREN; TOKEN_RIGHTPAREN;
    TOKEN_SEMICOLON; TOKEN_COMMA; TOKEN_DOT; TOKEN_DOTDOT;
    TOKEN_IDENTIFIER;

    // Keywords
    TOKEN_BOOL; TOKEN_CASE; TOKEN_CBUFFER; TOKEN_COLUMNMAJOR; TOKEN_CONST; TOKEN_CONSTANT_BUFFER;
    TOKEN_CONTINUE; TOKEN_DEFAULT; TOKEN_DIRECTIVE; TOKEN_DISCARD; TOKEN_DO; TOKEN_DOUBLE;
    TOKEN_ELSE; TOKEN_EXPORT; TOKEN_EXTERN; TOKEN_FALSE; TOKEN_FOR; TOKEN_HALF; TOKEN_HINT;
    TOKEN_IF; TOKEN_IN; TOKEN_INOUT; TOKEN_INSTANCE; TOKEN_MATRIX; TOKEN_META; TOKEN_OPTIONAL;
    TOKEN_OUT; TOKEN_PIXEL; TOKEN_PROPERTIES; TOKEN_RETURN; TOKEN_REGISTER; TOKEN_STRING;
    TOKEN_STRUCT; TOKEN_SWITCH; TOKEN_TRUE; TOKEN_UNORM; TOKEN_UNSIGNED; TOKEN_UINT;
    TOKEN_VECTOR; TOKEN_VERTEX; TOKEN_VOID; TOKEN_WHILE;

    TOKEN_EOF;
    TOKEN_ERROR;
}

Token :: struct {
    kind : Token_Kind;
    union {
        ident_value   : string;
        integer_value : int;
        float_value   : float;
        string_value  : string;
    }
    source  : *u8;  // This could all be derived on demand.
    line    : int;
    length  : int;
    column  : int;
    index   : int;
    error   : string;
    builtin : bool;  // @Incomplete: This is kind of a bad idea, but let's just do it for now...
}

Source_Range :: struct {
    begin      : Token;
    end        : Token;
    main_token : Token;
}

is_at_end :: (using lexer : *Lexer) -> bool {
    // Check the count first so we never read one past the end of the input.
    return cursor >= input.count || input.data[cursor] == #char "\0";
}

peek_char :: (using lexer : *Lexer) -> u8 {
    if cursor >= input.count  return #char "\0";
    return input.data[cursor];
}

peek_next_char :: (using lexer : *Lexer) -> u8 {
    if cursor + 1 >= input.count  return #char "\0";
    return input.data[cursor + 1];
}

match_character :: (lexer : *Lexer, expected : u8) -> bool {
    if is_at_end(lexer)                            return false;
    if lexer.input.data[lexer.cursor] != expected  return false;
    lexer.cursor += 1;
    return true;
}

identifier :: (lexer : *Lexer) -> *Token {
    while is_alpha(peek_char(lexer)) || is_digit(peek_char(lexer)) || peek_char(lexer) == #char "_" {
        advance(lexer);
    }
    return make_identifier(lexer, identifier_kind(lexer));
}

directive :: (lexer : *Lexer) -> *Token {
    advance(lexer);
    while is_alpha(peek_char(lexer)) || is_digit(peek_char(lexer)) || peek_char(lexer) == #char "_" {
        advance(lexer);
    }
    return make_directive(lexer);
}
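// Example: given the input "foo12 #load", `identifier` keeps consuming
// letters, digits, and underscores and produces a single TOKEN_IDENTIFIER
// for "foo12", while `directive` consumes the "#" plus the following word
// and hands "#load" to make_directive.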
number :: (lexer : *Lexer) -> *Token {
    while is_digit(peek_char(lexer))  advance(lexer);

    is_float := false;
    if peek_char(lexer) == #char "." && is_digit(peek_next_char(lexer)) {
        is_float = true;
        advance(lexer);  // Consume the ".".

        while is_digit(peek_char(lexer)) {
            advance(lexer);
        }

        if peek_char(lexer) == #char "f" {
            advance(lexer);
            record_error(lexer, "We don't use 'f' suffixes for floating point values.");
            return null;
        }
    }

    if is_float  return make_float(lexer);
    return make_int(lexer);
}

identifier_kind :: (using lexer : *Lexer) -> Token_Kind {
    length := cursor - lexer.start;

    identifier : string;
    identifier.data  = *input.data[start];
    identifier.count = length;

    if identifier == "bool"             return .TOKEN_BOOL;
    if identifier == "case"             return .TOKEN_CASE;
    if identifier == "columnmajor"      return .TOKEN_COLUMNMAJOR;
    if identifier == "const"            return .TOKEN_CONST;
    if identifier == "constant_buffer"  return .TOKEN_CONSTANT_BUFFER;
    if identifier == "continue"         return .TOKEN_CONTINUE;
    if identifier == "default"          return .TOKEN_DEFAULT;
    if identifier == "directive"        return .TOKEN_DIRECTIVE;
    if identifier == "discard"          return .TOKEN_DISCARD;
    if identifier == "do"               return .TOKEN_DO;
    if identifier == "double"           return .TOKEN_DOUBLE;
    if identifier == "else"             return .TOKEN_ELSE;
    if identifier == "export"           return .TOKEN_EXPORT;
    if identifier == "extern"           return .TOKEN_EXTERN;
    if identifier == "false"            return .TOKEN_FALSE;
    if identifier == "for"              return .TOKEN_FOR;
    if identifier == "half"             return .TOKEN_HALF;
    if identifier == "hint"             return .TOKEN_HINT;
    if identifier == "if"               return .TOKEN_IF;
    if identifier == "in"               return .TOKEN_IN;
    if identifier == "inout"            return .TOKEN_INOUT;
    if identifier == "instance"         return .TOKEN_INSTANCE;
    if identifier == "matrix"           return .TOKEN_MATRIX;
    if identifier == "meta"             return .TOKEN_META;
    if identifier == "optional"         return .TOKEN_OPTIONAL;
    if identifier == "out"              return .TOKEN_OUT;
    if identifier == "pixel"            return .TOKEN_PIXEL;
    if identifier == "properties"       return .TOKEN_PROPERTIES;
    if identifier == "return"           return .TOKEN_RETURN;
    if identifier == "register"         return .TOKEN_REGISTER;
    if identifier == "struct"           return .TOKEN_STRUCT;
    if identifier == "switch"           return .TOKEN_SWITCH;
    if identifier == "true"             return .TOKEN_TRUE;
    if identifier == "unorm"            return .TOKEN_UNORM;
    if identifier == "unsigned"         return .TOKEN_UNSIGNED;
    if identifier == "uint"             return .TOKEN_UINT;
    if identifier == "vector"           return .TOKEN_VECTOR;
    if identifier == "vertex"           return .TOKEN_VERTEX;
    if identifier == "void"             return .TOKEN_VOID;
    if identifier == "while"            return .TOKEN_WHILE;

    // @Note: TOKEN_CBUFFER currently has no keyword mapping here.
    return .TOKEN_IDENTIFIER;
}

error_token :: (lexer : *Lexer, message : string) -> *Token {
    token : *Token = new_token(lexer, .TOKEN_ERROR);
    lexer.result.had_error = true;
    token.error = copy_string(message);
    return token;
}

// unable_to_open_file :: (state : *Parse_State, path : string, token : Token) {
//     builder : String_Builder;
//     init_string_builder(*builder,, temp);
//     print_to_builder(*builder, "Unable to open file '%' for reading\n\n", path);
//     location := generate_source_location_from_token(state, token);
//     indent(*builder, 1);
//     cyan(*builder);
//     print_to_builder(*builder, "%\n", print_from_source_location(location));
//     indent(*builder, 1);
//     loc := location.begin;
//     print_token_pointer(*builder, loc);
//     final_message := builder_to_string(*builder);
//     record_error(state, token, final_message, false);
// }
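// Examples: "42" becomes TOKEN_INTLITERAL with integer_value = 42, and
// "3.25" becomes TOKEN_FLOATLITERAL with float_value = 3.25. "3.x" lexes
// as the int 3 followed by '.' and an identifier, because a '.' only
// starts a fraction when a digit follows it; "3.25f" is rejected with an
// error, since 'f' suffixes are not allowed.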
record_error :: (lexer : *Lexer, message : string) {
    error : Compiler_Message;
    error.message_kind = .Error;
    error.message      = message;
    error.path         = lexer.path;

    token := error_token(lexer, message);

    source_location : Source_Range;
    source_location.main_token = token.*;

    // Stretch the token back to the start of its line so the reported
    // range covers everything up to the error.
    token.length += token.column;
    token.source -= token.column;
    token.column  = 0;

    source_location.begin = token.*;
    source_location.end   = token.*;

    array_add(*error.source_locations, source_location);
    lexer.result.had_error = true;
    array_add(*lexer.result.messages, error);
}

make_int :: (lexer : *Lexer) -> *Token {
    token : *Token = new_token(lexer, .TOKEN_INTLITERAL);
    str : string = .{ count = token.length, data = *lexer.input.data[lexer.start] };
    value, ok := string_to_int(str);
    if ok { token.integer_value = value; }
    return token;
}

make_float :: (lexer : *Lexer) -> *Token {
    token : *Token = new_token(lexer, .TOKEN_FLOATLITERAL);
    str : string = .{ count = token.length, data = *lexer.input.data[lexer.start] };
    value, ok := string_to_float(str);
    if ok { token.float_value = value; }
    return token;
}

new_token :: (lexer : *Lexer, kind : Token_Kind) -> *Token {
    length := lexer.cursor - lexer.start;

    token : Token;
    token.kind   = kind;
    token.line   = lexer.current_line;
    token.length = length;
    token.column = lexer.current_column;
    token.index  = lexer.cursor - token.length;

    if token.length > 0 {
        token.source = *lexer.input[token.index];
    } else {
        token.source = *lexer.input[token.index - 1];
    }

    lexer.current_column += length;

    array_add(*lexer.result.tokens, token);
    return *lexer.result.tokens[lexer.result.tokens.count - 1];
}

make_directive :: (lexer : *Lexer) -> *Token {
    lexer.start += 1;  // Skip the "#".
    ident := make_identifier(lexer, .TOKEN_DIRECTIVE);

    if ident.ident_value == "load" {
        path_tok := scan_next_token(lexer);
        path := path_tok.string_value;

        result : Compile_Result;
        result.allocator   = lexer.result.allocator;
        result.environment = lexer.result.environment;
        result.file        = make_file(*result, path);

        if result.file.source.count == 0 {
            // unable_to_open_file(lexer, path, path_tok);
            record_error(lexer, tprint("Unable to open file '%' for reading\n", path));
            return error_token(lexer, tprint("Unable to open file '%' for reading\n", path));
        }

        lex(*result);

        result.tokens.count       -= 1;  // @Note: Remove the loaded file's TOKEN_EOF.
        lexer.result.tokens.count -= 2;  // Drop the `#load` directive token and the path token.

        base := lexer.result.tokens.count;
        array_resize(*lexer.result.tokens, base + result.tokens.count);
        for tok : result.tokens {
            lexer.result.tokens[base + it_index] = tok;
        }

        return scan_next_token(lexer);
    }

    return ident;
}

make_string :: (lexer : *Lexer) -> *Token {
    token : *Token = new_token(lexer, .TOKEN_STRING);
    // Strip the surrounding quotes.
    name : string = .{ count = token.length - 2, data = *lexer.input.data[lexer.start + 1] };
    token.string_value = name;
    return token;
}

make_identifier :: (lexer : *Lexer, kind : Token_Kind) -> *Token {
    token : *Token = new_token(lexer, kind);
    name : string = .{ count = token.length, data = *lexer.input.data[lexer.start] };
    token.ident_value = name;
    return token;
}

make_token :: (lexer : *Lexer, token_kind : Token_Kind) -> *Token {
    return new_token(lexer, token_kind);
}

skip_whitespace :: (lexer : *Lexer) {
    while true {
        if is_at_end(lexer)  return;

        c := peek_char(lexer);
        if c == {
        case #char " ";
            lexer.current_column += 1;
            advance(lexer);
            continue;
        case #char "\r"; #through;
        case #char "\t";
            advance(lexer);
            continue;
        case #char "\n";
            advance(lexer);
            lexer.current_line += 1;
            lexer.current_column = 0;
            continue;
        case #char "/";
            next := peek_next_char(lexer);
            if next == #char "/" {
                // Skip a line comment.
                while peek_char(lexer) != #char "\n" && !is_at_end(lexer) {
                    advance(lexer);
                }
                continue;
            } else {
                return;
            }
        }

        return;
    }
}

advance :: (using lexer : *Lexer) -> u8 {
    c := input.data[cursor];
    cursor += 1;
    return c;
}
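// Example: for a file containing `a #load "lib.shader" b`, make_directive
// lexes lib.shader into its own Compile_Result, drops that result's
// TOKEN_EOF plus the `#load` and path tokens from this stream, and then
// splices the loaded tokens in, so the final stream reads as if
// lib.shader's source had appeared between `a` and `b`.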
scan_next_token :: (lexer : *Lexer) -> *Token {
    skip_whitespace(lexer);
    lexer.start = lexer.cursor;

    if is_at_end(lexer)  return make_token(lexer, .TOKEN_EOF);

    c := advance(lexer);
    if c == #char "#"  return directive(lexer);
    if is_alpha(c)     return identifier(lexer);
    if is_digit(c)     return number(lexer);

    if c == {
    case #char "\"";
        c = advance(lexer);
        // lexer.start = lexer.cursor;
        while c != #char "\"" {
            if is_at_end(lexer) {
                record_error(lexer, "Unterminated string literal.");
                return null;
            }
            c = advance(lexer);
        }
        // lexer.cursor -= 1;
        tok := make_string(lexer);
        // advance(lexer);
        return tok;
    case #char "+";
        if match_character(lexer, #char "=")  return make_token(lexer, .TOKEN_PLUSEQUALS);
        return make_token(lexer, .TOKEN_PLUS);
    case #char "-";
        if match_character(lexer, #char "=")  return make_token(lexer, .TOKEN_MINUSEQUALS);
        if match_character(lexer, #char ">")  return make_token(lexer, .TOKEN_ARROW);
        return make_token(lexer, .TOKEN_MINUS);
    case #char "*";
        if match_character(lexer, #char "=")  return make_token(lexer, .TOKEN_TIMESEQUALS);
        return make_token(lexer, .TOKEN_STAR);
    case #char "/";
        if match_character(lexer, #char "=")  return make_token(lexer, .TOKEN_DIVEQUALS);
        return make_token(lexer, .TOKEN_SLASH);
    case #char "%";
        if match_character(lexer, #char "=")  return make_token(lexer, .TOKEN_MODEQUALS);
        return make_token(lexer, .TOKEN_MOD);
    case #char ":";
        if match_character(lexer, #char ":")  return make_token(lexer, .TOKEN_DOUBLECOLON);
        return make_token(lexer, .TOKEN_COLON);
    case #char "@";
        return make_token(lexer, .TOKEN_AT);
    case #char "|";
        // A single "|" falls out of the switch to the invalid-token error below.
        if match_character(lexer, #char "|")  return make_token(lexer, .TOKEN_LOGICALOR);
    case #char "&";
        if match_character(lexer, #char "&")  return make_token(lexer, .TOKEN_LOGICALAND);
    case #char "!";
        if match_character(lexer, #char "=")  return make_token(lexer, .TOKEN_ISNOTEQUAL);
    case #char "=";
        if match_character(lexer, #char "=")  return make_token(lexer, .TOKEN_ISEQUAL);
        return make_token(lexer, .TOKEN_ASSIGN);
    case #char ">";
        if match_character(lexer, #char "=")  return make_token(lexer, .TOKEN_GREATEREQUALS);
        return make_token(lexer, .TOKEN_GREATER);
    case #char "<";
        if match_character(lexer, #char "=")  return make_token(lexer, .TOKEN_LESSEQUALS);
        return make_token(lexer, .TOKEN_LESS);
    case #char "{";  return make_token(lexer, .TOKEN_LEFTBRACE);
    case #char "}";  return make_token(lexer, .TOKEN_RIGHTBRACE);
    case #char "(";  return make_token(lexer, .TOKEN_LEFTPAREN);
    case #char ")";  return make_token(lexer, .TOKEN_RIGHTPAREN);
    case #char "[";  return make_token(lexer, .TOKEN_LEFTBRACKET);
    case #char "]";  return make_token(lexer, .TOKEN_RIGHTBRACKET);
    case #char ";";  return make_token(lexer, .TOKEN_SEMICOLON);
    case #char ",";  return make_token(lexer, .TOKEN_COMMA);
    case #char ".";
        if match_character(lexer, #char ".")  return make_token(lexer, .TOKEN_DOTDOT);
        return make_token(lexer, .TOKEN_DOT);
    }

    s : string = .{ count = 1, data = *c };
    record_error(lexer, tprint("Invalid token: %", s));
    return null;
    // return error_token(lexer, tprint("Invalid token: %", s));
}

lex :: (result : *Compile_Result) {
    if result.had_error { return; }

    lexer : Lexer;
    lexer.result = result;
    init_lexer_from_string(*lexer, result.file.source);
    lexer.path = result.file.path;

    token : *Token = scan_next_token(*lexer);
    while token && token.kind != .TOKEN_EOF {
        token = scan_next_token(*lexer);
    }
}

init_lexer_from_string :: (lexer : *Lexer, input : string) {
    ok := read_input_from_string(lexer, input);
    if !ok {
        record_error(lexer, "Unable to initialize from string\n");
        lexer.result.had_error = true;
    }
}
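// A minimal driver sketch, assuming make_file reads the source into
// result.file the way make_directive uses it above. Illustration only;
// this is not part of the lexer's API.
example_lex_file :: (path : string) {
    result : Compile_Result;
    result.file = make_file(*result, path);  // Assumed to fill result.file.source.
    lex(*result);
    if !result.had_error {
        print("%", pretty_print_tokens(result.tokens, context.allocator));
    }
}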
init_lexer_from_file :: (lexer : *Lexer, file_path : string) {
    ok := read_input_from_file(lexer, file_path);
    if !ok {
        record_error(lexer, tprint("Unable to read file: %\n", file_path));
        lexer.result.had_error = true;
    }
}

read_input_from_string :: (lexer : *Lexer, input : string) -> bool {
    lexer.input          = input;
    lexer.cursor         = 0;
    lexer.start          = 0;
    lexer.current_line   = 1;
    lexer.current_column = 0;
    return true;
}

read_input_from_file :: (lexer : *Lexer, file_path : string) -> bool {
    assert(file_path != "");

    value, success := read_entire_file(file_path, true, true);
    if !success {
        free(value);
        return false;
    }

    lexer.path           = copy_string(file_path);
    lexer.input          = value;
    lexer.cursor         = 0;
    lexer.start          = 0;
    lexer.current_line   = 1;
    lexer.current_column = 0;
    return true;
}

// ===========================================================
// Pretty printing

pretty_print_token :: (token : *Token, builder : *String_Builder) {
    MAX :: 21;
    kind_name := enum_names(Token_Kind)[cast(int)token.kind];
    diff := MAX - kind_name.count;

    print_to_builder(builder, "{kind = %; ", token.kind);
    for i : 0..diff - 1  append(builder, " ");

    // Pad small numbers so the columns line up. (The exact pad widths were
    // lost in formatting; these are a best guess at the original intent.)
    append_to_length :: (builder : *String_Builder, number : int) {
        if number < 10 {
            append(builder, "    ");
        } else if number < 100 {
            append(builder, "   ");
        } else if number < 1000 {
            append(builder, "  ");
        } else if number < 10000 {
            append(builder, " ");
        }
    }

    print_to_builder(builder, "index = %", token.index);
    append_to_length(builder, token.index);
    print_to_builder(builder, "; length = %", token.length);
    append_to_length(builder, token.length);
    print_to_builder(builder, "; line = %", token.line);
    append_to_length(builder, token.line);
    print_to_builder(builder, "; column = %", token.column);
    append_to_length(builder, token.column);

    append(builder, "; value ='");
    if token.kind == .TOKEN_IDENTIFIER {
        print_to_builder(builder, "%", token.ident_value);
    } else if token.kind == .TOKEN_INTLITERAL {
        print_to_builder(builder, "%", token.integer_value);
    } else if token.kind == .TOKEN_FLOATLITERAL {
        print_to_builder(builder, "%", token.float_value);
    } else if token.kind == .TOKEN_ERROR {
        print_to_builder(builder, "%", token.error);
    } else {
        source : string = .{ count = token.length, data = token.source };
        print_to_builder(builder, "%", source);
    }
    append(builder, "'; }\n");
}

pretty_print_tokens :: (lexer : *Lexer, allocator : Allocator) -> string {
    builder : String_Builder;
    init_string_builder(*builder,, allocator);

    token : *Token = scan_next_token(lexer);
    while token && token.kind != .TOKEN_EOF {
        pretty_print_token(token, *builder);
        token = scan_next_token(lexer);
    }

    return builder_to_string(*builder,, allocator);
}

pretty_print_tokens :: (tokens : []Token, allocator : Allocator) -> string {
    builder : String_Builder;
    init_string_builder(*builder,, allocator);

    for token : tokens {
        pretty_print_token(*token, *builder);
    }

    return builder_to_string(*builder,, allocator);
}

output_as_code_string :: (lexer : *Lexer, allocator : Allocator) -> string {
    builder : String_Builder;

    new_context := context;
    new_context.allocator = allocator;
    push_context new_context {
        init_string_builder(*builder);  // @Incomplete: Consider passing the builder as an argument.

        token : *Token = scan_next_token(lexer);
        while token && token.kind != .TOKEN_EOF {
            token = scan_next_token(lexer);
        }

        return builder_to_string(*builder);
    }
}

print_token_pointer :: (builder : *String_Builder, token : Token) {
    for i : 0..token.column - 1  append(builder, " ");
    for i : 0..token.length - 1  append(builder, "^");
}
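// Example: for a token with column = 4 and length = 3, print_token_pointer
// appends "    ^^^", which lines a caret run up under the token when
// printed beneath its source line.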
print_from_source_location :: (builder : *String_Builder, source_location : Source_Range, indentation : int = 0) {
    begin := source_location.begin;
    end   := source_location.end;

    count := end.index - begin.index + end.length;

    if indentation > 0 {
        // Re-indent every line of the quoted range.
        indent(builder, indentation);
        for 0..count - 1 {
            c := begin.source[it];
            if c == #char "\n" {
                append(builder, "\n");
                indent(builder, indentation);
            } else {
                s : string;
                s.count = 1;
                s.data  = *c;
                print_to_builder(builder, "%", s);
            }
        }
    } else {
        token_string : string = .{ count = count, data = begin.source };
        indent(builder, indentation);
        print_to_builder(builder, "%", token_string);
    }
}

print_from_source_location :: (source_location : Source_Range, allocator := context.allocator, indentation : int = 0) -> string {
    builder : String_Builder;
    init_string_builder(*builder,, allocator);
    print_from_source_location(*builder, source_location, indentation);
    return builder_to_string(*builder,, allocator);
}

#import "Basic";
#import "File";
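// A small sketch of how Source_Range is meant to be used (illustration
// only; first_token and last_token are hypothetical names):
//
//     range : Source_Range;
//     range.begin      = first_token;  // e.g. the 'if' keyword
//     range.end        = last_token;   // e.g. the closing '}'
//     range.main_token = first_token;
//     text := print_from_source_location(range);  // the exact source slice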