init commit

2024-06-09 21:34:23 +02:00
commit b81c0af596
177 changed files with 8799 additions and 0 deletions

Lexing.jai (new file, 695 lines)

@@ -0,0 +1,695 @@
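// Lexing.jai: a hand-written lexer, apparently for this project's shader
// language (note keywords like cbuffer, vertex, pixel, discard). It produces
// a flat token array plus compiler messages; see Lexing_Result.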
Lexer :: struct {
input : string;
cursor : int;
start : int;
current_line : int;
current_column : int;
result : Lexing_Result;
path : string;
}
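// Everything the lexer produces: the token stream plus any diagnostics.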
Lexing_Result :: struct {
tokens : [..]Token;
had_error : bool;
messages : [..]Compiler_Message;
}
Token_Kind :: enum {
TOKEN_FLOATLITERAL;
TOKEN_INTLITERAL;
TOKEN_LOGICALOR;
TOKEN_LOGICALAND;
TOKEN_ISEQUAL;
TOKEN_ISNOTEQUAL;
TOKEN_PLUSEQUALS;
TOKEN_MINUSEQUALS;
TOKEN_TIMESEQUALS;
TOKEN_DIVEQUALS;
TOKEN_MODEQUALS;
TOKEN_LESSEQUALS;
TOKEN_LESS;
TOKEN_GREATEREQUALS;
TOKEN_GREATER;
TOKEN_COLON;
TOKEN_DOUBLECOLON;
TOKEN_ASSIGN;
TOKEN_ARROW;
TOKEN_AT;
TOKEN_PLUS;
TOKEN_STAR;
TOKEN_SLASH;
TOKEN_MOD;
TOKEN_MINUS;
TOKEN_LEFTBRACE;
TOKEN_RIGHTBRACE;
TOKEN_LEFTBRACKET;
TOKEN_RIGHTBRACKET;
TOKEN_LEFTPAREN;
TOKEN_RIGHTPAREN;
TOKEN_SEMICOLON;
TOKEN_COMMA;
TOKEN_DOT;
TOKEN_IDENTIFIER;
// Keywords
TOKEN_BOOL;
TOKEN_CASE;
TOKEN_CBUFFER;
TOKEN_COLUMNMAJOR;
TOKEN_CONST;
TOKEN_CONTINUE;
TOKEN_DEFAULT;
TOKEN_DIRECTIVE;
TOKEN_DISCARD;
TOKEN_DO;
TOKEN_DOUBLE;
TOKEN_ELSE;
TOKEN_EXPORT;
TOKEN_EXTERN;
TOKEN_FALSE;
TOKEN_FOR;
TOKEN_HALF;
TOKEN_HINT;
TOKEN_IF;
TOKEN_IN;
TOKEN_INOUT;
TOKEN_INSTANCE;
TOKEN_MATRIX;
TOKEN_META;
TOKEN_OPTIONAL;
TOKEN_OUT;
TOKEN_PIXEL;
TOKEN_PROPERTIES;
TOKEN_RETURN;
TOKEN_REGISTER;
TOKEN_STRUCT;
TOKEN_SWITCH;
TOKEN_TRUE;
TOKEN_UNORM;
TOKEN_UNSIGNED;
TOKEN_UINT;
TOKEN_VECTOR;
TOKEN_VERTEX;
TOKEN_VOID;
TOKEN_WHILE;
TOKEN_EOF;
TOKEN_ERROR;
}
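// A token stores its kind, a literal/identifier payload, and enough position
// information (pointer into the source, line, column, index) for diagnostics.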
Token :: struct {
kind : Token_Kind;
union {
ident_value : string;
integer_value : int;
float_value : float;
string_value : string;
}
source : *u8;
line : int;
length : int;
column : int;
index : int;
error : string;
}
Source_Range :: struct {
begin : Token;
end : Token;
main_token : Token;
}
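// Cursor helpers. File input is zero-terminated (see read_input_from_file),
// but string input need not be, so these also bounds-check against input.count.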
is_at_end :: (using lexer : *Lexer) -> bool {
return cursor >= input.count || input.data[cursor] == #char "\0";
}
peek_char :: (using lexer : *Lexer) -> u8 {
return input.data[cursor];
}
peek_next_char :: (using lexer : *Lexer) -> u8 {
if cursor + 1 >= input.count return #char "\0";
return input.data[cursor + 1];
}
match_character :: (lexer : *Lexer, expected : u8) -> bool {
if is_at_end(lexer) return false;
if lexer.input.data[lexer.cursor] != expected return false;
lexer.cursor += 1;
return true;
}
identifier :: (lexer : *Lexer) -> *Token {
while is_alpha(peek_char(lexer)) || is_digit(peek_char(lexer)) || peek_char(lexer) == #char "_" {
advance(lexer);
}
return make_identifier(lexer, identifier_kind(lexer));
}
directive :: (lexer : *Lexer) -> *Token {
advance(lexer);
while is_alpha(peek_char(lexer)) || is_digit(peek_char(lexer)) || peek_char(lexer) == #char "_" {
advance(lexer);
}
return make_directive(lexer);
}
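// Numbers: integer and float literals. A trailing 'f' suffix is rejected on
// purpose; see the error message below.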
number :: (lexer : *Lexer) -> *Token {
while is_digit(peek_char(lexer)) advance(lexer);
is_float := false;
if peek_char(lexer) == #char "." && is_digit(peek_next_char(lexer)) {
is_float = true;
advance(lexer);
f_suffix := false;
while is_digit(peek_char(lexer)) {
advance(lexer);
}
if peek_char(lexer) == #char "f" {
advance(lexer);
record_error(lexer, "We don't use 'f' suffixes for floating point values.");
return null;
}
}
if is_float {
return make_float(lexer);
}
return make_int(lexer);
}
identifier_kind :: (using lexer : *Lexer) -> Token_Kind {
length := cursor - start;
identifier : string;
identifier.data = *input.data[start];
identifier.count = length;
if identifier == "bool" return .TOKEN_BOOL;
if identifier == "case" return .TOKEN_CASE;
if identifier == "columnmajor" return .TOKEN_COLUMNMAJOR;
if identifier == "const" return .TOKEN_CONST;
if identifier == "continue" return .TOKEN_CONTINUE;
if identifier == "default" return .TOKEN_DEFAULT;
if identifier == "directive" return .TOKEN_DIRECTIVE;
if identifier == "discard" return .TOKEN_DIRECTIVE;
if identifier == "discard" return .TOKEN_DISCARD;
if identifier == "do" return .TOKEN_DO;
if identifier == "double" return .TOKEN_DOUBLE;
if identifier == "else" return .TOKEN_ELSE;
if identifier == "export" return .TOKEN_EXPORT;
if identifier == "extern" return .TOKEN_EXTERN;
if identifier == "false" return .TOKEN_FALSE;
if identifier == "for" return .TOKEN_FOR;
if identifier == "half" return .TOKEN_HALF;
if identifier == "hint" return .TOKEN_HINT;
if identifier == "if" return .TOKEN_IF;
if identifier == "in" return .TOKEN_IN;
if identifier == "inout" return .TOKEN_INOUT;
if identifier == "instance" return .TOKEN_INSTANCE;
if identifier == "matrix" return .TOKEN_MATRIX;
if identifier == "meta" return .TOKEN_META;
if identifier == "optional" return .TOKEN_OPTIONAL;
if identifier == "out" return .TOKEN_OUT;
if identifier == "pixel" return .TOKEN_PIXEL;
if identifier == "properties" return .TOKEN_PROPERTIES;
if identifier == "return" return .TOKEN_RETURN;
if identifier == "register" return .TOKEN_REGISTER;
if identifier == "struct" return .TOKEN_STRUCT;
if identifier == "switch" return .TOKEN_SWITCH;
if identifier == "true" return .TOKEN_TRUE;
if identifier == "unorm" return .TOKEN_UNORM;
if identifier == "unsigned" return .TOKEN_UNSIGNED;
if identifier == "uint" return .TOKEN_UINT;
if identifier == "vector" return .TOKEN_VECTOR;
if identifier == "vertex" return .TOKEN_VERTEX;
if identifier == "void" return .TOKEN_VOID;
if identifier == "while" return .TOKEN_WHILE;
return .TOKEN_IDENTIFIER;
}
error_token :: (lexer : *Lexer, message : string) -> *Token {
token : *Token = new_token(lexer, .TOKEN_ERROR);
lexer.result.had_error = true;
token.error = copy_string(message);
return token;
}
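// Records an error message together with a source range for diagnostics.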
record_error :: (lexer : *Lexer, message : string) {
error : Compiler_Message;
error.message_kind = .Error;
error.message = copy_string(message);
error.path = lexer.path;
token := error_token(lexer, message);
source_location : Source_Range;
source_location.main_token = token.*;
// Widen the token to cover the whole line up to the error, so diagnostics
// can show the leading context.
token.length += token.column;
token.source -= token.column;
token.column = 0;
source_location.begin = token.*;
source_location.end = token.*;
array_add(*error.source_locations, source_location);
lexer.result.had_error = true;
array_add(*lexer.result.messages, error);
}
make_int :: (lexer : *Lexer) -> *Token {
token : *Token = new_token(lexer, .TOKEN_INTLITERAL);
str : string = .{ count = token.length,
data = *lexer.input.data[lexer.start] };
value, ok := string_to_int(str);
if ok {
token.integer_value = value;
}
return token;
}
make_float :: (lexer : *Lexer) -> *Token {
token : *Token = new_token(lexer, .TOKEN_FLOATLITERAL);
str : string = .{ count = token.length,
data = *lexer.input.data[lexer.start] };
value, ok := string_to_float(str);
if ok {
token.float_value = value;
}
return token;
}
make_string :: () {
// @Incomplete: String literals are not lexed yet.
}
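// Appends a token to the result array and returns a pointer into that array.
// Careful: the pointer is only valid until the next append reallocates.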
new_token :: (lexer : *Lexer, kind : Token_Kind) -> *Token {
length := lexer.cursor - lexer.start;
token : Token;
token.kind = kind;
token.line = lexer.current_line;
token.length = length;
token.column = lexer.current_column;
token.index = lexer.cursor - token.length;
if token.length > 0 {
token.source = *lexer.input[token.index];
} else if token.index > 0 {
// Zero-length tokens (like TOKEN_EOF) point at the previous character.
token.source = *lexer.input[token.index - 1];
} else {
token.source = lexer.input.data;
}
lexer.current_column += length;
array_add(*lexer.result.tokens, token);
return *lexer.result.tokens[lexer.result.tokens.count - 1];
}
make_directive :: (lexer : *Lexer) -> *Token {
lexer.start += 1;
return make_identifier(lexer, .TOKEN_DIRECTIVE);
}
make_identifier :: (lexer : *Lexer, kind : Token_Kind) -> *Token {
token : *Token = new_token(lexer, kind);
name : string = .{ count = token.length,
data = *lexer.input.data[lexer.start] };
token.ident_value = name;
return token;
}
make_token :: (lexer : *Lexer, token_kind : Token_Kind) -> *Token {
return new_token(lexer, token_kind);
}
skip_whitespace :: (lexer : *Lexer) {
while true {
c := peek_char(lexer);
if c == {
case #char " "; {
lexer.current_column += 1;
advance(lexer);
continue;
}
case #char "\r"; #through;
case #char "\t"; {
advance(lexer);
continue;
}
case #char "\n"; {
advance(lexer);
lexer.current_line += 1;
lexer.current_column = 0;
continue;
}
case #char "/"; {
next := peek_next_char(lexer);
if next == #char "/" {
while peek_char(lexer) != #char "\n" && !is_at_end(lexer) {
advance(lexer);
}
continue;
} else {
return;
}
}
}
return;
}
}
advance :: (using lexer : *Lexer) -> u8 {
c := input.data[cursor];
cursor += 1;
return c;
}
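// Main dispatch: skip whitespace and comments, then scan exactly one token
// starting at the current cursor position.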
scan_next_token :: (lexer : *Lexer) -> *Token {
skip_whitespace(lexer);
lexer.start = lexer.cursor;
if is_at_end(lexer) return make_token(lexer, .TOKEN_EOF);
c := advance(lexer);
if c == #char "#" return directive(lexer);
if is_alpha(c) return identifier(lexer);
if is_digit(c) return number(lexer);
if c == {
case #char "+"; {
if match_character(lexer, #char "=") return make_token(lexer, .TOKEN_PLUSEQUALS);
return make_token(lexer, .TOKEN_PLUS);
}
case #char "-"; {
if match_character(lexer, #char "=") return make_token(lexer, .TOKEN_MINUSEQUALS);
if match_character(lexer, #char ">") return make_token(lexer, .TOKEN_ARROW);
return make_token(lexer, .TOKEN_MINUS);
}
case #char "*"; {
if match_character(lexer, #char "=") return make_token(lexer, .TOKEN_TIMESEQUALS);
return make_token(lexer, .TOKEN_STAR);
}
case #char "/"; {
if match_character(lexer, #char "=") return make_token(lexer, .TOKEN_DIVEQUALS);
return make_token(lexer, .TOKEN_SLASH);
}
case #char "%"; {
if match_character(lexer, #char "=") return make_token(lexer, .TOKEN_MODEQUALS);
return make_token(lexer, .TOKEN_MOD);
}
case #char ":"; {
if match_character(lexer, #char ":") return make_token(lexer, .TOKEN_DOUBLECOLON);
return make_token(lexer, .TOKEN_COLON);
}
case #char "@"; {
return make_token(lexer, .TOKEN_AT);
}
case #char "|"; {
if match_character(lexer, #char "|") return make_token(lexer, .TOKEN_LOGICALOR);
}
case #char "&"; {
if match_character(lexer, #char "&") return make_token(lexer, .TOKEN_LOGICALAND);
}
case #char "!"; {
if match_character(lexer, #char "=") return make_token(lexer, .TOKEN_ISNOTEQUAL);
}
case #char "="; {
if match_character(lexer, #char "=") return make_token(lexer, .TOKEN_ISEQUAL);
return make_token(lexer, .TOKEN_ASSIGN);
}
case #char ">"; {
if match_character(lexer, #char "=") return make_token(lexer, .TOKEN_GREATEREQUALS);
return make_token(lexer, .TOKEN_GREATER);
}
case #char "<"; {
if match_character(lexer, #char "=") return make_token(lexer, .TOKEN_LESSEQUALS);
return make_token(lexer, .TOKEN_LESS);
}
case #char "{"; {
return make_token(lexer, .TOKEN_LEFTBRACE);
}
case #char "}"; {
return make_token(lexer, .TOKEN_RIGHTBRACE);
}
case #char "("; {
return make_token(lexer, .TOKEN_LEFTPAREN);
}
case #char ")"; {
return make_token(lexer, .TOKEN_RIGHTPAREN);
}
case #char "["; {
return make_token(lexer, .TOKEN_LEFTBRACKET);
}
case #char "]"; {
return make_token(lexer, .TOKEN_RIGHTBRACKET);
}
case #char ";"; return make_token(lexer, .TOKEN_SEMICOLON);
case #char ","; return make_token(lexer, .TOKEN_COMMA);
case #char "."; return make_token(lexer, .TOKEN_DOT);
}
s : string = .{ count = 1, data = *c };
record_error(lexer, tprint("Invalid token: %", s));
return null;
// return error_token(lexer, tprint("Invalid token: %", s));
}
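// Debug pretty-printers: render each token's kind, position, and value in
// aligned columns.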
pretty_print_token :: (token : *Token, builder : *String_Builder) {
MAX :: 18;
kind_name := enum_names(Token_Kind)[cast(int)token.kind];
diff := MAX - kind_name.count;
print_to_builder(builder, "{kind = %; ", token.kind);
for i : 0..diff - 1 {
append(builder, " ");
}
append_to_length :: (builder : *String_Builder, number : int) {
if number < 10 {
append(builder, " ");
} else if number < 100 {
append(builder, " ");
} else if number < 1000 {
append(builder, " ");
} else if number < 10000 {
append(builder, " ");
}
}
print_to_builder(builder, "; index = %", token.index);
append_to_length(builder, token.index);
print_to_builder(builder, "; length = %", token.length);
append_to_length(builder, token.length);
print_to_builder(builder, "line = %", token.line);
append_to_length(builder, token.line);
print_to_builder(builder, "; column = %", token.column);
append_to_length(builder, token.column);
append(builder, "; value ='");
value_length : int;
if token.kind == .TOKEN_IDENTIFIER {
print_to_builder(builder, "%", token.ident_value);
} else if token.kind == .TOKEN_INTLITERAL {
print_to_builder(builder, "%", token.integer_value);
} else if token.kind == .TOKEN_FLOATLITERAL {
print_to_builder(builder, "%", token.float_value);
} else if token.kind == .TOKEN_ERROR {
print_to_builder(builder, "%", token.error);
} else {
source : string = .{ count = token.length,
data = token.source };
print_to_builder(builder, "%", source);
}
append(builder, "'; }\n");
}
pretty_print_tokens :: (lexer : *Lexer, allocator : Allocator) -> string {
builder : String_Builder;
init_string_builder(*builder,, allocator);
token : *Token = scan_next_token(lexer);
while token && token.kind != .TOKEN_EOF {
pretty_print_token(token, *builder);
token = scan_next_token(lexer);
}
return builder_to_string(*builder,, allocator);
}
pretty_print_tokens :: (tokens : []Token, allocator : Allocator) -> string {
builder : String_Builder;
init_string_builder(*builder,, allocator);
for * token : tokens {
pretty_print_token(token, *builder);
}
return builder_to_string(*builder,, allocator);
}
output_as_code_string :: (lexer : *Lexer, allocator : Allocator) -> string {
builder : String_Builder;
new_context := context;
new_context.allocator = allocator;
push_context new_context {
init_string_builder(*builder); // @Incomplete: Consider passing builder as argument
token : *Token = scan_next_token(lexer);
while token && token.kind != .TOKEN_EOF {
// @Incomplete: Nothing is written to the builder yet; the loop only drains the lexer.
token = scan_next_token(lexer);
}
return builder_to_string(*builder);
}
}
print_token_pointer :: (builder : *String_Builder, token : Token) {
for i : 0..token.column - 1 {
append(builder, " ");
}
for i : 0..token.length - 1 {
append(builder, "^");
}
}
print_from_source_location :: (builder : *String_Builder, source_location : Source_Range, indentation : int = 0) {
begin := source_location.begin;
end := source_location.end;
token_string : string;
count := end.index - begin.index + end.length;
if indentation > 0 {
indent(builder, indentation);
for 0..count - 1 {
c := begin.source[it];
if c == #char "\n" {
append(builder, "\n");
indent(builder, indentation);
} else {
s : string;
s.count = 1;
s.data = *c;
print_to_builder(builder, "%", s);
}
}
} else {
token_string = .{ count = count, data = begin.source };
indent(builder, indentation);
print_to_builder(builder, "%", token_string);
}
}
print_from_source_location :: (source_location : Source_Range, allocator := context.allocator, indentation : int = 0) -> string {
builder : String_Builder;
init_string_builder(*builder,, allocator);
print_from_source_location(*builder, source_location, indentation);
return builder_to_string(*builder,, allocator);
}
lex :: (lexer : *Lexer, allocator : Allocator = context.allocator) -> Lexing_Result {
lexer.result.tokens.allocator = allocator;
token : *Token = scan_next_token(lexer);
while token && token.kind != .TOKEN_EOF {
token = scan_next_token(lexer);
}
return lexer.result;
}
init_lexer_from_string :: (lexer : *Lexer, input : string) {
ok := read_input_from_string(lexer, input);
if !ok {
record_error(lexer, "Unable to initialize from string\n");
lexer.result.had_error = true;
}
}
init_lexer_from_file :: (lexer : *Lexer, file_path : string) {
ok := read_input_from_file(lexer, file_path);
if !ok {
record_error(lexer, tprint("Unable to read file: %\n", file_path));
lexer.result.had_error = true;
}
}
read_input_from_string :: (lexer : *Lexer, input : string) -> bool {
lexer.input = input;
lexer.cursor = 0;
lexer.start = 0;
lexer.current_line = 1;
lexer.current_column = 0;
return true;
}
read_input_from_file :: (lexer : *Lexer, file_path : string) -> bool {
assert(file_path != "");
value, success := read_entire_file(file_path, true, true);
if !success {
free(value);
return false;
}
lexer.path = copy_string(file_path);
lexer.input = value;
lexer.cursor = 0;
lexer.start = 0;
lexer.current_line = 1;
lexer.current_column = 0;
return true;
}
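// A minimal usage sketch (hypothetical driver code, not part of this file):
//
// lexer : Lexer;
// init_lexer_from_string(*lexer, "vertex void main() { return; }");
// result := lex(*lexer);
// if !result.had_error print("%", pretty_print_tokens(result.tokens, context.allocator));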
#import "Basic";