From 8bfc8d13bf529693ceaa3456d698034b887579aa Mon Sep 17 00:00:00 2001 From: attilavs2 Date: Thu, 6 Mar 2025 22:13:18 +0100 Subject: [PATCH] Yacc ! --- src/byte_defs.h | 4 +- src/code_defs.h | 82 ++++++++++++++++------------- src/hash.c | 23 ++++----- src/hash.h | 13 +++-- src/main.c | 58 --------------------- src/parser.c | 134 +++++++++++++++++++++++++++++++++++++++++++----- src/parser.h | 15 ++++++ src/parser.l | 15 ++++++ src/parser.y | 11 ++++ src/types.h | 3 +- 10 files changed, 230 insertions(+), 128 deletions(-) delete mode 100755 src/main.c create mode 100644 src/parser.l create mode 100644 src/parser.y diff --git a/src/byte_defs.h b/src/byte_defs.h index 899f56c..2a7259e 100755 --- a/src/byte_defs.h +++ b/src/byte_defs.h @@ -37,7 +37,6 @@ enum OpTypes { OP_vpsh = 29 }; - enum TypeFlags { F_int = 0x1, F_fix = 0x2, @@ -68,8 +67,7 @@ typedef struct { Tag params[8]; i32 n_param; - -} FnSignature; +} FnSig; typedef struct { diff --git a/src/code_defs.h b/src/code_defs.h index 3965cfb..619ae1e 100755 --- a/src/code_defs.h +++ b/src/code_defs.h @@ -9,39 +9,43 @@ enum StatementTypes { ST_None = 0, // Not processed yet ST_Call = 1, // Any kind of function call - ST_Builtin = 2, // All special builtins (var, declarations, control flow...) + ST_Builtin = 2, // All special builtins (declarations, control flow...) ST_Const = 3, // Constant - ST_Var = 4 // Variable + ST_Var = 4, // Variable + ST_Main = 5, // Type of the root node + ST__end }; // Special expressions and ones that map directly to bytecode +// Should mesh with StatementTypes enum BuiltinStatements { - BI_assign = 0, - BI_var = 1, - BI_let = 2, - BI_if = 3, - BI_else = 4, - BI_while = 5, - BI_fn = 6, - BI_import = 7, - BI_add = 8, - BI_sub = 9, - BI_mul = 10, - BI_div = 11, - BI_mod = 12, - BI_sml = 13, - BI_sml_eq = 14, - BI_eq = 15, - BI_gt_eq = 16, - BI_gt = 17, - BI_not = 18, - BI_and = 19, - BI_or = 20, - BI_is = 21, - BI_cast = 22, - BI_len = 23, - BI_push = 24, - BI_pop = 25 + BI_assign = 6, + BI_var = 7, + BI_let = 8, + BI_if = 9, + BI_else = 10, + BI_while = 11, + BI_fn = 12, + BI_import = 13, + BI_add = 14, + BI_sub = 15, + BI_mul = 16, + BI_div = 17, + BI_mod = 18, + BI_sml = 19, + BI_sml_eq = 20, + BI_eq = 21, + BI_gt_eq = 22, + BI_gt = 23, + BI_not = 24, + BI_and = 25, + BI_or = 26, + BI_is = 27, + BI_cast = 28, + BI_len = 29, + BI_push = 30, + BI_pop = 31, + BI__end }; struct Statement; @@ -49,15 +53,18 @@ struct Statement; struct Statement { i32 type; - i32 is_const; // Statement is constant, != is a constant + i32 is_const; // Statement is constant, != is a constant - TODO : implem struct Statement *children; i32 child_n; union { Value cons; - i32 fn_id; - i32 builtin_type; - i32 var_id; + struct { + i32 var_id; + Tag var_type; + } + FnSig *func; }; + }; typedef struct Statement Statement; @@ -69,11 +76,14 @@ typedef struct { i32 curr_line; i32 curr_column; - i32 stack_size; - i32 max_statment; - i32 curr_statement; + i32 max_statement; // Unused while parsing + i32 curr_statement; + HashMap symbol_map; - char **pos_stack; // Pointers to text pos of statements, freed after parsing + + i32 stack_size; Statement statements[]; } ASTStack; + +_Static_assert(BI_assign == ST__end); diff --git a/src/hash.c b/src/hash.c index 80460ec..2455438 100755 --- a/src/hash.c +++ b/src/hash.c @@ -10,7 +10,6 @@ int heap_hashmap(HashMap *map, i32 size){ if(size < 32) return 1; map->curr_len = size; - map->curr_id = 0; map->buffer = malloc(sizeof(MapItem)*size); map->bit_free = malloc(sizeof(u32)*size/32); @@ -18,6 +17,7 @@ int heap_hashmap(HashMap *map, i32 size){ return 1; memset(map->bit_free, 0, sizeof(u32)*size/32); + memset(map->buffer, 0, sizeof(MapItem)*size); return 0; } @@ -52,7 +52,7 @@ void set_bit(u32 *bitmap, i32 pos){ bitmap[pos/32] |= (1<<(pos%32)); } -i32 hashmap_insert(HashMap *map, char *str){ +i32 hashmap_insert(HashMap *map, char *str, i32 value){ #if DEBUG > 0 float load_factor = (float)(map->item_n+1)/(float)(map->curr_len); //printf("%f\n", load_factor); @@ -66,18 +66,18 @@ i32 hashmap_insert(HashMap *map, char *str){ if(map->item_n+1 >= map->curr_len) return HASH_NULL; i32 hsh = hash(map->curr_len, str); - i32 match = hashmap_get(map, str); - if(match > 0){ + MapItem *match = hashmap_get(map, str); + if(match){ return match; } if(!map->bit_free[hsh/32]){ map->buffer[hsh].hash = hsh; - map->buffer[hsh].id = map->curr_id++; + map->buffer[hsh].value = map->curr_id++; strncpy(map->buffer[hsh].str, str, 32); set_bit(map->bit_free, hsh); map->item_n++; - return map->buffer[hsh].id; + return &map->buffer[hsh]; } i32 pos = hsh; @@ -95,17 +95,16 @@ i32 hashmap_insert(HashMap *map, char *str){ if(!taken){ map->buffer[pos].hash = hsh; - map->buffer[pos].id = map->curr_id++; strncpy(map->buffer[pos].str, str, 32); set_bit(map->bit_free, hsh); map->item_n++; - return map->buffer[pos].id; + return &map->buffer[pos]; } - return HASH_NULL; + return NULL; } -i32 hashmap_get(HashMap *map, char *str){ +MapItem *hashmap_get(HashMap *map, char *str){ i32 fhash = hash(map->curr_len, str); i32 pos = fhash; i32 match; @@ -120,7 +119,7 @@ i32 hashmap_get(HashMap *map, char *str){ if(match) - return map->buffer[pos].id; + return &map->buffer[pos]; else - return -1; + return NULL; } diff --git a/src/hash.h b/src/hash.h index 939cf04..97fad92 100755 --- a/src/hash.h +++ b/src/hash.h @@ -14,6 +14,8 @@ typedef struct { i32 hash; // Hashs are internally 24bit for bytecode ops char str[32]; i32 id; + void *fnsig; + Tag type; } MapItem; @@ -23,11 +25,12 @@ typedef struct { u32 *bit_free; // Bit map to track usage i32 curr_len; // In items i32 item_n; - i32 curr_id; i32 is_heap; // TODO } HashMap; +// Returns non-zero on error +// Size must be >= 32 int heap_hashmap(HashMap *map, i32 size); void free_hashmap(HashMap *map); @@ -35,7 +38,9 @@ void free_hashmap(HashMap *map); // Max is max value of hash i32 hash(i32 max, char *str); -// Returns hash - if error returns HASH_NULL -i32 hashmap_insert(HashMap *map, char *str); +// Returns inserted MapItem, if error NULL +// If str is duplicate, returns the original +MapItem *hashmap_insert(HashMap *map, char *str, i32 value); -i32 hashmap_get(HashMap *map, char *str); +// Returns NULL if error/not found +MapItem *hashmap_get(HashMap *map, char *str); diff --git a/src/main.c b/src/main.c deleted file mode 100755 index 28d366f..0000000 --- a/src/main.c +++ /dev/null @@ -1,58 +0,0 @@ -#include -#include -#include -#include -#include - -#include "types.h" -#include "byte_defs.h" -#include "hash.h" - -#define STR_N 20 - -int main(){ - - HashMap map; - if(heap_hashmap(&map, 256) ){ - printf("Failed to alloc hashmap\n"); - return 1; - } - - char *strings[] = { - "Je mange fromage", - "Les chausettes de l'archiducess", - "sont elles seches, archiseches", - "Now, for something completly dif", - "ferent.", - "Nobody expects the spanish inqui", - "sition !", - "Il est 4H du matin", - "very-long-and-verbose-function", - "Mais punaise de flute", - "Mais ou sont passes le couteau", - "et le bebe ?!", - "Surement le bebe ne peut pas", - "trouver les explosifs ici", - "Panoramix a fait de la potion", - "Pourquoi est ce que je ne peux", - "pas avoir une mitrailleuse", - "123456789ABCDEFGHIJKLMNOPQRSTUVX", - "a", - "b" - }; - - int ids[STR_N]; - - for(int i = 0; i < STR_N; i++) - ids[i] = hashmap_insert(&map, strings[i]); - i32 res = 0; - for(int i = 0; i < STR_N; i++){ - res = ids[i] - hashmap_get(&map, strings[i]); - printf("%s : %d:%d\n", strings[i], hashmap_get(&map, strings[i]),ids[i]); - } - assert(!res); - - free_hashmap(&map); - - return 0; -} diff --git a/src/parser.c b/src/parser.c index 6251510..1c9ceda 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1,4 +1,6 @@ +#include #include +#include #include "types.h" #include "byte_defs.h" @@ -6,21 +8,122 @@ #include "parser.h" #include "hash.h" -ASTStack *make_stack(){ - ASTStack *ret = malloc(sizeof(ASTStack)+sizeof(Statement)*2048); - if(!ret) - return NULL; - ret->pos_stack = malloc(sizeof(void*)*2048); - if(!ret->pos_stack) - return NULL; - if(heap_hashmap(&ret->symbol_map, SYMBOL_MAP_S)) - return NULL; - ret->stack_size = 2048; - return ret; +ASTStack *stack; + +int make_stack(){ + stack = malloc(sizeof(ASTStack)+sizeof(Statement)*2048); + if(!stack) + return 1; + if(heap_hashmap(&stack->symbol_map, SYMBOL_MAP_S)) + return 1; + stack->stack_size = 2048; + + return 0; } +Statement *make_iconst(i32 val){ + Statement *stat = &stack->statements[stack->curr_statement++]; + stat->type = ST_Const; + stat->value = {{.v4B = {val}},{.is_array = 0, .type = T_int}}; + stat->child_n = 0; + return stat; +} + +Statement *declare(i32 type, Tag vtype, char *name){ + Statement *stat = &stack->statements[stack->curr_statement++]; + stat->type = type; + if(hashmap_get(&stack->symbol_map, name)){ + return NULL; + } + MapItem *ret = hashmap_insert(&stack->symbol_map, name); + ret->type = vtype; + stat->var_id = ret->id; + stat->var_type = vtype; + stat->child_n = 0; + + return stat; +} + +Statement *variable_get(char *name){ + Statement *stat = &stack->statements[stack->curr_statement++]; + MapItem *ret = hashmap_get(&stack->symbol_map, name); + if(!ret) + return NULL; + stat->type = ST_Var; + stat->var_id = ret->id; + stat->var_type = ret->type; + stat->child_n = 0; + return stat; +} + +FnSig assign_sig = {.n_param = 1}; + +Statement *make_operation(i32 type, i32 extrainf, char *name, i32 nparam + Statement *param, ...){ + Statement *stat = &stack->statements[stack->curr_statement++]; + MapItem *ret = hashmap_get(&stack->symbol_map, name); + if(type != BI_assign && ret->type != T_fn) + return NULL; + stat->type = type; + FnSig *fnsig = NULL; + if(type == BI_assign){ + assign_sig.params[0] = ret->var_type; + fnsig = &assign_sig; + } + else { + fnsig = ret->fnsig; + } + + va_list prm; + va_start(prm, param); + for(int i = 0; i < nparam; i++){ + Statement *prm_n = va_arg(prm, Statement*); + + } +} + +/* i32 global_parenth_d; +int extract_symbol(ASTStack *stack, Statment *parent, i32 is_base, char *str){ + i32 pos = 0; + i32 type = ST_Var; + i32 is_str = 0; + char ch = str[0]; + if(isdigit(ch)) + type = ST_Const; + if(ch == '"'){ + type = ST_Const; + is_str = 1; + } + if(is_base && type == ST_Const) + return -1; + while(ch && (is_str && ch != '"')){ + if(ch == '\n' + if(!is_str && (ch == ' '|| ch == '(' || ch == ')')) + break; + pos++; + ch = str[pos]; + } + if(type != ST_Const){ + // TODO : Replace + char *nstr = strndup(str, pos); + + MapItem *ret = hashmap_get(&stack->symbol_map, nstr); + if(!ret) // Undeclared identifier + return -2; + parent->type = ret->type; + if(type == ST_Call) + parent->func = ret->ptr; + if(type == ST_Var) + parent->symb_id = ret->id; + + free(nstr); + } + + return pos; +} + // Does the final transformation -> recursion end int parse_statement_final(ASTStack *stack){ @@ -32,7 +135,7 @@ int parse_statement(ASTStack *stack){ Statement *stat = &stack->statements[curr]; stat->children = &stack->statements[stack->max_statement]; stat->child_n = 0; - i32 parent_d = 0; + i32 parenth_d = 0; char **curr_pos = &stack->pos_stack[curr]; char ch = ' '; i32 statement_complete; @@ -50,8 +153,10 @@ int parse_statement(ASTStack *stack){ case '(': parenth_d++; + global_parenth_d++; // It's another statment than our main one if(parenth_d > 1){ + stack->pos_stack[stack->max_statement] = *curr_pos; stack->max_statement++; stat->child_n++; } @@ -68,7 +173,7 @@ int parse_statement(ASTStack *stack){ break; } ch = **curr_pos; - *curr_pos++; + (*curr_pos)++; stack->curr_column++; } while(ch); @@ -85,6 +190,7 @@ ASTStack *parse(char *text){ printf("Couldn't allocate parse buffer !\n"); return NULL; } + //TODO : Insert builtins here i32 ret = 0; stack->pos_stack[0] = text; @@ -93,4 +199,4 @@ ASTStack *parse(char *text){ } while(!ret); return stack; -} +}*/ diff --git a/src/parser.h b/src/parser.h index 0d0a58e..6faa689 100644 --- a/src/parser.h +++ b/src/parser.h @@ -3,3 +3,18 @@ #include "code_defs.h" #pragma once + +int make_stack(); + +Statement *make_iconst(i32 val); + +//TODO +Statement *make_fconst(float val); +Statement *make_strconst(char *str); + +Statement *declare(i32 type, Tag vtype, char *name); + +Statement *variable_get(char *name); + +Statement *make_operation(i32 type, i32 extainf, char *str, i32 nparam + Statement *param, ...); diff --git a/src/parser.l b/src/parser.l new file mode 100644 index 0000000..b3ef68c --- /dev/null +++ b/src/parser.l @@ -0,0 +1,15 @@ +%{ + #include + #include "types.h" + #include "code_defs.h" + #include "parser.h" + #include "y.tab.h" + +%} + +%% + + + + +%% diff --git a/src/parser.y b/src/parser.y new file mode 100644 index 0000000..6ee3368 --- /dev/null +++ b/src/parser.y @@ -0,0 +1,11 @@ +%{ + #include + #include "types.h" + #include "code_defs.h" +%} + + + +%% + +%% diff --git a/src/types.h b/src/types.h index 4b0a3ef..8f20ff4 100755 --- a/src/types.h +++ b/src/types.h @@ -25,6 +25,7 @@ enum ValTypes { T_fix = 2, T_float = 3, T_str = 4, - T_fn = 5 + T_fn = 5, + T_any = 6 //Used internally for functions taking any type };