Working yacc parsing

This commit is contained in:
attilavs2 2025-03-07 21:40:30 +01:00
parent 4492720a1c
commit be2c4b95df
10 changed files with 225 additions and 262 deletions

6
.gitignore vendored
View file

@ -4,3 +4,9 @@ build-tmp/
*.amd64
*.exe
*.swp
#yacc/lex files
*.yy.c
*.gv
*.tab.h
*.tab.c

View file

@ -118,3 +118,4 @@ typedef struct {
#ifndef win
_Static_assert(sizeof(Value) == 8);
#endif
_Static_assert(sizeof(Tag) <= sizeof(i32));

View file

@ -48,20 +48,19 @@ enum BuiltinStatements {
BI__end
};
struct Statement;
struct Statement {
i32 type;
i32 is_const; // Statement is constant, != is a constant - TODO : implem
struct Statement *children;
void **children;
i32 child_n;
union {
Value cons;
struct {
i32 var_id;
Tag var_type;
}
};
FnSig *func;
};

View file

@ -52,7 +52,7 @@ void set_bit(u32 *bitmap, i32 pos){
bitmap[pos/32] |= (1<<(pos%32));
}
i32 hashmap_insert(HashMap *map, char *str, i32 value){
MapItem *hashmap_insert(HashMap *map, char *str){
#if DEBUG > 0
float load_factor = (float)(map->item_n+1)/(float)(map->curr_len);
//printf("%f\n", load_factor);
@ -64,7 +64,7 @@ i32 hashmap_insert(HashMap *map, char *str, i32 value){
#endif
if(map->item_n+1 >= map->curr_len)
return HASH_NULL;
return NULL;
i32 hsh = hash(map->curr_len, str);
MapItem *match = hashmap_get(map, str);
if(match){
@ -73,7 +73,6 @@ i32 hashmap_insert(HashMap *map, char *str, i32 value){
if(!map->bit_free[hsh/32]){
map->buffer[hsh].hash = hsh;
map->buffer[hsh].value = map->curr_id++;
strncpy(map->buffer[hsh].str, str, 32);
set_bit(map->bit_free, hsh);
map->item_n++;

View file

@ -15,7 +15,7 @@ typedef struct {
char str[32];
i32 id;
void *fnsig;
Tag type;
i32 type;
} MapItem;
@ -40,7 +40,7 @@ i32 hash(i32 max, char *str);
// Returns inserted MapItem, if error NULL
// If str is duplicate, returns the original
MapItem *hashmap_insert(HashMap *map, char *str, i32 value);
MapItem *hashmap_insert(HashMap *map, char *str);
// Returns NULL if error/not found
MapItem *hashmap_get(HashMap *map, char *str);

149
src/parse_utils.c Normal file
View file

@ -0,0 +1,149 @@
#include <assert.h>
#include <ctype.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "types.h"
#include "byte_defs.h"
#include "code_defs.h"
#include "parser.h"
#include "hash.h"
ASTStack *stack;
int make_stack(){
stack = malloc(sizeof(ASTStack)+sizeof(Statement)*2048);
if(!stack)
return 1;
if(heap_hashmap(&stack->symbol_map, SYMBOL_MAP_S))
return 1;
stack->stack_size = 2048;
return 0;
}
// Source-level names of the built-in types. get_type() searches entries
// 1..5 and returns the matching index as the type field of the Tag;
// entry 0 is an empty placeholder that is never matched.
char *bi_type_names[6] = {
    [0] = "",
    [1] = "int",
    [2] = "fix",
    [3] = "float",
    [4] = "str",
    [5] = "fn"
};
/*
 * Looks up a built-in type name.
 *
 * str: NUL-terminated type name, compared against bi_type_names[1..5].
 *
 * Returns a Tag whose second field is the index of the matching name and
 * whose first field is 0; both fields are 0 when no name matches.
 * Prints a trace line on stdout.
 */
Tag get_type(char *str){
    Tag found = {0, 0};
    int idx = 1;

    puts("get_type");
    while(idx < 6){
        if(strcmp(bi_type_names[idx], str) == 0){
            found = (Tag){0, idx};
            break;
        }
        idx++;
    }
    return found;
}
/*
 * Allocates a constant statement holding the integer `val`.
 *
 * Returns the new statement, taken from the global statement stack.
 * Reports an error through yyerror() when the stack is full.
 * Prints a trace line on stdout.
 */
Statement *make_iconst(i32 val){
    puts("make_iconst");

    // The statement buffer has a fixed capacity; refuse to write past it.
    if(stack->curr_statement >= stack->stack_size)
        yyerror("Statement stack overflow");

    Statement *stat = &stack->statements[stack->curr_statement++];
    stat->type = ST_Const;
    stat->cons = (Value){{.v4B = {val}},{.is_array = 0, .type = T_int}};
    stat->child_n = 0;
    return stat;
}
/*
 * Declares a new identifier and builds the statement describing it.
 *
 * type:  statement type stored in the returned statement.
 * vtype: type tag of the declared variable.
 * name:  identifier to register in the symbol map.
 *
 * Returns the new statement. Reports an error through yyerror() when the
 * identifier already exists, when the symbol map cannot take another
 * entry, or when the statement stack is full.
 * Prints a trace line on stdout.
 */
Statement *declare(i32 type, Tag vtype, char *name){
    puts("declare");

    if(hashmap_get(&stack->symbol_map, name)){
        yyerror("Redeclaring existing identifier");
    }
    if(stack->curr_statement >= stack->stack_size)
        yyerror("Statement stack overflow");

    // hashmap_insert() returns NULL on error (e.g. map full).
    MapItem *ret = hashmap_insert(&stack->symbol_map, name);
    if(!ret)
        yyerror("Symbol table is full");

    // Store the tag in the i32 slot without reading past the Tag object:
    // only sizeof(Tag) <= sizeof(i32) is guaranteed, so copy exactly
    // sizeof(Tag) bytes into a zeroed integer.
    i32 packed = 0;
    memcpy(&packed, &vtype, sizeof vtype);
    ret->type = packed;

    Statement *stat = &stack->statements[stack->curr_statement++];
    stat->type = type;
    stat->var_id = ret->id;
    stat->var_type = vtype;
    stat->child_n = 0;
    return stat;
}
/*
 * Builds a variable-reference statement for an existing identifier.
 *
 * name: identifier to look up in the symbol map.
 *
 * Returns the new statement carrying the identifier's id and type tag.
 * Reports an error through yyerror() when the identifier is unknown or
 * the statement stack is full.
 * Prints a trace line on stdout.
 */
Statement *variable_get(char *name){
    puts("variable_get");

    MapItem *ret = hashmap_get(&stack->symbol_map, name);
    if(!ret)
        yyerror("Undefined identifier");
    if(stack->curr_statement >= stack->stack_size)
        yyerror("Statement stack overflow");

    Statement *stat = &stack->statements[stack->curr_statement++];
    stat->type = ST_Var;
    stat->var_id = ret->id;
    // The tag is stored in the map item's i32 slot; copy it back out
    // byte-wise instead of dereferencing a pointer of the wrong type.
    memcpy(&stat->var_type, &ret->type, sizeof stat->var_type);
    stat->child_n = 0;
    return stat;
}
/*
 * Creates a block statement whose single child is `first`.
 *
 * The child array is heap-allocated and is grown by add_block().
 * Returns the new block. Reports an error through yyerror() when the
 * statement stack is full or the child array cannot be allocated.
 * Prints a trace line on stdout.
 */
Statement *make_block(Statement *first){
    puts("make_block");

    if(stack->curr_statement >= stack->stack_size)
        yyerror("Statement stack overflow");

    void **kids = malloc(sizeof *kids);
    if(!kids)
        yyerror("Out of memory");

    Statement *stat = &stack->statements[stack->curr_statement++];
    stat->type = ST_Block;
    stat->children = kids;
    stat->children[0] = first;
    stat->child_n = 1;
    return stat;
}
/*
 * Appends `add` to the children of `block`.
 *
 * block: statement whose child array was heap-allocated.
 * add:   statement to append.
 *
 * Returns `block`. Reports an error through yyerror() when the child
 * array cannot be grown; the existing array is left untouched in that case.
 * Prints a trace line on stdout.
 */
Statement *add_block(Statement *block, Statement *add){
    puts("add_block");

    // Grow through a temporary so the old array is not lost if realloc fails.
    void **grown = realloc(block->children,
                           sizeof *grown * (size_t)(block->child_n + 1));
    if(!grown)
        yyerror("Out of memory");

    block->children = grown;
    block->children[block->child_n++] = add;
    return block;
}
// Signature used for assignments: a single parameter. make_operation()
// overwrites params[0] with the type of the assigned variable before
// using it, so this object is shared, mutable state.
FnSig assign_sig = {.n_param = 1};
// Table of built-in signatures; currently has no entries.
// NOTE(review): an empty initializer for an array of unknown size is not
// standard C11 (accepted as a compiler extension) - confirm the intended
// contents.
FnSig bi_sig[] = {
};
/*
 * Builds an operation statement (built-in, call or assignment).
 *
 * type:     statement type. When 0, `name` is resolved in the symbol map:
 *           a non-function identifier turns the statement into BI_assign,
 *           a function identifier is checked against its signature.
 * extrainf: currently unused.
 * name:     identifier text, only read when `type` is 0. It is cut at the
 *           first space, tab or newline before the lookup.
 * nparam:   number of operand statements passed.
 * param:    first operand (ignored when nparam is 0), followed by the
 *           remaining nparam-1 operands as Statement* varargs.
 *
 * Returns the new statement with exactly `nparam` children. Errors
 * (unknown identifier, too many parameters, full statement stack, out of
 * memory) are reported through yyerror().
 * Prints a trace on stdout.
 */
Statement *make_operation(i32 type, i32 extrainf, char *name, i32 nparam,
                          Statement *param, ...){
    (void)extrainf;
    printf("make_operation: ");

    if(stack->curr_statement >= stack->stack_size)
        yyerror("Statement stack overflow");
    Statement *stat = &stack->statements[stack->curr_statement++];
    // Never leave a stale pointer behind when there are no operands.
    stat->children = NULL;
    stat->child_n = 0;

    if(!type){
        if(!name)
            yyerror("Undefined identifer");

        size_t len = strcspn(name, " \t\n");
        char *name2 = strndup(name, len);
        if(!name2)
            yyerror("Out of memory");
        puts(name2);
        MapItem *ret = hashmap_get(&stack->symbol_map, name2);
        free(name2); // only needed for the lookup

        if(!ret)
            yyerror("Undefined identifer");
        // NOTE(review): ret->type holds a Tag stored in an i32 slot; this
        // comparison with T_fn is kept as written - confirm it matches the
        // Tag layout.
        if(nparam > 1 && ret->type != T_fn)
            yyerror("Too many parameters to assignement");

        FnSig *fnsig = NULL;
        if(ret->type != T_fn){
            type = BI_assign;
            // Copy the stored tag out byte-wise rather than through a
            // pointer of the wrong type.
            memcpy(&assign_sig.params[0], &ret->type,
                   sizeof assign_sig.params[0]);
            fnsig = &assign_sig;
        }
        else {
            fnsig = ret->fnsig;
            if(!fnsig)
                yyerror("Function has no signature");
        }
        if(nparam > fnsig->n_param)
            yyerror("Too many parameters to function");
    }
    stat->type = type;

    if(nparam > 0){
        stat->children = malloc(sizeof *stat->children * (size_t)nparam);
        if(!stat->children)
            yyerror("Out of memory");

        // `param` is the first operand; va_start() positions the list
        // *after* it, so only the remaining operands come from va_arg().
        stat->children[stat->child_n++] = param;

        va_list prm;
        va_start(prm, param);
        for(i32 i = 1; i < nparam; i++)
            stat->children[stat->child_n++] = va_arg(prm, Statement*);
        va_end(prm);
        // Operands already own their slots on the statement stack, so the
        // allocation cursor is not advanced here.
    }
    return stat;
}
/*
 * Placeholder for recording the program's entry statement.
 * Currently it only prints a trace line on stdout; `statement` is unused.
 */
void set_entry_point(Statement *statement){
    (void)statement;
    puts("set_entry_point");
}

View file

@ -1,221 +0,0 @@
#include <ctype.h>
#include <stdio.h>
#include <stdarg.h>
#include "types.h"
#include "byte_defs.h"
#include "code_defs.h"
#include "parser.h"
#include "hash.h"
ASTStack *stack;
int make_stack(){
stack = malloc(sizeof(ASTStack)+sizeof(Statement)*2048);
if(!stack)
return 1;
if(heap_hashmap(&stack->symbol_map, SYMBOL_MAP_S))
return 1;
stack->stack_size = 2048;
return 0;
}
Statement *make_iconst(i32 val){
Statement *stat = &stack->statements[stack->curr_statement++];
stat->type = ST_Const;
stat->value = {{.v4B = {val}},{.is_array = 0, .type = T_int}};
stat->child_n = 0;
return stat;
}
Statement *declare(i32 type, Tag vtype, char *name){
Statement *stat = &stack->statements[stack->curr_statement++];
stat->type = type;
if(hashmap_get(&stack->symbol_map, name)){
return NULL;
}
MapItem *ret = hashmap_insert(&stack->symbol_map, name);
ret->type = vtype;
stat->var_id = ret->id;
stat->var_type = vtype;
stat->child_n = 0;
return stat;
}
Statement *variable_get(char *name){
Statement *stat = &stack->statements[stack->curr_statement++];
MapItem *ret = hashmap_get(&stack->symbol_map, name);
if(!ret)
return NULL;
stat->type = ST_Var;
stat->var_id = ret->id;
stat->var_type = ret->type;
stat->child_n = 0;
return stat;
}
Statement *make_block(Statement *first){
Statement *stat = &stack->statements[stack->curr_statement++];
stat->type = ST_Block;
stat->children = first;
stat->child_n = 1;
return stat;
}
Statement *add_block(Statement *block, Statement *add){
assert(add == &block->children[block->child_n]);
block->child_n++;
return block;
}
FnSig assign_sig = {.n_param = 1};
Statement *make_operation(i32 type, i32 extrainf, char *name, i32 nparam
Statement *param, ...){
Statement *stat = &stack->statements[stack->curr_statement++];
stat->children = &stack->statements[stack->curr_statement];
stat->child_n = 0;
MapItem *ret = hashmap_get(&stack->symbol_map, name);
if(type != BI_assign && ret->type != T_fn)
return NULL;
stat->type = type;
FnSig *fnsig = NULL;
if(type == BI_assign){
assign_sig.params[0] = ret->var_type;
fnsig = &assign_sig;
}
else {
fnsig = ret->fnsig;
}
va_list prm;
va_start(prm, param);
if(nparam > fnsig->nparam)
return NULL;
for(int i = 0; i < nparam; i++){
Statement *prm_n = va_arg(prm, Statement*);
stat->children[stat->child_n++] = prm_n;
stack->curr_statement++;
}
}
/*
i32 global_parenth_d;
int extract_symbol(ASTStack *stack, Statment *parent, i32 is_base, char *str){
i32 pos = 0;
i32 type = ST_Var;
i32 is_str = 0;
char ch = str[0];
if(isdigit(ch))
type = ST_Const;
if(ch == '"'){
type = ST_Const;
is_str = 1;
}
if(is_base && type == ST_Const)
return -1;
while(ch && (is_str && ch != '"')){
if(ch == '\n'
if(!is_str && (ch == ' '|| ch == '(' || ch == ')'))
break;
pos++;
ch = str[pos];
}
if(type != ST_Const){
// TODO : Replace
char *nstr = strndup(str, pos);
MapItem *ret = hashmap_get(&stack->symbol_map, nstr);
if(!ret) // Undeclared identifier
return -2;
parent->type = ret->type;
if(type == ST_Call)
parent->func = ret->ptr;
if(type == ST_Var)
parent->symb_id = ret->id;
free(nstr);
}
return pos;
}
// Does the final transformation -> recursion end
int parse_statement_final(ASTStack *stack){
}
// Recursively extracts statements
int parse_statement(ASTStack *stack){
i32 curr = stack->curr_statement;
Statement *stat = &stack->statements[curr];
stat->children = &stack->statements[stack->max_statement];
stat->child_n = 0;
i32 parenth_d = 0;
char **curr_pos = &stack->pos_stack[curr];
char ch = ' ';
i32 statement_complete;
i32 ret = 0;
do {
switch(ch){
case '\n':
stack->curr_line++;
stack->curr_column = -1;
break;
case ' ':
break;
case '(':
parenth_d++;
global_parenth_d++;
// It's another statment than our main one
if(parenth_d > 1){
stack->pos_stack[stack->max_statement] = *curr_pos;
stack->max_statement++;
stat->child_n++;
}
break;
case ')':
if(parenth_d)
parenth_d--;
else // We have reached the end of our statement
goto ps_exit;
break;
default:
break;
}
ch = **curr_pos;
(*curr_pos)++;
stack->curr_column++;
} while(ch);
ps_exit:
stack->curr_statement++;
return 0;
}
ASTStack *parse(char *text){
ASTStack *stack = make_stack();
if(!stack){
printf("Couldn't allocate parse buffer !\n");
return NULL;
}
//TODO : Insert builtins here
i32 ret = 0;
stack->pos_stack[0] = text;
do {
ret = parse_statement(stack);
} while(!ret);
return stack;
}*/

View file

@ -4,8 +4,12 @@
#pragma once
void yyerror(char *s);
int make_stack();
Tag get_type(char *str);
Statement *make_iconst(i32 val);
//TODO
@ -20,6 +24,7 @@ Statement *make_block(Statement *first);
Statement *add_block(Statement *block, Statement *add);
Statement *make_operation(i32 type, i32 extainf, char *str, i32 nparam
Statement *make_operation(i32 type, i32 extainf, char *str, i32 nparam,
Statement *param, ...);
void set_entry_point(Statement *statement);

View file

@ -9,39 +9,39 @@
%%
[()] return *yytext;
":vec" return VEC;
[:][_a-zA-Z]+ {
yyvalue.str = yytext+1;
yylval.str = yytext+1;
return TYPE_U;
}
/* Statements with special syntax */
"if" return IF;
"else" return ELSE;
"fn" return FN;
"var" return VAR;
"block" return BLOCK;
":vec" return VEC;
[-+/*%_a-zA-Z][-+/*%_a-zA-Z0-9]+ {
yyvalue.str = yytext;
[-+*/%a-zA-Z]+ {
yylval.str = yytext;
return G_IDENT;
}
[0-9]+ {
yyvalue.st = make_iconst(atoi(yytext));
yylval.st = make_iconst(atoi(yytext));
return CST;
}
[<=>!] return *yytext;
[()<=>!] return *yytext;
">=" return GTEQ;
"<=" return SMEQ;
[ \t\n]+ return WHITESPACE;
[ \t\n]+ ;
. yyerror("Invalid character");
. { yyerror("Invalid character"); }
%%

View file

@ -5,69 +5,83 @@
#include "code_defs.h"
#include "parser.h"
void yyerror(char *s);
int yylex();
%}
%union {
char *str;
Statement *st;
Tag tag;
}
%token WHITESPACE
%token <str> G_IDENT TYPE_U
%token VEC
%token VAR
%token IF
%nonassoc IFX
%nonassoc ELSE
%token ELSE
%token WHILE
%token FN
%token GTEQ
%token SMEQ
%token BLOCK
%token <st> CST
%type <st> stmt stmt_list
%type <tag> type
%%
program:
function '.' { exit(0); }
|
stmt_list '.' {set_entry_point($1); exit(0); }
;
function:
function stmt { set_entry_point($2); }
|
;
/*TODO*/
type:
TYPE_U
{Tag tag = get_type($1);
if(!tag.type) yyerror("Invalid type !");
$$ = tag;
}
| VEC TYPE_U
{Tag tag = get_type($2); tag.is_array = 1; $$ = tag;}
;
stmt:
'(' G_IDENT ')'
{$$ = make_operation(ST_Call, 0, $2)}
| WHITESPACE G_IDENT
{$$ = variable_get($2);}
| WHITESPACE CST
{$$ = $2;}
| '(' VAR WHITESPACE G_IDENT ')'
{$$ = declare(BI_var,{.is_array=0,.type=T_null},$4); )}
| '(' VAR TYPE WHITESPACE G_IDENT ')'
{$$ = make_operation(ST_Call, 0, $2, 0, NULL);}
| '(' VAR G_IDENT ')'
{Tag ntag = {0,T_null}; $$ = declare(BI_var,ntag,$3);}
| '(' VAR type G_IDENT ')'
{$$ = declare(BI_var, $3, $4);}
| '(' IF stmt stmt_list ')'
{$$ = make_operation(BI_if, 0, 2, $3, $4);}
{$$ = make_operation(BI_if, 0, NULL, 2, $3, $4);}
| '(' IF stmt stmt_list ')' '(' ELSE stmt_list ')'
{Statement *st = make_block(make_operation(BI_if, 0, NULL, 2, $3, $4));
$$ = add_block(st, make_operation(BI_else, 0, NULL, 1, $8));
}
| '(' WHILE stmt stmt_list ')'
{}
| '(' FN stmt stmt_list ')'
{}
| '(' GTEQ stmt stmt ')'
{}
| '(' SMEQ stmt stmt ')'
{}
| '(' '<' stmt stmt ')'
{}
| '(' '>' stmt stmt ')'
{}
| '(' '=' stmt stmt ')'
{}
| '(' '!' stmt ')'
| '(' G_IDENT stmt_list ')'
{}
| '(' BLOCK stmt_list ')'
{$$ = $3;}
| '(' G_IDENT stmt ')'
{$$ = make_operation(ST_None, 0, $2, 1, $3);}
| G_IDENT
{$$ = variable_get($1);}
| CST
{$$ = $1;}
;
stmt_list:
@ -78,3 +92,14 @@ stmt_list:
;
%%
/*
 * Error callback used by the parser and the AST helpers.
 * Writes "Error : <s>" to stderr and terminates the process with
 * status 1; it never returns to the caller.
 */
void yyerror(char *s){
    FILE *out = stderr;

    fprintf(out, "Error : %s\n", s);
    exit(1);
}
/*
 * Program entry point: sets up the global parser stack, then runs the
 * generated parser on standard input.
 *
 * Returns 1 if the parser stack cannot be allocated, otherwise the result
 * of yyparse() (0 on success).
 */
int main(){
    // make_stack() returns non-zero on allocation failure; parsing without
    // a stack would dereference a NULL pointer.
    if(make_stack()){
        yyerror("Couldn't allocate parser stack");
        return 1;
    }
    return yyparse();
}