Aria

A low-level systems programming language
git clone git://git.m21c.me/Aria.git
Log | Files | Refs | LICENSE

commit 551ac80fec6b0c7862a669bbe1e7b3af4c11584f
parent 6dca26058da7394d2864fe10ab9dce9f95ab7dca
Author: m21c  <ho*******@gmail.com>
Date:   Sun, 25 Jul 2021 15:15:06 +0200

worked on record parsing + improved typecheck() function

Diffstat:
M compiler.c | 374 +++++++++++++++++++++++++++++++++++++++++++++++++++----------------------------
1 file changed, 240 insertions(+), 134 deletions(-)

diff --git a/compiler.c b/compiler.c @@ -12,10 +12,17 @@ typedef unsigned int uint; /* - forward declarations - */ + typedef struct Node Node; typedef +struct Field Field; + +typedef +struct Record Record; + +typedef struct Type Type; typedef @@ -68,6 +75,7 @@ typedef enum Kind { KBREAK, KCONTINUE, KGOTO, KRETURN, KIF, KELSE, KCASE, KOF, KDO, KFOR, KLOOP, KWHILE, KUNTIL, + KSTRUCT, KUNION, /* Operators */ OSUFINC, OSUFDEC, OARRAY, OCALL, ODISP, @@ -107,6 +115,12 @@ bool isatomnode(Kind kind) typedef enum { + FENTRY = 0, + FALIAS, + FDEFAULT +} FieldKind; + +typedef enum { TERRTYPE = 1, TUNDEFINED, TVOID, TBOOL, TINFER, TUINFER, @@ -119,6 +133,8 @@ typedef enum { TTUPLE, + TFUNCTION, + TMAX } TypeKind; @@ -216,25 +232,57 @@ struct Node { /* ASTMT: rhs points to next stmt (linked list) */ }; +struct Field { + FieldKind kind; + SrcLoc loc; + + int key; + + Type *type; + + size_t fieldoffset; + + Type *targetrecord; + Field *targetfield; + + /* TODO(m21c): maybe put this somewhere else */ + Env *parentenv, *contentenv; + Node *content; /* init or function body */ + + Field *prev, *next; + Field *useprev, *usenext; +}; + +struct Record { + Field *fieldshead, *fieldstail; + Field *usehead, *usetail; +}; + struct Type { TypeKind kind; SrcLoc loc; + Type *target; + size_t size, align; union { + Record *recorddata; + struct { int offset, size; } bit; + struct { size_t length; size_t elemsize; } array; + Node *val; - Type *rtarget; + Type *rtarget; /* for tuples and function paramlist */ } u; - Type *target; + Decl *module; }; struct Decl { @@ -243,10 +291,11 @@ struct Decl { Type *type; - Env *env, *functionenv; + int key; + + Env *parentenv, *contentenv; Node *content; /* init or function body */ - int key; Decl *prev, *next; }; @@ -310,33 +359,32 @@ Source testsource; #define defaultloc {0, 1, "<builtin>"} Type prim[] = { - [TERRTYPE] = {TERRTYPE, defaultloc, 0, 0, {0}, NULL}, - [TUNDEFINED] = {TUNDEFINED, defaultloc, 0, 0, {0}, NULL}, - - [TVOID] = {TVOID, defaultloc, 0, 
0, {0}, NULL}, - - [TBOOL] = {TBOOL, defaultloc, 1, 1, {0}, NULL}, + [TERRTYPE] = {TERRTYPE, defaultloc, NULL, 0, 0, {0}, NULL}, + [TUNDEFINED] = {TUNDEFINED, defaultloc, NULL, 0, 0, {0}, NULL}, - [TINFER] = {TINFER, defaultloc, 4, 4, {0}, NULL}, - [TUINFER] = {TUINFER, defaultloc, 4, 4, {0}, NULL}, + [TVOID] = {TVOID, defaultloc, NULL, 0, 0, {0}, NULL}, + [TBOOL] = {TBOOL, defaultloc, NULL, 1, 1, {0}, NULL}, - [TS8] = {TS8, defaultloc, 1, 1, {0}, NULL}, - [TU8] = {TU8, defaultloc, 1, 1, {0}, NULL}, - [TS16] = {TS16, defaultloc, 2, 2, {0}, NULL}, - [TU16] = {TU16, defaultloc, 2, 2, {0}, NULL}, - [TS32] = {TS32, defaultloc, 4, 4, {0}, NULL}, - [TU32] = {TU32, defaultloc, 4, 4, {0}, NULL}, - [TS64] = {TS64, defaultloc, 8, 8, {0}, NULL}, - [TU64] = {TU64, defaultloc, 8, 8, {0}, NULL}, + [TINFER] = {TINFER, defaultloc, NULL, 4, 4, {0}, NULL}, + [TUINFER] = {TUINFER, defaultloc, NULL, 4, 4, {0}, NULL}, - [TF32] = {TF32, defaultloc, 4, 4, {0}, NULL}, - [TF64] = {TF64, defaultloc, 8, 8, {0}, NULL}, + [TS8] = {TS8, defaultloc, NULL, 1, 1, {0}, NULL}, + [TU8] = {TU8, defaultloc, NULL, 1, 1, {0}, NULL}, + [TS16] = {TS16, defaultloc, NULL, 2, 2, {0}, NULL}, + [TU16] = {TU16, defaultloc, NULL, 2, 2, {0}, NULL}, + [TS32] = {TS32, defaultloc, NULL, 4, 4, {0}, NULL}, + [TU32] = {TU32, defaultloc, NULL, 4, 4, {0}, NULL}, + [TS64] = {TS64, defaultloc, NULL, 8, 8, {0}, NULL}, + [TU64] = {TU64, defaultloc, NULL, 8, 8, {0}, NULL}, + [TF32] = {TF32, defaultloc, NULL, 4, 4, {0}, NULL}, + [TF64] = {TF64, defaultloc, NULL, 8, 8, {0}, NULL}, - [TPTR] = {TPTR, defaultloc, 8, 8, {0}, NULL}, - [TARRAY] = {TARRAY, defaultloc, 0, 0, {0}, NULL}, + [TPTR] = {TPTR, defaultloc, NULL, 8, 8, {0}, NULL}, + [TARRAY] = {TARRAY, defaultloc, NULL, 0, 0, {0}, NULL}, + [TTUPLE] = {TTUPLE, defaultloc, NULL, 0, 0, {0}, NULL}, - [TTUPLE] = {TTUPLE, defaultloc, 0, 0, {0}, NULL}, + [TFUNCTION] = {TFUNCTION, defaultloc, NULL, 0, 0, {0}, NULL}, }; int keywordlengths[OSTART - KSTART]; @@ -407,6 +455,7 @@ const char 
*nodestrings[] = { [KDO] = "do", [KFOR] = "for", [KLOOP] = "loop", [KWHILE] = "while", [KUNTIL] = "until", + [KSTRUCT] = "struct", [KUNION] = "union", /* Operators */ [OSUFINC] = "++", [OSUFDEC] = "--", [OARRAY] = "[]", [OCALL] = "()", @@ -1574,7 +1623,7 @@ makedecl(Source *source, int key, DeclKind kind) { decl->loc = source->tok.loc; decl->key = key; decl->type = prim + TVOID; - decl->functionenv = NULL; + decl->contentenv = NULL; assert(currenv); @@ -1595,7 +1644,7 @@ makedecl(Source *source, int key, DeclKind kind) { currenv->keycache[cacheindex] |= cachebit; - decl->env = currenv; + decl->parentenv = currenv; if (currenv->tail) { currenv->tail->next = decl; @@ -1869,8 +1918,8 @@ declaration(Source *source, Type *ty) { assert(decl); - assert(decl->functionenv == NULL); - decl->functionenv = functionenv; + assert(decl->contentenv == NULL); + decl->contentenv = functionenv; assert(decl->content == NULL); decl->content = result->lhs; @@ -1904,7 +1953,7 @@ declaration(Source *source, Type *ty) { } bool -isatom(Source *source) { +isnotatom(Source *source) { switch ((int) getkind(source)) { case 0: case '\n': case ',': case ';': @@ -1912,57 +1961,124 @@ isatom(Source *source) { case ')': case ']': case '}': case KELSE: case KUNTIL: - return false; + return true; } if (getnumops(getkind(source)) && getprec(getkind(source)) != PUNARY) - return false; + return true; - return true; + return false; +} + +bool +checkend(Source *source, bool hastail, int needindent, + const char *expecterrmsg) +{ + if (getkind(source) == '\n') { + gettok(source, false); + if (getkind(source) == ';') + error(getloc(source), expecterrmsg); + } + + if (source->lastkind == '\n' && source->lastindent < needindent) + return true; + + if (getkind(source) == ';') { + gettok(source, false); + + /* NOTE(m21c): used for REPL. it allows having + * semicolons on line-endings and nultiple + * adjacent semecolons in REPL-mode. 
*/ + if (getkind(source) == ';' || getkind(source) == '\n') { + /* TODO(m21c): output an error-message if not in REPL-mode */ + } + } + + if (isnotatom(source)) + return true; + + if (hastail && source->lastkind != '\n' && source->lastkind != ';') + error(getloc(source), "expected line delimiter"); + + return false; } Node * -stmtlist(Source *source, int indent, EnvKind envkind) { +recordbody(Source *source, int indent, EnvKind envkind) { Node *head = NULL, *tail = NULL; int needindent = nextindent(source, indent); Env *env = NULL; - /* printf("needident: %d, currindent: %d, lastindent: %d\n", needindent, currindent, lastindent); */ + Type *type; for (;;) { - Node *stmt; - - if (getkind(source) == '\n') { - gettok(source, false); - if (getkind(source) == ';') - error(getloc(source), "expected expression"); - } + Node *declstmt; - if (source->lastkind == '\n' && source->lastindent < needindent) + if (checkend(source, !!tail, needindent, + "expected declaration")) break; - if (getkind(source) == ';') { + if (!env) + env = pushenv(source, envkind); + + /* parse default values */ + if (getkind(source) == ODISP) { gettok(source, false); - /* NOTE(m21c): used for REPL. it allows having - * semicolons on line-endings and nultiple - * adjacent semecolons in REPL-mode. 
*/ - if (getkind(source) == ';' || getkind(source) == '\n') { - /* TODO(m21c): output an error-message if not in REPL-mode */ + /* parse field-declaration */ + } else { + if (getkind(source) == KUSE) { + gettok(source, false); } + + type = getbasetype(source, 0); + type = gettype(source, type); + declstmt = declaration(source, type); + + declstmt = makenode(declstmt, declstmt); + declstmt->kind = ASTMT; } - if (!isatom(source)) - break; + if (!tail) { + head = tail = declstmt; + } else { + tail->rhs = declstmt; + tail = declstmt; + } + } + + if (env) { + head = makenode(&source->tok, head); + head->kind = ASCOPE; + head->u.env = env; + env->stmts = head; + + popenv(source); + } + + return head; +} + +Node * +stmtlist(Source *source, int indent, EnvKind envkind) { + Node *head = NULL, *tail = NULL; + int needindent = nextindent(source, indent); + + Env *env = NULL; + /* printf("needident: %d, currindent: %d, lastindent: %d\n", needindent, currindent, lastindent); */ - if (tail && source->lastkind != '\n' && source->lastkind != ';') - error(getloc(source), "expected line delimiter"); + for (;;) { + Node *stmt; + + if (checkend(source, !!tail, needindent, + "expected expression")) + break; if (!env && (envkind != SFUNCTION || !source->currenv || source->currenv->kind != SPARAMLIST)) { - /* NOTE(m21c): if there is already a + /* NOTE(m21c): if there already is a * paramlist-environment and we want a * function-environment, we just use * paramlist as our function-environment. 
@@ -1991,14 +2107,17 @@ stmtlist(Source *source, int indent, EnvKind envkind) { assert(env == NULL); source->currenv->kind = SFUNCTION; env = source->currenv; + + popenv(source); } else if (env) { head = makenode(&source->tok, head); head->kind = ASCOPE; head->u.env = env; env->stmts = head; + + popenv(source); } - popenv(source); return head; } @@ -2006,7 +2125,6 @@ stmtlist(Source *source, int indent, EnvKind envkind) { Node * atom(Source *source, int flags) { Node *lhs = NULL, *savedis = source->lastis; - Type *ty; int indent; /* unary 'is'-operator */ @@ -2139,6 +2257,22 @@ atom(Source *source, int flags) { break; + case KSTRUCT: + case KUNION: + indent = source->lastindent; + lhs = makenode(&source->tok, NULL); + lhs->kind = getkind(source); + gettok(source, false); + if (getkind(source) == 'I') { + lhs->lhs = makenode(&source->tok, NULL); + gettok(source, false); + } else { + error(getloc(source), "expected identifier"); + } + lhs->rhs = recordbody(source, indent, SSCOPE); + + break; + case KNOT: lhs = makenode(&source->tok, NULL); gettok(source, false); @@ -2150,7 +2284,7 @@ atom(Source *source, int flags) { case KBREAK: case KCONTINUE: lhs = makenode(&source->tok, NULL); - lhs->kind = getkind(source) == KBREAK ? 
ABREAK : ACONTINUE; + lhs->kind = getkind(source); gettok(source, true); if (getkind(source) == ':') { @@ -2182,7 +2316,8 @@ atom(Source *source, int flags) { } } - if (isatom(source)) + /* if is atom */ + if (!isnotatom(source)) lhs->rhs = exprlist(source, false, NULL); break; @@ -2706,12 +2841,23 @@ typecheck(Env *env, Node *expr) { switch (getnumops(expr->kind)) { case 2: assert(rhs); - if (rhs->type->kind == TERRTYPE) + if (rhs->type->kind == TERRTYPE) { expr->type = prim + TERRTYPE; + return expr; + } else { + rhs = typecheck(env, rhs); + } case 1: assert(lhs); - if (lhs->type->kind == TERRTYPE) + if (lhs->type->kind == TERRTYPE) { expr->type = prim + TERRTYPE; + return expr; + } else { + lhs = typecheck(env, lhs); + } + + if (arithtuplereorder(env, expr, getnumops(expr->kind))) + goto joincomma; } if (expr->type && expr->type->kind == TERRTYPE) @@ -2723,11 +2869,6 @@ typecheck(Env *env, Node *expr) { return expr; case OPLUS: case OMINUS: - lhs = typecheck(env, lhs); - - if (arithtuplereorder(env, expr, 1)) - goto joincomma; - /* if (!isarithtype(lhs->type)) { error(&lhs->loc, "expression is not of arithmentic type"); @@ -2737,15 +2878,9 @@ typecheck(Env *env, Node *expr) { */ expr->type = lhs->type; - expr->lhs = conv(lhs); - return expr; + goto joinunaryconv; case OBNOT: - lhs = typecheck(env, lhs); - - if (arithtuplereorder(env, expr, 1)) - goto joincomma; - if (!isinttype(lhs->type)) { error( &lhs->loc, @@ -2757,15 +2892,9 @@ typecheck(Env *env, Node *expr) { } expr->type = lhs->type; - expr->lhs = conv(lhs); - return expr; + goto joinunaryconv; case OLNOT: - lhs = typecheck(env, lhs); - - if (arithtuplereorder(env, expr, 1)) - goto joincomma; - if (!isarithtype(lhs->type)) { error( &lhs->loc, @@ -2777,8 +2906,7 @@ typecheck(Env *env, Node *expr) { } expr->type = prim + TBOOL; - expr->lhs = conv(lhs); - return expr; + goto joinunaryconv; case OCAST: /* @@ -2794,12 +2922,6 @@ typecheck(Env *env, Node *expr) { case OMUL: case ODIV: case OMOD: case OADD: case 
OSUB: - lhs = typecheck(env, lhs); - rhs = typecheck(env, rhs); - - if (arithtuplereorder(env, expr, 2)) - goto joincomma; - /* usual arithmetic conversion */ if (isarithtype(lhs->type) && isarithtype(rhs->type)) { if (lhs->type->kind < rhs->type->kind) @@ -2816,17 +2938,9 @@ typecheck(Env *env, Node *expr) { return expr; } - expr->lhs = wrap(expr->type, lhs); - expr->rhs = wrap(expr->type, rhs); - return expr; + goto joinbinarywrap; case OBAND: case OBOR: case OXOR: - lhs = typecheck(env, lhs); - rhs = typecheck(env, rhs); - - if (arithtuplereorder(env, expr, 2)) - goto joincomma; - if (isinttype(lhs->type) && isinttype(rhs->type)) { if (lhs->type->kind < rhs->type->kind) { expr->type = rhs->type; @@ -2842,17 +2956,9 @@ typecheck(Env *env, Node *expr) { expr->type = prim + TERRTYPE; } - expr->lhs = wrap(expr->type, lhs); - expr->rhs = wrap(expr->type, rhs); - return expr; + goto joinbinarywrap; case OLSH: case ORSH: case OARSH: - lhs = typecheck(env, lhs); - rhs = typecheck(env, rhs); - - if (arithtuplereorder(env, expr, 2)) - goto joincomma; - if (isinttype(lhs->type) && isinttype(rhs->type)) { expr->type = lhs->type; } else { @@ -2864,19 +2970,12 @@ typecheck(Env *env, Node *expr) { expr->type = prim + TERRTYPE; } - expr->lhs = wrap(expr->type, lhs); /* this should be unneeded */ - expr->rhs = wrap(expr->type, rhs); - return expr; + /* should be only wrap rhs */ + goto joinbinarywrap; case OEQU: case ONEQ: case OLET: case OLEQ: case OGRT: case OGEQ: - lhs = typecheck(env, lhs); - rhs = typecheck(env, rhs); - - if (arithtuplereorder(env, expr, 2)) - goto joincomma; - if (isarithtype(lhs->type) && isarithtype(rhs->type)) { expr->type = prim + TBOOL; } else { @@ -2888,17 +2987,9 @@ typecheck(Env *env, Node *expr) { expr->type = prim + TERRTYPE; } - expr->lhs = conv(lhs); - expr->rhs = conv(rhs); - return expr; + goto joinbinaryconv; case OLAND: case OLOR: - lhs = typecheck(env, lhs); - rhs = typecheck(env, rhs); - - if (arithtuplereorder(env, expr, 2)) - goto 
joincomma; - if (isarithtype(lhs->type) && isarithtype(rhs->type)) { expr->type = prim + TBOOL; } else { @@ -2910,9 +3001,7 @@ typecheck(Env *env, Node *expr) { expr->type = prim + TERRTYPE; } - expr->lhs = conv(lhs); - expr->rhs = conv(rhs); - return expr; + goto joinbinaryconv; case OASS: case OMULA: case ODIVA: case OMODA: @@ -2920,12 +3009,6 @@ typecheck(Env *env, Node *expr) { case OADDA: case OSUBA: case OANDA: case OORA: case OXORA: - lhs = typecheck(env, lhs); - rhs = typecheck(env, rhs); - - if (arithtuplereorder(env, expr, 2)) - goto joincomma; - switch ((int) expr->kind) { case OASS: expr->type = lhs->type; @@ -3038,6 +3121,17 @@ typecheck(Env *env, Node *expr) { default: return expr; } + +joinbinaryconv: + expr->rhs = conv(rhs); +joinunaryconv: + expr->lhs = conv(lhs); + return expr; + +joinbinarywrap: + expr->rhs = wrap(expr->type, rhs); + expr->lhs = wrap(expr->type, lhs); + return expr; } Node * @@ -3573,8 +3667,8 @@ printdeclaration(FILE *out, Decl *decl, int indent) { n += highlight(out, HLDELIM); n += fprintf(out, "("); - if (decl->functionenv) { - head = decl->functionenv->head; + if (decl->contentenv) { + head = decl->contentenv->head; } for (param = head; param; param = param->next) { @@ -3794,6 +3888,18 @@ printexpr(FILE *out, Node *expr, int indent) { break; default: + n += highlight(out, HLINFO); + n += fprintf(out, "node(%u)", expr->kind); + if (expr->lhs) { + n += fprintf(out, " -> "); + n += printsubexpr(out, expr->lhs, true, indent); + } + if (expr->rhs) { + n += highlight(out, HLINFO); + n += fprintf(out, " => "); + n += printsubexpr(out, expr->rhs, false, indent); + } + break; } }