commit 551ac80fec6b0c7862a669bbe1e7b3af4c11584f
parent 6dca26058da7394d2864fe10ab9dce9f95ab7dca
Author: m21c <ho*******@gmail.com>
Date: Sun, 25 Jul 2021 15:15:06 +0200
worked on record parsing + improved typecheck() function
Diffstat:
| M | compiler.c | | | 374 | +++++++++++++++++++++++++++++++++++++++++++++++++++---------------------------- |
1 file changed, 240 insertions(+), 134 deletions(-)
diff --git a/compiler.c b/compiler.c
@@ -12,10 +12,17 @@ typedef unsigned int uint;
/* - forward declarations - */
+
typedef
struct Node Node;
typedef
+struct Field Field;
+
+typedef
+struct Record Record;
+
+typedef
struct Type Type;
typedef
@@ -68,6 +75,7 @@ typedef enum Kind {
KBREAK, KCONTINUE, KGOTO, KRETURN,
KIF, KELSE, KCASE, KOF, KDO,
KFOR, KLOOP, KWHILE, KUNTIL,
+ KSTRUCT, KUNION,
/* Operators */
OSUFINC, OSUFDEC, OARRAY, OCALL, ODISP,
@@ -107,6 +115,12 @@ bool isatomnode(Kind kind)
+/* Discriminator stored in Field.kind.
+ * NOTE(review): judging by the names these are regular entries, aliases
+ * and default values - confirm once the record parser uses them. */
typedef enum {
+ FENTRY = 0,
+ FALIAS,
+ FDEFAULT
+} FieldKind;
+
+typedef enum {
TERRTYPE = 1, TUNDEFINED,
TVOID, TBOOL, TINFER, TUINFER,
@@ -119,6 +133,8 @@ typedef enum {
TTUPLE,
+ TFUNCTION,
+
TMAX
} TypeKind;
@@ -216,25 +232,57 @@ struct Node {
/* ASTMT: rhs points to next stmt (linked list) */
};
+/* One member of a record; carries two independent pairs of list links
+ * (prev/next and useprev/usenext). */
+struct Field {
+ FieldKind kind; /* FENTRY, FALIAS or FDEFAULT */
+ SrcLoc loc;
+
+ int key; /* NOTE(review): presumably the identifier key, as in Decl.key - confirm */
+
+ Type *type;
+
+ size_t fieldoffset;
+
+ /* NOTE(review): presumably only meaningful for FALIAS fields - confirm */
+ Type *targetrecord;
+ Field *targetfield;
+
+ /* TODO(m21c): maybe put this somewhere else */
+ Env *parentenv, *contentenv;
+ Node *content; /* init or function body */
+
+ Field *prev, *next;
+ Field *useprev, *usenext;
+};
+
+/* Head/tail pointers of two Field lists.
+ * NOTE(review): presumably chained through Field.prev/next and
+ * Field.useprev/usenext respectively - confirm. */
+struct Record {
+ Field *fieldshead, *fieldstail;
+ Field *usehead, *usetail;
+};
+
struct Type {
TypeKind kind;
SrcLoc loc;
+ Type *target;
+
size_t size, align;
union {
+ Record *recorddata;
+
struct {
int offset, size;
} bit;
+
struct {
size_t length;
size_t elemsize;
} array;
+
Node *val;
- Type *rtarget;
+ Type *rtarget; /* for tuples and function paramlist */
} u;
- Type *target;
+ Decl *module;
};
struct Decl {
@@ -243,10 +291,11 @@ struct Decl {
Type *type;
- Env *env, *functionenv;
+ int key;
+
+ Env *parentenv, *contentenv;
Node *content; /* init or function body */
- int key;
Decl *prev, *next;
};
@@ -310,33 +359,32 @@ Source testsource;
#define defaultloc {0, 1, "<builtin>"}
Type prim[] = {
- [TERRTYPE] = {TERRTYPE, defaultloc, 0, 0, {0}, NULL},
- [TUNDEFINED] = {TUNDEFINED, defaultloc, 0, 0, {0}, NULL},
-
- [TVOID] = {TVOID, defaultloc, 0, 0, {0}, NULL},
-
- [TBOOL] = {TBOOL, defaultloc, 1, 1, {0}, NULL},
+ [TERRTYPE] = {TERRTYPE, defaultloc, NULL, 0, 0, {0}, NULL},
+ [TUNDEFINED] = {TUNDEFINED, defaultloc, NULL, 0, 0, {0}, NULL},
- [TINFER] = {TINFER, defaultloc, 4, 4, {0}, NULL},
- [TUINFER] = {TUINFER, defaultloc, 4, 4, {0}, NULL},
+ [TVOID] = {TVOID, defaultloc, NULL, 0, 0, {0}, NULL},
+ [TBOOL] = {TBOOL, defaultloc, NULL, 1, 1, {0}, NULL},
- [TS8] = {TS8, defaultloc, 1, 1, {0}, NULL},
- [TU8] = {TU8, defaultloc, 1, 1, {0}, NULL},
- [TS16] = {TS16, defaultloc, 2, 2, {0}, NULL},
- [TU16] = {TU16, defaultloc, 2, 2, {0}, NULL},
- [TS32] = {TS32, defaultloc, 4, 4, {0}, NULL},
- [TU32] = {TU32, defaultloc, 4, 4, {0}, NULL},
- [TS64] = {TS64, defaultloc, 8, 8, {0}, NULL},
- [TU64] = {TU64, defaultloc, 8, 8, {0}, NULL},
+ [TINFER] = {TINFER, defaultloc, NULL, 4, 4, {0}, NULL},
+ [TUINFER] = {TUINFER, defaultloc, NULL, 4, 4, {0}, NULL},
- [TF32] = {TF32, defaultloc, 4, 4, {0}, NULL},
- [TF64] = {TF64, defaultloc, 8, 8, {0}, NULL},
+ [TS8] = {TS8, defaultloc, NULL, 1, 1, {0}, NULL},
+ [TU8] = {TU8, defaultloc, NULL, 1, 1, {0}, NULL},
+ [TS16] = {TS16, defaultloc, NULL, 2, 2, {0}, NULL},
+ [TU16] = {TU16, defaultloc, NULL, 2, 2, {0}, NULL},
+ [TS32] = {TS32, defaultloc, NULL, 4, 4, {0}, NULL},
+ [TU32] = {TU32, defaultloc, NULL, 4, 4, {0}, NULL},
+ [TS64] = {TS64, defaultloc, NULL, 8, 8, {0}, NULL},
+ [TU64] = {TU64, defaultloc, NULL, 8, 8, {0}, NULL},
+ [TF32] = {TF32, defaultloc, NULL, 4, 4, {0}, NULL},
+ [TF64] = {TF64, defaultloc, NULL, 8, 8, {0}, NULL},
- [TPTR] = {TPTR, defaultloc, 8, 8, {0}, NULL},
- [TARRAY] = {TARRAY, defaultloc, 0, 0, {0}, NULL},
+ [TPTR] = {TPTR, defaultloc, NULL, 8, 8, {0}, NULL},
+ [TARRAY] = {TARRAY, defaultloc, NULL, 0, 0, {0}, NULL},
+ [TTUPLE] = {TTUPLE, defaultloc, NULL, 0, 0, {0}, NULL},
- [TTUPLE] = {TTUPLE, defaultloc, 0, 0, {0}, NULL},
+ [TFUNCTION] = {TFUNCTION, defaultloc, NULL, 0, 0, {0}, NULL},
};
int keywordlengths[OSTART - KSTART];
@@ -407,6 +455,7 @@ const char *nodestrings[] = {
[KDO] = "do",
[KFOR] = "for", [KLOOP] = "loop",
[KWHILE] = "while", [KUNTIL] = "until",
+ [KSTRUCT] = "struct", [KUNION] = "union",
/* Operators */
[OSUFINC] = "++", [OSUFDEC] = "--",
[OARRAY] = "[]", [OCALL] = "()",
@@ -1574,7 +1623,7 @@ makedecl(Source *source, int key, DeclKind kind) {
decl->loc = source->tok.loc;
decl->key = key;
decl->type = prim + TVOID;
- decl->functionenv = NULL;
+ decl->contentenv = NULL;
assert(currenv);
@@ -1595,7 +1644,7 @@ makedecl(Source *source, int key, DeclKind kind) {
currenv->keycache[cacheindex] |= cachebit;
- decl->env = currenv;
+ decl->parentenv = currenv;
if (currenv->tail) {
currenv->tail->next = decl;
@@ -1869,8 +1918,8 @@ declaration(Source *source, Type *ty) {
assert(decl);
- assert(decl->functionenv == NULL);
- decl->functionenv = functionenv;
+ assert(decl->contentenv == NULL);
+ decl->contentenv = functionenv;
assert(decl->content == NULL);
decl->content = result->lhs;
@@ -1904,7 +1953,7 @@ declaration(Source *source, Type *ty) {
}
bool
-isatom(Source *source) {
+isnotatom(Source *source) {
switch ((int) getkind(source)) {
case 0:
case '\n': case ',': case ';':
@@ -1912,57 +1961,124 @@ isatom(Source *source) {
case ')': case ']': case '}':
case KELSE:
case KUNTIL:
- return false;
+ return true;
}
if (getnumops(getkind(source)) && getprec(getkind(source)) != PUNARY)
- return false;
+ return true;
- return true;
+ return false;
+}
+
+/* Shared end-of-list test for the statement-list style parsers.
+ *
+ * Skips a leading '\n' token and at most one ';' token, then decides
+ * whether the caller's loop has to stop.
+ *
+ * hastail: true once the caller has already collected an item;
+ * enables the "expected line delimiter" check.
+ * needindent: indentation required for a line to continue the list.
+ * expecterrmsg: message reported when a ';' directly follows a newline.
+ *
+ * Returns true if the list ends here (a line indented less than
+ * needindent, or a token for which isnotatom() is true), false if
+ * another item should be parsed. */
+bool
+checkend(Source *source, bool hastail, int needindent,
+ const char *expecterrmsg)
+{
+ if (getkind(source) == '\n') {
+ gettok(source, false);
+ if (getkind(source) == ';')
+ error(getloc(source), expecterrmsg);
+ }
+
+ /* NOTE(review): lastkind/lastindent presumably describe the
+ * previously consumed token - confirm against gettok() */
+ if (source->lastkind == '\n' && source->lastindent < needindent)
+ return true;
+
+ if (getkind(source) == ';') {
+ gettok(source, false);
+
+ /* NOTE(m21c): used for REPL. it allows having
+ * semicolons on line-endings and multiple
+ * adjacent semicolons in REPL-mode. */
+ if (getkind(source) == ';' || getkind(source) == '\n') {
+ /* TODO(m21c): output an error-message if not in REPL-mode */
+ }
+ }
+
+ if (isnotatom(source))
+ return true;
+
+ /* two items on one line must be separated by ';' */
+ if (hastail && source->lastkind != '\n' && source->lastkind != ';')
+ error(getloc(source), "expected line delimiter");
+
+ return false;
}
Node *
-stmtlist(Source *source, int indent, EnvKind envkind) {
+recordbody(Source *source, int indent, EnvKind envkind) {
+ /* Parses the field declarations of a record body.
+ *
+ * Loops until checkend() reports the end of the list. An
+ * environment of kind envkind is pushed lazily on the first
+ * iteration that gets past checkend(). Every parsed declaration
+ * is wrapped in an ASTMT node and appended to a list chained
+ * through rhs.
+ *
+ * Returns an ASCOPE node owning the environment, or NULL if
+ * the body was empty. */
Node *head = NULL, *tail = NULL;
int needindent = nextindent(source, indent);
Env *env = NULL;
- /* printf("needident: %d, currindent: %d, lastindent: %d\n", needindent, currindent, lastindent); */
+ Type *type;
for (;;) {
- Node *stmt;
-
- if (getkind(source) == '\n') {
- gettok(source, false);
- if (getkind(source) == ';')
- error(getloc(source), "expected expression");
- }
+ /* stays NULL when an iteration produces no node */
+ Node *declstmt = NULL;
- if (source->lastkind == '\n' && source->lastindent < needindent)
+ if (checkend(source, !!tail, needindent,
+ "expected declaration"))
break;
- if (getkind(source) == ';') {
+ if (!env)
+ env = pushenv(source, envkind);
+
+ /* parse default values */
+ if (getkind(source) == ODISP) {
+ /* TODO: not implemented yet; only the token is consumed */
gettok(source, false);
- /* NOTE(m21c): used for REPL. it allows having
- * semicolons on line-endings and nultiple
- * adjacent semecolons in REPL-mode. */
- if (getkind(source) == ';' || getkind(source) == '\n') {
- /* TODO(m21c): output an error-message if not in REPL-mode */
+ /* parse field-declaration */
+ } else {
+ /* the 'use' keyword is accepted but not recorded yet */
+ if (getkind(source) == KUSE) {
+ gettok(source, false);
}
+
+ type = getbasetype(source, 0);
+ type = gettype(source, type);
+ declstmt = declaration(source, type);
+
+ declstmt = makenode(declstmt, declstmt);
+ declstmt->kind = ASTMT;
}
- if (!isatom(source))
- break;
+ /* never link an unset node into the list: the default-value
+ * branch above does not produce one */
+ if (!declstmt)
+ continue;
+
+ if (!tail) {
+ head = tail = declstmt;
+ } else {
+ tail->rhs = declstmt;
+ tail = declstmt;
+ }
+ }
+
+ if (env) {
+ head = makenode(&source->tok, head);
+ head->kind = ASCOPE;
+ head->u.env = env;
+ env->stmts = head;
+
+ popenv(source);
+ }
+
+ return head;
+}
+
+Node *
+stmtlist(Source *source, int indent, EnvKind envkind) {
+ Node *head = NULL, *tail = NULL;
+ int needindent = nextindent(source, indent);
+
+ Env *env = NULL;
+ /* printf("needident: %d, currindent: %d, lastindent: %d\n", needindent, currindent, lastindent); */
- if (tail && source->lastkind != '\n' && source->lastkind != ';')
- error(getloc(source), "expected line delimiter");
+ for (;;) {
+ Node *stmt;
+
+ if (checkend(source, !!tail, needindent,
+ "expected expression"))
+ break;
if (!env &&
(envkind != SFUNCTION || !source->currenv ||
source->currenv->kind != SPARAMLIST))
{
- /* NOTE(m21c): if there is already a
+ /* NOTE(m21c): if there already is a
* paramlist-environment and we want a
* function-environment, we just use
* paramlist as our function-environment.
@@ -1991,14 +2107,17 @@ stmtlist(Source *source, int indent, EnvKind envkind) {
assert(env == NULL);
source->currenv->kind = SFUNCTION;
env = source->currenv;
+
+ popenv(source);
} else if (env) {
head = makenode(&source->tok, head);
head->kind = ASCOPE;
head->u.env = env;
env->stmts = head;
+
+ popenv(source);
}
- popenv(source);
return head;
}
@@ -2006,7 +2125,6 @@ stmtlist(Source *source, int indent, EnvKind envkind) {
Node *
atom(Source *source, int flags) {
Node *lhs = NULL, *savedis = source->lastis;
- Type *ty;
int indent;
/* unary 'is'-operator */
@@ -2139,6 +2257,22 @@ atom(Source *source, int flags) {
break;
+ case KSTRUCT:
+ case KUNION:
+ indent = source->lastindent;
+ lhs = makenode(&source->tok, NULL);
+ lhs->kind = getkind(source);
+ gettok(source, false);
+ if (getkind(source) == 'I') {
+ lhs->lhs = makenode(&source->tok, NULL);
+ gettok(source, false);
+ } else {
+ error(getloc(source), "expected identifier");
+ }
+ lhs->rhs = recordbody(source, indent, SSCOPE);
+
+ break;
+
case KNOT:
lhs = makenode(&source->tok, NULL);
gettok(source, false);
@@ -2150,7 +2284,7 @@ atom(Source *source, int flags) {
case KBREAK:
case KCONTINUE:
lhs = makenode(&source->tok, NULL);
- lhs->kind = getkind(source) == KBREAK ? ABREAK : ACONTINUE;
+ lhs->kind = getkind(source);
gettok(source, true);
if (getkind(source) == ':') {
@@ -2182,7 +2316,8 @@ atom(Source *source, int flags) {
}
}
- if (isatom(source))
+ /* only parse an expression list if an atom follows */
+ if (!isnotatom(source))
lhs->rhs = exprlist(source, false, NULL);
break;
@@ -2706,12 +2841,23 @@ typecheck(Env *env, Node *expr) {
switch (getnumops(expr->kind)) {
case 2:
assert(rhs);
- if (rhs->type->kind == TERRTYPE)
+ if (rhs->type->kind == TERRTYPE) {
expr->type = prim + TERRTYPE;
+ return expr;
+ } else {
+ rhs = typecheck(env, rhs);
+ }
case 1:
assert(lhs);
- if (lhs->type->kind == TERRTYPE)
+ if (lhs->type->kind == TERRTYPE) {
expr->type = prim + TERRTYPE;
+ return expr;
+ } else {
+ lhs = typecheck(env, lhs);
+ }
+
+ if (arithtuplereorder(env, expr, getnumops(expr->kind)))
+ goto joincomma;
}
if (expr->type && expr->type->kind == TERRTYPE)
@@ -2723,11 +2869,6 @@ typecheck(Env *env, Node *expr) {
return expr;
case OPLUS: case OMINUS:
- lhs = typecheck(env, lhs);
-
- if (arithtuplereorder(env, expr, 1))
- goto joincomma;
-
/*
if (!isarithtype(lhs->type)) {
error(&lhs->loc, "expression is not of arithmentic type");
@@ -2737,15 +2878,9 @@ typecheck(Env *env, Node *expr) {
*/
expr->type = lhs->type;
- expr->lhs = conv(lhs);
- return expr;
+ goto joinunaryconv;
case OBNOT:
- lhs = typecheck(env, lhs);
-
- if (arithtuplereorder(env, expr, 1))
- goto joincomma;
-
if (!isinttype(lhs->type)) {
error(
&lhs->loc,
@@ -2757,15 +2892,9 @@ typecheck(Env *env, Node *expr) {
}
expr->type = lhs->type;
- expr->lhs = conv(lhs);
- return expr;
+ goto joinunaryconv;
case OLNOT:
- lhs = typecheck(env, lhs);
-
- if (arithtuplereorder(env, expr, 1))
- goto joincomma;
-
if (!isarithtype(lhs->type)) {
error(
&lhs->loc,
@@ -2777,8 +2906,7 @@ typecheck(Env *env, Node *expr) {
}
expr->type = prim + TBOOL;
- expr->lhs = conv(lhs);
- return expr;
+ goto joinunaryconv;
case OCAST:
/*
@@ -2794,12 +2922,6 @@ typecheck(Env *env, Node *expr) {
case OMUL: case ODIV: case OMOD:
case OADD: case OSUB:
- lhs = typecheck(env, lhs);
- rhs = typecheck(env, rhs);
-
- if (arithtuplereorder(env, expr, 2))
- goto joincomma;
-
/* usual arithmetic conversion */
if (isarithtype(lhs->type) && isarithtype(rhs->type)) {
if (lhs->type->kind < rhs->type->kind)
@@ -2816,17 +2938,9 @@ typecheck(Env *env, Node *expr) {
return expr;
}
- expr->lhs = wrap(expr->type, lhs);
- expr->rhs = wrap(expr->type, rhs);
- return expr;
+ goto joinbinarywrap;
case OBAND: case OBOR: case OXOR:
- lhs = typecheck(env, lhs);
- rhs = typecheck(env, rhs);
-
- if (arithtuplereorder(env, expr, 2))
- goto joincomma;
-
if (isinttype(lhs->type) && isinttype(rhs->type)) {
if (lhs->type->kind < rhs->type->kind) {
expr->type = rhs->type;
@@ -2842,17 +2956,9 @@ typecheck(Env *env, Node *expr) {
expr->type = prim + TERRTYPE;
}
- expr->lhs = wrap(expr->type, lhs);
- expr->rhs = wrap(expr->type, rhs);
- return expr;
+ goto joinbinarywrap;
case OLSH: case ORSH: case OARSH:
- lhs = typecheck(env, lhs);
- rhs = typecheck(env, rhs);
-
- if (arithtuplereorder(env, expr, 2))
- goto joincomma;
-
if (isinttype(lhs->type) && isinttype(rhs->type)) {
expr->type = lhs->type;
} else {
@@ -2864,19 +2970,12 @@ typecheck(Env *env, Node *expr) {
expr->type = prim + TERRTYPE;
}
- expr->lhs = wrap(expr->type, lhs); /* this should be unneeded */
- expr->rhs = wrap(expr->type, rhs);
- return expr;
+ /* NOTE: ideally only rhs would be wrapped here; wrapping lhs should be unneeded */
+ goto joinbinarywrap;
case OEQU: case ONEQ:
case OLET: case OLEQ:
case OGRT: case OGEQ:
- lhs = typecheck(env, lhs);
- rhs = typecheck(env, rhs);
-
- if (arithtuplereorder(env, expr, 2))
- goto joincomma;
-
if (isarithtype(lhs->type) && isarithtype(rhs->type)) {
expr->type = prim + TBOOL;
} else {
@@ -2888,17 +2987,9 @@ typecheck(Env *env, Node *expr) {
expr->type = prim + TERRTYPE;
}
- expr->lhs = conv(lhs);
- expr->rhs = conv(rhs);
- return expr;
+ goto joinbinaryconv;
case OLAND: case OLOR:
- lhs = typecheck(env, lhs);
- rhs = typecheck(env, rhs);
-
- if (arithtuplereorder(env, expr, 2))
- goto joincomma;
-
if (isarithtype(lhs->type) && isarithtype(rhs->type)) {
expr->type = prim + TBOOL;
} else {
@@ -2910,9 +3001,7 @@ typecheck(Env *env, Node *expr) {
expr->type = prim + TERRTYPE;
}
- expr->lhs = conv(lhs);
- expr->rhs = conv(rhs);
- return expr;
+ goto joinbinaryconv;
case OASS:
case OMULA: case ODIVA: case OMODA:
@@ -2920,12 +3009,6 @@ typecheck(Env *env, Node *expr) {
case OADDA: case OSUBA:
case OANDA:
case OORA: case OXORA:
- lhs = typecheck(env, lhs);
- rhs = typecheck(env, rhs);
-
- if (arithtuplereorder(env, expr, 2))
- goto joincomma;
-
switch ((int) expr->kind) {
case OASS:
expr->type = lhs->type;
@@ -3038,6 +3121,17 @@ typecheck(Env *env, Node *expr) {
default:
return expr;
}
+
+joinbinaryconv:
+ expr->rhs = conv(rhs);
+joinunaryconv:
+ expr->lhs = conv(lhs);
+ return expr;
+
+joinbinarywrap:
+ expr->rhs = wrap(expr->type, rhs);
+ expr->lhs = wrap(expr->type, lhs);
+ return expr;
}
Node *
@@ -3573,8 +3667,8 @@ printdeclaration(FILE *out, Decl *decl, int indent) {
n += highlight(out, HLDELIM);
n += fprintf(out, "(");
- if (decl->functionenv) {
- head = decl->functionenv->head;
+ if (decl->contentenv) {
+ head = decl->contentenv->head;
}
for (param = head; param; param = param->next) {
@@ -3794,6 +3888,18 @@ printexpr(FILE *out, Node *expr, int indent) {
break;
default:
+ n += highlight(out, HLINFO);
+ n += fprintf(out, "node(%u)", expr->kind);
+ if (expr->lhs) {
+ n += fprintf(out, " -> ");
+ n += printsubexpr(out, expr->lhs, true, indent);
+ }
+ if (expr->rhs) {
+ n += highlight(out, HLINFO);
+ n += fprintf(out, " => ");
+ n += printsubexpr(out, expr->rhs, false, indent);
+ }
+
break;
}
}