Aria

A low-level systems programming language
git clone git://git.m21c.me/Aria.git
Log | Files | Refs | LICENSE

commit 283ecfcc4c23b0196e824239c103fae99b67ef29
parent 049ae43efd3e365198d94cbc8e92688372e468c0
Author: m21c  <ho*******@gmail.com>
Date:   Sat, 15 Jan 2022 22:33:34 +0100

cleaned up code, especially in the tokenizer

Diffstat:
M compiler.c | 233 +++++++++++++++++++++++++++++++++++++++++++++++--------------------------------
1 file changed, 139 insertions(+), 94 deletions(-)

diff --git a/compiler.c b/compiler.c @@ -951,13 +951,22 @@ error(SrcLoc *loc, const char *fmt, ...) #define nextindent(source, indent) \ ((indent) + (source)->tabwidth - ((indent) % (source)->tabwidth)) +#define peekchar(source) \ + ((source)->line[(source)->currloc.column]) + +#define peeknextchar(source) \ + ((source)->line[(source)->currloc.column + 1]) + +#define nextchar(source) \ + ((source)->line[++(source)->currloc.column]) + static int -tokenizealphanumeric(Source *source, register int c0) +tokenizealphanumeric(Source *source, register int ch) { int keyword; - while (isalnum(c0) || c0 == '_') - c0 = source->line[++source->currloc.column]; + while (isalnum(ch) || ch == '_') + ch = nextchar(source); keyword = getkeyword( source->line + source->tok.loc.column, @@ -1009,6 +1018,7 @@ suffixfloattype(Source *source, const char *end) if (end[1]) goto errorfloat; + } else if (!mystrcasecmp(end, "f32") || !mystrcasecmp(end, "r32")) { ty = prim + TF32; @@ -1035,10 +1045,12 @@ suffixinttype(Source *source, const char *end) case 's': case 'S': case 'i': case 'I': typeid = 0; + /* fallthrough */ case 'u': case 'U': ++end; if (*end == 0) { return prim + (typeid + TINFER); + } else if (*end == '8') { typeid += TS8; @@ -1046,19 +1058,25 @@ suffixinttype(Source *source, const char *end) goto errorint; return prim + typeid; + } else if (!strcmp(end, "16")) { return prim + (typeid + TS16); + } else if (!strcmp(end, "32")) { return prim + (typeid + TS32); + } else if (!strcmp(end, "64")) { return prim + (typeid + TS64); + } else if (!mystrcasecmp(end, "sz")) { return prim + (typeid + TSSIZE); } + /* fallthrough */ default: if (!mystrcasecmp(end, "ll")) { return prim + (typeid + TLLONG); + } else if (*end == 'l' || *end == 'L') { typeid += TLONG; @@ -1075,30 +1093,30 @@ errorint: } static int -tokenizenumber(Source *source, register int c0) +tokenizenumber(Source *source, register int ch) { - int l = c0, t = source->line[source->currloc.column+1], i, j; + int l = ch, t = 
peeknextchar(source), i, j; bool hasdec = false, hasexp = false; char *end; advancenum: - while (isalnum(c0) || c0 == '_' || (c0 == '.' && - source->line[source->currloc.column+1] != '.' && !hasdec)) + while (isalnum(ch) || ch == '_' || + (ch == '.' && peeknextchar(source) != '.' && !hasdec)) { - if (c0 != '_') - l = c0; - if (c0 == '.') + if (ch != '_') + l = ch; + if (ch == '.') hasdec = true; - c0 = source->line[++source->currloc.column]; + ch = nextchar(source); } - if (hasdec && !hasexp && (c0 == '+' || c0 == '-')) { + if (hasdec && !hasexp && (ch == '+' || ch == '-')) { t = tolower(t); l = tolower(l); if ((l == 'e' && t != 'x') || (l == 'p' && t == 'x')) { - c0 = source->line[++source->currloc.column]; + ch = nextchar(source); hasexp = true; goto advancenum; @@ -1134,6 +1152,7 @@ advancenum: { source->tok.u.d = strtod(source->stringbuf, &end); source->tok.type = suffixfloattype(source, end); + } else { if (mystrncasecmp(source->stringbuf, "0b", 2) == 0) { source->tok.u.u = strtoull( @@ -1156,41 +1175,41 @@ advancenum: } static int -tokenizestring(Source *source, register int c0) +tokenizestring(Source *source, register int ch) { - int delim = c0, j; + int delim = ch, j; - c0 = source->line[++source->currloc.column]; + ch = nextchar(source); source->tok.loc.column = source->currloc.column; j = source->currloc.column; - while (c0 != delim && c0 != 0) { - if (c0 == '\\') { - c0 = source->line[++source->currloc.column]; + while (ch != delim && ch != 0) { + if (ch == '\\') { + ch = nextchar(source); - switch (c0) { + switch (ch) { case '\\': - c0 = '\\'; + ch = '\\'; break; case 'n': - c0 = '\n'; + ch = '\n'; break; case 'r': - c0 = '\r'; + ch = '\r'; break; case 't': - c0 = '\t'; + ch = '\t'; break; case '\'': - c0 = '\''; + ch = '\''; break; case '"': - c0 = '"'; + ch = '"'; break; /* TODO(m21c): read more escape sequences */ @@ -1199,18 +1218,18 @@ tokenizestring(Source *source, register int c0) default: error(&source->currloc, - "invalid escape sequence 
'\\%c'", c0); + "invalid escape sequence '\\%c'", ch); } } - source->line[j++] = c0; - c0 = source->line[++source->currloc.column]; + source->line[j++] = ch; + ch = nextchar(source); } ++source->currloc.column; source->line[j++] = 0; - if (c0 == 0) { + if (ch == 0) { stringeol: error(&source->currloc, "unexpected end-of-line"); @@ -1231,7 +1250,7 @@ tokenizestring(Source *source, register int c0) static int gettok(Source *source) { - register int c0 = (uchar) source->line[source->currloc.column]; + register int ch = (uchar) peekchar(source); static bool hasnewline = false; source->lastkind = source->tok.kind; @@ -1243,17 +1262,18 @@ skipwhite: return source->tok.kind = 0; } - c0 = source->line[(source->currloc.column = 0)]; + source->currloc.column = 0; + ch = peekchar(source); } if (source->currloc.column) { - while (isspace(c0)) - c0 = source->line[++source->currloc.column]; + while (isspace(ch)) + ch = nextchar(source); } else { source->lastindent = 0; - while (isspace(c0)) { - if (c0 == '\t') { + while (isspace(ch)) { + if (ch == '\t') { source->lastindent = nextindent( source, source->lastindent @@ -1262,7 +1282,7 @@ skipwhite: ++source->lastindent; } - c0 = source->line[++source->currloc.column]; + ch = nextchar(source); } } @@ -1273,7 +1293,7 @@ skipwhite: source->tok.loc.column = source->currloc.column; /* get line */ - if (!c0 || c0 == '#') { + if (!ch || ch == '#') { if (hasnewline) { goto skipwhite; } else { @@ -1285,109 +1305,107 @@ skipwhite: hasnewline = false; /* identifier or keyword */ - if (isalpha(c0) || c0 == '_') { - return tokenizealphanumeric(source, c0); - } + if (isalpha(ch) || ch == '_') + return tokenizealphanumeric(source, ch); /* number literal */ - if (isdigit(c0) || (c0 == '.' && - isdigit(source->line[source->currloc.column+1]))) - { - return tokenizenumber(source, c0); - } + if (isdigit(ch) || (ch == '.' 
&& isdigit(peeknextchar(source)))) + return tokenizenumber(source, ch); /* string & character-literal */ - if (c0 == '"' || c0 == '\'') { - return tokenizestring(source, c0); - } + if (ch == '"' || ch == '\'') + return tokenizestring(source, ch); /* delimiters */ - switch (c0) { + switch (ch) { case ',': case ';': case '@': case ':': case '{': case '}': case ']': case '[': case '(': case ')': ++source->currloc.column; - return source->tok.kind = c0; + return source->tok.kind = ch; } /* operators */ #define select(ch, then, otherwise) ( \ - source->line[source->currloc.column] == (ch) ? \ + peekchar(source) == (ch) ? \ ++source->currloc.column, (then) : \ (otherwise) \ ) - switch (source->line[source->currloc.column++]) { + + ++source->currloc.column; + switch (ch) { case '.': /* tok.kind = select('.', ORANGE, ODISP); */ - c0 = ODISP; - goto joinop; + ch = ODISP; + break; case '*': - c0 = select('=', OMULA, OMUL); - goto joinop; + ch = select('=', OMULA, OMUL); + break; case '/': - c0 = select('=', ODIVA, ODIV); - goto joinop; + ch = select('=', ODIVA, ODIV); + break; case '%': - c0 = select('=', OMODA, OMOD); - goto joinop; + ch = select('=', OMODA, OMOD); + break; case '<': - c0 = select('=', OLEQ, + ch = select('=', OLEQ, select('<', select('=', OLSHA, OLSH), OLET)); - goto joinop; + break; case '>': - c0 = select('=', OGEQ, + ch = select('=', OGEQ, select('>', select('>', select('=', OARSHA, OARSH), select('=', ORSHA, ORSH)), OGRT)); - goto joinop; + break; case '&': - c0 = select('=', OANDA, select('&', OLAND, OBAND)); - goto joinop; + ch = select('=', OANDA, select('&', OLAND, OBAND)); + break; case '+': - c0 = select('=', OADDA, select('+', OSUFINC, OADD)); - goto joinop; + ch = select('=', OADDA, select('+', OSUFINC, OADD)); + break; case '-': - c0 = select('=', OSUBA, select('-', OSUFDEC, OSUB)); - goto joinop; + ch = select('=', OSUBA, select('-', OSUFDEC, OSUB)); + break; case '|': - c0 = select('=', OORA, select('|', OLOR, OBOR)); - goto joinop; + ch = 
select('=', OORA, select('|', OLOR, OBOR)); + break; case '^': - c0 = select('=', OXORA, OXOR); - goto joinop; + ch = select('=', OXORA, OXOR); + break; case '!': - c0 = select('=', ONEQ, OLNOT); - goto joinop; + ch = select('=', ONEQ, OLNOT); + break; case '~': - c0 = select('=', OFLIP, OBNOT); - goto joinop; + ch = select('=', OFLIP, OBNOT); + break; case '=': - c0 = select('=', select('=', OIDENT, OEQU), OASS); - joinop: - return source->tok.kind = c0; + ch = select('=', select('=', OIDENT, OEQU), OASS); + break; default: - error(&source->currloc, "invalid input character '%c'", c0); + error(&source->currloc, "invalid input character '%c'", ch); return 'Z'; } + + return source->tok.kind = ch; #undef select } @@ -1423,7 +1441,8 @@ getunary(Kind kind) case OSUB: return OMINUS; case OSUFINC: return OINC; case OSUFDEC: return ODEC; - default: return 0; + default: + return 0; } } @@ -1458,7 +1477,8 @@ getunarysuffix(Source *source) switch (kind) { case '(': return OCALL; case '[': return OARRAY; - default: return 0; + default: + return 0; } } @@ -1521,6 +1541,7 @@ deletenode(Node *node) } else if (node->kind == ASTMT) { if (node->lhs) deletenode(node->lhs); + } else { if (node->rhs) deletenode(node->rhs); @@ -1712,6 +1733,7 @@ deferfuncenv(Source *source, int keydeclinfunc) if (funcenv) { if (!funcenv->pending) { funcenv->pending = true; + if (!source->pendingenvhead) { source->pendingenvtail = funcenv; source->pendingenvhead = funcenv; @@ -1965,6 +1987,7 @@ checkend(Source *source, bool hastail, int needindent, { if (getkind(source) == '\n') { gettok(source); + if (getkind(source) == ';') { error(getloc(source), expecterrmsg); gettok(source); @@ -2017,12 +2040,10 @@ stmtlist(Source *source, int indent, EnvKind envkind, for (;;) { Node *stmt; - if (checkend(source, !!tail, needindent, - "expected expression")) + if (checkend(source, !!tail, needindent, "expected expression")) break; stmt = exprlist(source, false, NULL); - stmt = tokennode(source, stmt); stmt->kind = 
ASTMT; @@ -2139,6 +2160,7 @@ redodeclaration: gettok(source); if (tryreadtype && (envkind == SSTRUCT || envkind == SUNION)) { + if (!isbasicdelimiter(getkind(source)) && getkind(source) != '(') { @@ -2202,6 +2224,7 @@ redodeclaration: result = tokennode(source, NULL); result->kind = 'T'; result->type = ty; + return result; } @@ -2520,6 +2543,7 @@ readatom(Source *source, int flags) gettok(source); lhs = declaration(source, gettype(source, type), false); } while (0); + break; case 'N': @@ -2531,6 +2555,7 @@ readatom(Source *source, int flags) if (flags & QCONST) { /* TODO(m21c): const - conversion */ } + break; case KVAR: @@ -2579,6 +2604,7 @@ readatom(Source *source, int flags) } else { lhs->lhs = readatom(source, 0); } + break; case KBITCAST: @@ -2627,6 +2653,7 @@ readatom(Source *source, int flags) /* if is atom */ if (!isdelimiter(source->tok.kind)) lhs->rhs = exprlist(source, false, NULL); + break; case KDO: @@ -2659,7 +2686,6 @@ readatom(Source *source, int flags) gettok(source); lhs->u.payload = readexpr(source, POR); lhs->lhs = stmtlist(source, indent, SWHILE, NULL, false); - goto joinelse; case KIF: @@ -2718,6 +2744,7 @@ readatom(Source *source, int flags) error(getloc(source), "expected identifier"); lhs->rhs = tokennode(source, NULL); + } else if (getkind(source) == '(') { gettok(source); @@ -2728,6 +2755,7 @@ readatom(Source *source, int flags) expect(source, ')', "expected ')'"); continue; + } else if (getkind(source) == '[') { gettok(source); @@ -3097,6 +3125,7 @@ conv(Node *node) if (ty->kind == TINFER) return wrap(prim + TINT, node); + if (ty->kind == TUINFER) return wrap(prim + TUINT, node); @@ -3179,12 +3208,14 @@ resolvepending(Env *env, Node *expr) if (!decl) { error(&expr->loc, "'%s' undeclared", getstring(idents, expr->u.key)); + return expr; } if (decl->kind != DVAR && decl->kind != DFUNCTION) { error(&expr->loc, "'%s' is not a variable nor a function", getstring(idents, expr->u.key)); + return expr; } @@ -3462,11 +3493,13 @@ typecheck(Env 
*env, Node *expr) advancestmt: lhs = typecheck(env, lhs); rhs->lhs = lhs; + if (rhs->rhs) { assert(rhs->rhs->kind == ASTMT); rhs = rhs->rhs, lhs = rhs->lhs; goto advancestmt; } + return expr; case ADECL: @@ -3721,7 +3754,6 @@ foldexpr(Env *env, Node *expr) assert(expr->u.env); expr->lhs = foldexpr(expr->u.env, expr->lhs); - return expr; case ASTMT: @@ -3729,11 +3761,13 @@ foldexpr(Env *env, Node *expr) advancestmt: lhs = foldexpr(env, lhs); rhs->lhs = lhs; + if (rhs->rhs) { assert(rhs->rhs->kind == ASTMT); rhs = rhs->rhs, lhs = rhs->lhs; goto advancestmt; } + return expr; case ACOMMA: @@ -3767,7 +3801,6 @@ foldexpr(Env *env, Node *expr) deletenode(lhs); expr->kind = 'N'; - return expr; case ACONV: @@ -3836,7 +3869,7 @@ promptenvpath(Env* currenv) envstring = getstring(idents, key); } - fprintf(stdout, "%s/", envstring); + fprintf(stdout, "# scope: %s/", envstring); } } @@ -3845,13 +3878,15 @@ tryprompt(Source *source, const char ch) { if (source->handlereplprompt) { Env *currenv = source->currenv; + if (ch == '.' 
&& currenv && currenv->kind != STOPLEVEL) { - fputs("\e[34m", stdout); + fputs("\e[1;30m", stdout); promptenvpath(currenv); fprintf(stdout, "\n\e[35m%c \e[0m", ch); } else { fprintf(stdout, "\e[35m%c \e[0m", ch); } + } else if (source->filein == stdin) { source->handlereplprompt = true; } @@ -4025,6 +4060,7 @@ printtypetail(FILE *out, Type *type, int indent) } else { n += printtypetail(out, type->u.rtarget, indent); } + break; #define typecase(type, str) \ @@ -4341,6 +4377,7 @@ printexpr(FILE *out, Node *expr, int indent) putc(' ', out), ++n; } } + n += printoperant(out, expr->lhs, PUNARY, false, indent); } } else { @@ -4381,6 +4418,7 @@ printexpr(FILE *out, Node *expr, int indent) n += fprintf(out, "true"); else n += fprintf(out, "0x%016lx", expr->u.u); + break; case TPTR: @@ -4396,6 +4434,7 @@ printexpr(FILE *out, Node *expr, int indent) break; } + break; case 'S': @@ -4499,6 +4538,7 @@ printexpr(FILE *out, Node *expr, int indent) if (expr->rhs) n += printclause(out, expr->rhs, indent); + break; case ASTMT: @@ -4518,6 +4558,7 @@ printexpr(FILE *out, Node *expr, int indent) expr = expr->rhs; goto advancestmt; } + break; case ASCOPE: @@ -4553,6 +4594,7 @@ printexpr(FILE *out, Node *expr, int indent) default: n += highlight(out, HLINFO); n += fprintf(out, "node(%u)", expr->kind); + if (expr->lhs) { n += fprintf(out, " -> "); n += printsubexpr(out, expr->lhs, true, indent); @@ -4764,8 +4806,11 @@ main(int argc, char **argv) if (source->lastkind != ';' && source->lastkind != '\n') { error(getloc(source), "expected new line"); - while (getkind(source) != ';' && getkind(source) != '\n' && getkind(source) != 0) + while (getkind(source) != ';' && + getkind(source) != '\n' && getkind(source) != 0) + { gettok(source); + } if (source->filein == stdin) { highlight(stdout, HLPROMPT);