Aria

A low-level systems programming language
git clone git://git.m21c.me/Aria.git
Log | Files | Refs | LICENSE

commit 8480c7bcaea20aae2597b6a20387405ceba7d4b3
parent 0b7f291572407fee68bac011a6f4a2c509a55f21
Author: m21c  <ho*******@gmail.com>
Date:   Sat,  2 Oct 2021 14:16:41 +0200

extracted tokenize functions from gettok() for better code organization

Diffstat:
M compiler.c | 372 +++++++++++++++++++++++++++++++++++++++----------------------------------------
1 file changed, 185 insertions(+), 187 deletions(-)

diff --git a/compiler.c b/compiler.c @@ -943,63 +943,8 @@ error(SrcLoc *loc, const char *fmt, ...) ((indent) + (source)->tabwidth - ((indent) % (source)->tabwidth)) static int -gettok(Source *source) +tokenizealphanumeric(Source *source, register int c0) { - register int c0 = (uchar) source->line[source->currloc.column]; - static bool hasnewline = false; - - source->lastkind = source->tok.kind; - -skipwhite: - if (hasnewline) { - if (!mygetline(source)) { - source->lastindent = 0; - return source->tok.kind = 0; - } - - c0 = source->line[(source->currloc.column = 0)]; - } - - if (source->currloc.column) { - while (isspace(c0)) - c0 = source->line[++source->currloc.column]; - - } else { - source->lastindent = 0; - while (isspace(c0)) { - if (c0 == '\t') { - source->lastindent = nextindent( - source, - source->lastindent - ); - } else { - ++source->lastindent; - } - - c0 = source->line[++source->currloc.column]; - } - } - - source->tok.type = prim + TUNDEFINED; - source->tok.u.u = 0; - source->tok.lhs = NULL; - source->tok.rhs = NULL; - source->tok.loc.column = source->currloc.column; - - /* get line */ - if (!c0 || c0 == '#') { - if (hasnewline) { - goto skipwhite; - } else { - hasnewline = true; - return source->tok.kind = '\n'; - } - } - - hasnewline = false; - - /* identifier or keyword */ - if (isalpha(c0) || c0 == '_') { int keyword; while (isalnum(c0) || c0 == '_') @@ -1039,9 +984,90 @@ skipwhite: return source->tok.kind = 'I'; } - /* number literal */ - if (isdigit(c0) || (c0 == '.' 
&& - isdigit(source->line[source->currloc.column+1]))) +static Type * +suffixfloattype(Source *source, const char *end) +{ + Type *ty = prim + TDOUBLE; + + if (*end == 0) + return ty; + + /* FIXME(m21c): r-suffix might conflict with radix */ + if ((*end == 'f' || *end == 'F') && !end[1]) { + ty = prim + TFLOAT; + + } else if (*end == 'l' || *end == 'L') { + ty = prim + TDOUBLE; + + if (end[1]) + goto errorfloat; + } else if (!mystrcasecmp(end, "f32") || !mystrcasecmp(end, "r32")) { + ty = prim + TF32; + + } else if (!mystrcasecmp(end, "f64") || !mystrcasecmp(end, "r64")) { + ty = prim + TF64; + + } else { + errorfloat: + error(&source->currloc, "invalid floating-point format"); + } + + return ty; +} + +static Type * +suffixinttype(Source *source, const char *end) +{ + int typeid = TUINT - TINT; + + switch (*end) { + case 0: + return prim + TINFER; + + case 's': case 'S': case 'i': case 'I': + typeid = 0; + + case 'u': case 'U': + ++end; + if (*end == 0) { + return prim + (typeid + TINFER); + } else if (*end == '8') { + typeid += TS8; + + if (end[1]) + goto errorint; + + return prim + typeid; + } else if (!strcmp(end, "16")) { + return prim + (typeid + TS16); + } else if (!strcmp(end, "32")) { + return prim + (typeid + TS32); + } else if (!strcmp(end, "64")) { + return prim + (typeid + TS64); + } else if (!mystrcasecmp(end, "sz")) { + return prim + (typeid + TSSIZE); + } + + default: + if (!mystrcasecmp(end, "ll")) { + return prim + (typeid + TLLONG); + } else if (*end == 'l' || *end == 'L') { + typeid += TLONG; + + if (end[1]) + goto errorint; + + return prim + typeid; + } + } + +errorint: + error(&source->currloc, "invalid integer format"); + return prim + TINT; +} + +static int +tokenizenumber(Source *source, register int c0) { int l = c0, t = source->line[source->currloc.column+1], i, j; bool hasdec = false, hasexp = false; @@ -1100,39 +1126,8 @@ skipwhite: strpbrk(source->stringbuf, "eEfF"))) { source->tok.u.d = strtod(source->stringbuf, &end); - 
source->tok.type = prim + TDOUBLE; - - if (*end != 0) { - /* FIXME(m21c): r-suffix might conflict with radix */ - if ((*end == 'f' || *end == 'F') && !end[1]) { - source->tok.type = prim + TFLOAT; - - } else if (*end == 'l' || *end == 'L') { - source->tok.type = prim + TDOUBLE; - - if (end[1]) - goto errorfloat; - } else if (!mystrcasecmp(end, "f32") || - !mystrcasecmp(end, "r32")) - { - source->tok.type = prim + TF32; - - } else if (!mystrcasecmp(end, "f64") || - !mystrcasecmp(end, "r64")) - { - source->tok.type = prim + TF64; - + source->tok.type = suffixfloattype(source, end); } else { - errorfloat: - error( - &source->currloc, - "invalid floating-point format" - ); - } - } - } else { - int typeid = TUINT - TINT; - if (mystrncasecmp(source->stringbuf, "0b", 2) == 0) { source->tok.u.u = strtoull( source->stringbuf + 2, @@ -1147,73 +1142,15 @@ skipwhite: } - switch (*end) { - case 0: - typeid = TINFER; - break; - - case 's': case 'S': - case 'i': case 'I': - typeid = 0; - - case 'u': case 'U': - ++end; - if (*end == 0) { - typeid += TINFER; - - break; - } else if (*end == '8') { - typeid += TS8; - - if (end[1]) - goto errorint; - break; - } else if (!strcmp(end, "16")) { - typeid += TS16; - - break; - } else if (!strcmp(end, "32")) { - typeid += TS32; - - break; - } else if (!strcmp(end, "64")) { - typeid += TS64; - - break; - } else if (!mystrcasecmp(end, "sz")) { - typeid += TSSIZE; - - break; - } - - default: - if (!mystrcasecmp(end, "ll")) { - typeid += TLLONG; - - } else if (*end == 'l' || *end == 'L') { - typeid += TLONG; - - if (end[1]) - goto errorint; - } else { - errorint: - error( - &source->currloc, - "invalid integer format" - ); - - typeid = TINT; - } - } - - source->tok.type = prim + typeid; + source->tok.type = suffixinttype(source, end); } return source->tok.kind = 'N'; } - /* string & character-literal */ - if (c0 == '"' || c0 == '\'') { +static int +tokenizestring(Source *source, register int c0) +{ int delim = c0, j; c0 = 
source->line[++source->currloc.column]; @@ -1254,11 +1191,8 @@ skipwhite: goto stringeol; default: - error( - &source->currloc, - "invalid escape sequence '\\%c'", - c0 - ); + error(&source->currloc, + "invalid escape sequence '\\%c'", c0); } } @@ -1271,10 +1205,7 @@ skipwhite: if (c0 == 0) { stringeol: - error( - &source->currloc, - "unexpected end-of-line" - ); + error(&source->currloc, "unexpected end-of-line"); return source->tok.kind = '\n'; } @@ -1288,20 +1219,87 @@ skipwhite: ); return source->tok.kind = 'S'; +} + +static int +gettok(Source *source) +{ + register int c0 = (uchar) source->line[source->currloc.column]; + static bool hasnewline = false; + + source->lastkind = source->tok.kind; + +skipwhite: + if (hasnewline) { + if (!mygetline(source)) { + source->lastindent = 0; + return source->tok.kind = 0; + } + + c0 = source->line[(source->currloc.column = 0)]; + } + + if (source->currloc.column) { + while (isspace(c0)) + c0 = source->line[++source->currloc.column]; + + } else { + source->lastindent = 0; + while (isspace(c0)) { + if (c0 == '\t') { + source->lastindent = nextindent( + source, + source->lastindent + ); + } else { + ++source->lastindent; + } + + c0 = source->line[++source->currloc.column]; + } + } + + source->tok.type = prim + TUNDEFINED; + source->tok.u.u = 0; + source->tok.lhs = NULL; + source->tok.rhs = NULL; + source->tok.loc.column = source->currloc.column; + + /* get line */ + if (!c0 || c0 == '#') { + if (hasnewline) { + goto skipwhite; + } else { + hasnewline = true; + return source->tok.kind = '\n'; + } + } + + hasnewline = false; + + /* identifier or keyword */ + if (isalpha(c0) || c0 == '_') { + return tokenizealphanumeric(source, c0); + } + + /* number literal */ + if (isdigit(c0) || (c0 == '.' 
&& + isdigit(source->line[source->currloc.column+1]))) + { + return tokenizenumber(source, c0); + } + + /* string & character-literal */ + if (c0 == '"' || c0 == '\'') { + return tokenizestring(source, c0); } /* delimiters */ switch (c0) { - case ',': - case ';': - case '@': - case ':': - case '{': - case '}': - case ']': - case ')': - case '[': - case '(': + case ',': case ';': case '@': case ':': + case '{': case '}': + case ']': case '[': + case '(': case ')': ++source->currloc.column; return source->tok.kind = c0; } @@ -1315,30 +1313,30 @@ skipwhite: switch (source->line[source->currloc.column++]) { case '.': /* tok.kind = select('.', ORANGE, ODISP); */ - source->tok.kind = ODISP; + c0 = ODISP; goto joinop; case '*': - source->tok.kind = select('=', OMULA, OMUL); + c0 = select('=', OMULA, OMUL); goto joinop; case '/': - source->tok.kind = select('=', ODIVA, ODIV); + c0 = select('=', ODIVA, ODIV); goto joinop; case '%': - source->tok.kind = select('=', OMODA, OMOD); + c0 = select('=', OMODA, OMOD); goto joinop; case '<': - source->tok.kind = select('=', OLEQ, + c0 = select('=', OLEQ, select('<', select('=', OLSHA, OLSH), OLET)); goto joinop; case '>': - source->tok.kind = select('=', OGEQ, + c0 = select('=', OGEQ, select('>', select('>', select('=', OARSHA, OARSH), @@ -1347,37 +1345,37 @@ skipwhite: goto joinop; case '&': - source->tok.kind = select('=', OANDA, select('&', OLAND, OBAND)); + c0 = select('=', OANDA, select('&', OLAND, OBAND)); goto joinop; case '+': - source->tok.kind = select('=', OADDA, select('+', OSUFINC, OADD)); + c0 = select('=', OADDA, select('+', OSUFINC, OADD)); goto joinop; case '-': - source->tok.kind = select('=', OSUBA, select('-', OSUFDEC, OSUB)); + c0 = select('=', OSUBA, select('-', OSUFDEC, OSUB)); goto joinop; case '|': - source->tok.kind = select('=', OORA, select('|', OLOR, OBOR)); + c0 = select('=', OORA, select('|', OLOR, OBOR)); goto joinop; case '^': - source->tok.kind = select('=', OXORA, OXOR); + c0 = select('=', OXORA, 
OXOR); goto joinop; case '!': - source->tok.kind = select('=', ONEQ, OLNOT); + c0 = select('=', ONEQ, OLNOT); goto joinop; case '~': - source->tok.kind = select('=', OFLIP, OBNOT); + c0 = select('=', OFLIP, OBNOT); goto joinop; case '=': - source->tok.kind = select('=', select('=', OIDENT, OEQU), OASS); + c0 = select('=', select('=', OIDENT, OEQU), OASS); joinop: - return source->tok.kind; + return source->tok.kind = c0; default: error(&source->currloc, "invalid input character '%c'", c0);