From: mar77i Date: Tue, 10 Mar 2026 12:55:47 +0000 (+0100) Subject: parse_char: a character processing state machine X-Git-Url: https://git.mar77i.info/?a=commitdiff_plain;h=HEAD;p=cheapsh parse_char: a character processing state machine --- diff --git a/cheapsh.c b/cheapsh.c index 0969877..113a593 100644 --- a/cheapsh.c +++ b/cheapsh.c @@ -31,13 +31,9 @@ struct buffer { char *data; }; -enum status { - STATUS_ERROR = -1, - STATUS_OK = 0, - STATUS_END = '\n', - STATUS_SPACE = ' ', - STATUS_BACKSLASH = '\\', -}; +#define OK 0 +#define ERR -1 +#define END 1 #define BUFFER_RESIZE(b, f) do { \ void *np; \ @@ -48,128 +44,150 @@ enum status { np = realloc((b)->data, (b)->allo * (f)); \ if (np == NULL) { \ perror("realloc"); \ - return STATUS_ERROR; \ + return ERR; \ } \ (b)->data = np; \ } while (0) -static inline enum status buffer_append_char(struct buffer *buf, char c) { +static inline int buffer_append_char(struct buffer *buf, char c) { BUFFER_RESIZE(buf, 1); buf->data[buf->len - 1] = c; buf->data[buf->len++] = '\0'; - return STATUS_OK; + return OK; } #define CHARPP(c) ((char**)(c)) -static inline enum status buffer_append_char_ptr(struct buffer *buf, char *ptr) { +static inline int buffer_append_char_ptr(struct buffer *buf, char *ptr) { BUFFER_RESIZE(buf, sizeof ptr); CHARPP(buf->data)[buf->len - 1] = ptr; CHARPP(buf->data)[buf->len++] = NULL; - return STATUS_OK; + return OK; } -static inline enum status push_arg( - struct buffer *strs, struct buffer *ptrs, char *start -) { - if (buffer_append_char_ptr(ptrs, start) == STATUS_ERROR) - return STATUS_ERROR; - return buffer_append_char(strs, '\0'); -} +#define PARSE_FUNC(x) int x( \ + int c, struct buffer *strs, struct buffer *ptrs, struct cheapsh_parser_state *cps \ +) + +struct cheapsh_parser_state { + PARSE_FUNC((*parse_char)); + uintptr_t start; +}; -static inline enum status wordsplit_unquoted_char(int c) { +static PARSE_FUNC(parse_backslashed_char); + +static PARSE_FUNC(parse_unquoted_char) { switch (c) { case ' ': case '\t': - return STATUS_SPACE; + if (strs->len == cps->start) + return OK; + else if ( + buffer_append_char_ptr(ptrs, (char*)cps->start) < 0 + || buffer_append_char(strs, '\0') < 0 + ) + return ERR; + cps->start = strs->len - 1; + break; case '\\': - return STATUS_BACKSLASH; + cps->parse_char = parse_backslashed_char; + break; case '\r': case '\n': case ';': - return STATUS_END; + return END; + default: + if (!isprint(c)) { + fprintf(stderr, "Error: invalid input: '%d'\n", c); + return ERR; + } else if (buffer_append_char(strs, c) < 0) + return ERR; } - return STATUS_OK; + return OK; } -enum status wordsplit(FILE *fh, struct buffer *strs, struct buffer *ptrs) { - uintptr_t start = 0; - size_t count = 0, i; +static PARSE_FUNC(parse_backslashed_char) { + switch (c) { + case '\r': + case '\n': + if (strs->len == cps->start) + break; + else if ( + buffer_append_char_ptr(ptrs, (char*)cps->start) < 0 + || buffer_append_char(strs, '\0') < 0 + ) + return ERR; + cps->start = strs->len - 1; + break; + default: + if (!isprint(c)) { + fprintf(stderr, "Error: invalid input: '%d'\n", c); + return ERR; + } else if (buffer_append_char(strs, c) < 0) + return ERR; + } + cps->parse_char = parse_unquoted_char; + return OK; +} + +int parse(FILE *fh, struct buffer *strs, struct buffer *ptrs) { + struct cheapsh_parser_state cps = { parse_unquoted_char, 0 }; + size_t i; int c; - enum status st = STATUS_OK; - while ((c = fgetc(fh)) != EOF) { - if (st == STATUS_BACKSLASH) { - if ((st = buffer_append_char(strs, c)) == STATUS_ERROR) - return STATUS_ERROR; + while ((c = fgetc(fh)) != EOF) + switch (cps.parse_char(c, strs, ptrs, &cps)) { + case ERR: + return ERR; + case OK: continue; - } - switch (st = wordsplit_unquoted_char(c)) { - case STATUS_ERROR: - return STATUS_ERROR; - case STATUS_OK: - if (!isprint(c)) { - fprintf(stderr, "Error: invalid input: '%d'\n", c); - return STATUS_ERROR; - } else if (buffer_append_char(strs, c) == STATUS_ERROR) - return STATUS_ERROR; - break; - case STATUS_END: + case END: goto done; - case STATUS_SPACE: - if (strs->len == start) - return STATUS_OK; - else if (push_arg(strs, ptrs, (char*)start) == STATUS_ERROR) - return STATUS_ERROR; - start = strs->len - 1; - count++; - break; - case STATUS_BACKSLASH: - break; } - } done: if (c == EOF && ferror(fh) != 0) { fprintf(stderr, "Error: read error!\n"); - return STATUS_ERROR; - } else if (st == STATUS_BACKSLASH) { + return ERR; + } else if (cps.parse_char == parse_backslashed_char) { fprintf(stderr, "Error: backslash at EOF!\n"); - return STATUS_ERROR; - } else if (strs->len > start) { - if (push_arg(strs, ptrs, (char*)start) == STATUS_ERROR) - return STATUS_ERROR; - count++; - start = strs->len; + return ERR; + } else if (strs->len > cps.start) { + if ( + buffer_append_char_ptr(ptrs, (char*)cps.start) < 0 + || buffer_append_char(strs, '\0') < 0 + ) + return ERR; + cps.start = strs->len; } - for (i = 0; i < count; i++) + for (i = 0; i < ptrs->len - (ptrs->len > 0); i++) CHARPP(ptrs->data)[i] += (uintptr_t)strs->data; - return c == EOF ? STATUS_END : STATUS_OK; + return c == EOF ? END : OK; } -static inline enum status execute(char **argv) { - pid_t pid = fork(); - if (pid == 0) { - execvp(argv[0], argv); +static inline int execute(struct buffer ptrs) { + pid_t pid; + if (ptrs.len == 0) + return OK; + if ((pid = fork()) == 0) { + execvp(*CHARPP(ptrs.data), CHARPP(ptrs.data)); perror("execvp"); - return STATUS_ERROR; + return ERR; } else if (waitpid(pid, NULL, 0) < 0) { perror("waitpid"); - return STATUS_ERROR; + return ERR; } - return STATUS_OK; + return OK; } int main(int argc, char *argv[]) { - int ret = EXIT_FAILURE; - enum status st; + int ret = EXIT_FAILURE, parse_result; struct buffer strs = { 0, 0, NULL }, ptrs = { 0, 0, NULL }; - do { - if ( - (st = wordsplit(stdin, &strs, &ptrs)) == STATUS_ERROR - || (ptrs.len > 0 && execute(CHARPP(ptrs.data)) == STATUS_ERROR) - ) + for (;;) { + if ((parse_result = parse(stdin, &strs, &ptrs)) < 0 || execute(ptrs) < 0) goto error; + else if (parse_result == END) + break; ptrs.len = strs.len = 0; - } while (st != STATUS_END); + } ret = EXIT_SUCCESS; error: free(strs.data);