语法分析器(syntax analyzer)【C实现】

编程入门 行业动态 更新时间:2024-10-13 12:18:54

语法分析器(syntax analyzer)【C实现】

语法分析器(syntax analyzer)【C实现】

查看正文内容

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <stdbool.h>
#include <ctype.h>

/* Number of elements in a true array (not valid for pointers). */
#define NELEMS(arr) (sizeof(arr) / sizeof((arr)[0]))

/* Token kinds read from the lexer output. The enumerator values are used
 * directly as indices into atr[], so the order here and there must agree. */
typedef enum {
    tk_EOI, tk_Mul, tk_Div, tk_Mod, tk_Add, tk_Sub, tk_Negate, tk_Not, tk_Lss, tk_Leq, tk_Gtr,
    tk_Geq, tk_Eql, tk_Neq, tk_Assign, tk_And, tk_Or, tk_If, tk_Else, tk_While, tk_Print,
    tk_Putc, tk_Lparen, tk_Rparen, tk_Lbrace, tk_Rbrace, tk_Semi, tk_Comma, tk_Ident,
    tk_Integer, tk_String
} TokenType;

/* AST node kinds. The enumerator values index Display_nodes[]. */
typedef enum {
    nd_Ident, nd_String, nd_Integer, nd_Sequence, nd_If, nd_Prtc, nd_Prts, nd_Prti, nd_While,
    nd_Assign, nd_Negate, nd_Not, nd_Mul, nd_Div, nd_Mod, nd_Add, nd_Sub, nd_Lss, nd_Leq,
    nd_Gtr, nd_Geq, nd_Eql, nd_Neq, nd_And, nd_Or
} NodeType;

/* One token as read from the input stream. */
typedef struct {
    TokenType tok;
    int err_ln;
    int err_col;
    char *text;             /* ident or string literal or integer value; NULL if none */
} tok_s;

/* AST node: interior nodes use left/right, leaves use value. */
typedef struct Tree {
    NodeType node_type;
    struct Tree *left;
    struct Tree *right;
    char *value;
} Tree;

// dependency: Ordered by tok, must remain in same order as TokenType enum
struct {
    const char *text, *enum_text;
    TokenType   tok;
    bool        right_associative, is_binary, is_unary;
    int         precedence;
    NodeType    node_type;
} atr[] = {
    {"EOI",             "End_of_input"   , tk_EOI,     false, false, false, -1, -1        },
    {"*",               "Op_multiply"    , tk_Mul,     false, true,  false, 13, nd_Mul    },
    {"/",               "Op_divide"      , tk_Div,     false, true,  false, 13, nd_Div    },
    {"%",               "Op_mod"         , tk_Mod,     false, true,  false, 13, nd_Mod    },
    {"+",               "Op_add"         , tk_Add,     false, true,  false, 12, nd_Add    },
    {"-",               "Op_subtract"    , tk_Sub,     false, true,  false, 12, nd_Sub    },
    {"-",               "Op_negate"      , tk_Negate,  false, false, true,  14, nd_Negate },
    {"!",               "Op_not"         , tk_Not,     false, false, true,  14, nd_Not    },
    {"<",               "Op_less"        , tk_Lss,     false, true,  false, 10, nd_Lss    },
    {"<=",              "Op_lessequal"   , tk_Leq,     false, true,  false, 10, nd_Leq    },
    {">",               "Op_greater"     , tk_Gtr,     false, true,  false, 10, nd_Gtr    },
    {">=",              "Op_greaterequal", tk_Geq,     false, true,  false, 10, nd_Geq    },
    {"==",              "Op_equal"       , tk_Eql,     false, true,  false,  9, nd_Eql    },
    {"!=",              "Op_notequal"    , tk_Neq,     false, true,  false,  9, nd_Neq    },
    {"=",               "Op_assign"      , tk_Assign,  false, false, false, -1, nd_Assign },
    {"&&",              "Op_and"         , tk_And,     false, true,  false,  5, nd_And    },
    {"||",              "Op_or"          , tk_Or,      false, true,  false,  4, nd_Or     },
    {"if",              "Keyword_if"     , tk_If,      false, false, false, -1, nd_If     },
    {"else",            "Keyword_else"   , tk_Else,    false, false, false, -1, -1        },
    {"while",           "Keyword_while"  , tk_While,   false, false, false, -1, nd_While  },
    {"print",           "Keyword_print"  , tk_Print,   false, false, false, -1, -1        },
    {"putc",            "Keyword_putc"   , tk_Putc,    false, false, false, -1, -1        },
    {"(",               "LeftParen"      , tk_Lparen,  false, false, false, -1, -1        },
    {")",               "RightParen"     , tk_Rparen,  false, false, false, -1, -1        },
    {"{",               "LeftBrace"      , tk_Lbrace,  false, false, false, -1, -1        },
    {"}",               "RightBrace"     , tk_Rbrace,  false, false, false, -1, -1        },
    {";",               "Semicolon"      , tk_Semi,    false, false, false, -1, -1        },
    {",",               "Comma"          , tk_Comma,   false, false, false, -1, -1        },
    {"Ident",           "Identifier"     , tk_Ident,   false, false, false, -1, nd_Ident  },
    {"Integer literal", "Integer"        , tk_Integer, false, false, false, -1, nd_Integer},
    {"String literal",  "String"         , tk_String,  false, false, false, -1, nd_String },
};

/* Printable node names, indexed by NodeType. */
const char *Display_nodes[] = {
    "Identifier", "String", "Integer", "Sequence", "If", "Prtc",
    "Prts", "Prti", "While", "Assign", "Negate", "Not", "Multiply", "Divide", "Mod",
    "Add", "Subtract", "Less", "LessEqual", "Greater", "GreaterEqual", "Equal",
    "NotEqual", "And", "Or"
};

static tok_s tok;                       /* current look-ahead token */
static FILE *source_fp, *dest_fp;       /* token input / AST output */

Tree *paren_expr(void);

/* Print "(line, col) error: <message>" to stdout and terminate with status 1.
 * The message is formatted printf-style and truncated to the local buffer. */
void error(int err_line, int err_col, const char *fmt, ... ) {
    va_list ap;
    char buf[1000];

    va_start(ap, fmt);
    vsnprintf(buf, sizeof buf, fmt, ap);    /* bounded: never overruns buf */
    va_end(ap);
    printf("(%d, %d) error: %s\n", err_line, err_col, buf);
    exit(1);
}

/* Return a heap copy of s; exits through error() if memory is exhausted.
 * Stands in for strdup(), which is not part of ISO C. */
static char *dup_text(const char *s) {
    size_t n = strlen(s) + 1;
    char *copy = malloc(n);

    if (copy == NULL)
        error(0, 0, "Out of memory\n");
    memcpy(copy, s, n);
    return copy;
}

/* Read one line from source_fp into a reusable static buffer.
 * Stores the line length (without the newline) in *len.
 * Returns NULL when the line is empty or EOF is hit before any character;
 * otherwise returns the buffer, which is overwritten by the next call. */
char *read_line(int *len) {
    static char *text = NULL;
    static int textmax = 0;

    for (*len = 0; ; (*len)++) {
        int ch = fgetc(source_fp);
        if (ch == EOF || ch == '\n') {
            if (*len == 0)
                return NULL;
            break;
        }
        if (*len + 1 >= textmax) {
            /* grow through a temporary so the old buffer is not lost on failure */
            int newmax = (textmax == 0 ? 128 : textmax * 2);
            char *grown = realloc(text, (size_t)newmax);
            if (grown == NULL)
                error(0, 0, "Out of memory\n");
            text = grown;
            textmax = newmax;
        }
        text[*len] = (char)ch;
    }
    text[*len] = '\0';
    return text;
}

/* Remove trailing whitespace from text in place, updating *len. Returns text. */
char *rtrim(char *text, int *len) {
    while (*len > 0 && isspace((unsigned char)text[*len - 1]))
        --(*len);
    text[*len] = '\0';
    return text;
}

/* Map a token name as it appears in the input (atr[].enum_text) to its
 * TokenType. Reports an error and exits when the name is not in atr[]. */
TokenType get_enum(const char *name) {
    for (size_t i = 0; i < NELEMS(atr); i++) {
        if (strcmp(atr[i].enum_text, name) == 0)
            return atr[i].tok;
    }
    error(0, 0, "Unknown token %s\n", name);
    return 0;
}

/* Read the next token line from source_fp and decode it.
 * Line layout: [ ]*{lineno}[ ]+{colno}[ ]+token[ ]+optional
 * Blank lines are skipped. A missing field, a value-carrying token without
 * its value, or running out of input is reported through error(). */
tok_s gettok(void) {
    tok_s result = { tk_EOI, 0, 0, NULL };
    int len;
    char *yytext;

    /* fetch the next line that still has content after trimming */
    for (;;) {
        yytext = read_line(&len);
        if (yytext == NULL) {
            /* read_line returns NULL for both a blank line and end of input */
            if (feof(source_fp) || ferror(source_fp))
                error(0, 0, "Unexpected end of token stream\n");
            continue;
        }
        yytext = rtrim(yytext, &len);
        if (len > 0)
            break;
    }

    /* get line, column and the token name */
    char *ln_field  = strtok(yytext, " ");
    char *col_field = strtok(NULL, " ");
    char *name      = strtok(NULL, " ");
    if (ln_field == NULL || col_field == NULL || name == NULL)
        error(0, 0, "Malformed token line\n");

    result.err_ln  = atoi(ln_field);
    result.err_col = atoi(col_field);
    result.tok     = get_enum(name);

    /* if there is extra data after the name, keep a copy of it */
    char *p = name + strlen(name);
    if (p != &yytext[len]) {
        for (++p; isspace((unsigned char)*p); ++p)
            ;
        result.text = dup_text(p);
    }

    /* these tokens become leaves, which need a value to copy */
    if (result.text == NULL &&
        (result.tok == tk_Ident || result.tok == tk_Integer || result.tok == tk_String))
        error(result.err_ln, result.err_col, "Token %s is missing its value\n", name);

    return result;
}

/* Allocate an interior node with the given children (either may be NULL). */
Tree *make_node(NodeType node_type, Tree *left, Tree *right) {
    Tree *t = calloc(1, sizeof *t);

    if (t == NULL)
        error(0, 0, "Out of memory\n");
    t->node_type = node_type;
    t->left = left;
    t->right = right;
    return t;
}

/* Allocate a leaf node holding its own copy of value. */
Tree *make_leaf(NodeType node_type, char *value) {
    Tree *t = calloc(1, sizeof *t);

    if (t == NULL)
        error(0, 0, "Out of memory\n");
    t->node_type = node_type;
    t->value = dup_text(value);
    return t;
}

/* Consume the current token if it is s; otherwise report an error tagged
 * with msg at the current token's position and exit. */
void expect(const char msg[], TokenType s) {
    if (tok.tok == s) {
        tok = gettok();
        return;
    }
    error(tok.err_ln, tok.err_col, "%s: Expecting '%s', found '%s'\n", msg, atr[s].text, atr[tok.tok].text);
}

/* Parse an expression: one primary followed by any binary operators whose
 * precedence in atr[] is at least p. Returns the expression subtree. */
Tree *expr(int p) {
    Tree *x = NULL;
    Tree *node;
    TokenType op;

    switch (tok.tok) {
    case tk_Lparen:
        x = paren_expr();
        break;
    case tk_Sub:
    case tk_Add:
        /* leading sign: '-' wraps the operand in Negate, '+' adds no node */
        op = tok.tok;
        tok = gettok();
        node = expr(atr[tk_Negate].precedence);
        x = (op == tk_Sub) ? make_node(nd_Negate, node, NULL) : node;
        break;
    case tk_Not:
        tok = gettok();
        x = make_node(nd_Not, expr(atr[tk_Not].precedence), NULL);
        break;
    case tk_Ident:
        x = make_leaf(nd_Ident, tok.text);
        tok = gettok();
        break;
    case tk_Integer:
        x = make_leaf(nd_Integer, tok.text);
        tok = gettok();
        break;
    default:
        error(tok.err_ln, tok.err_col, "Expecting a primary, found: %s\n", atr[tok.tok].text);
    }

    while (atr[tok.tok].is_binary && atr[tok.tok].precedence >= p) {
        op = tok.tok;
        tok = gettok();

        /* for a non-right-associative operator the right operand must bind tighter */
        int q = atr[op].precedence;
        if (!atr[op].right_associative)
            q++;

        node = expr(q);
        x = make_node(atr[op].node_type, x, node);
    }
    return x;
}

/* Parse '(' expr ')' and return the inner expression subtree. */
Tree *paren_expr(void) {
    expect("paren_expr", tk_Lparen);
    Tree *t = expr(0);
    expect("paren_expr", tk_Rparen);
    return t;
}

/* Parse one statement and return its subtree. Returns NULL for an empty
 * statement (';'), for an empty '{}' block, and at end of input. */
Tree *stmt(void) {
    Tree *t = NULL, *v, *e, *s, *s2;

    switch (tok.tok) {
    case tk_If:
        tok = gettok();
        e = paren_expr();
        s = stmt();
        s2 = NULL;
        if (tok.tok == tk_Else) {
            tok = gettok();
            s2 = stmt();
        }
        /* If(cond, If(then-branch, else-branch)) */
        t = make_node(nd_If, e, make_node(nd_If, s, s2));
        break;
    case tk_Putc:
        tok = gettok();
        e = paren_expr();
        t = make_node(nd_Prtc, e, NULL);
        expect("Putc", tk_Semi);
        break;
    case tk_Print: /* print '(' expr {',' expr} ')' */
        tok = gettok();
        for (expect("Print", tk_Lparen); ; expect("Print", tk_Comma)) {
            if (tok.tok == tk_String) {
                e = make_node(nd_Prts, make_leaf(nd_String, tok.text), NULL);
                tok = gettok();
            } else
                e = make_node(nd_Prti, expr(0), NULL);

            t = make_node(nd_Sequence, t, e);

            if (tok.tok != tk_Comma)
                break;
        }
        expect("Print", tk_Rparen);
        expect("Print", tk_Semi);
        break;
    case tk_Semi:
        tok = gettok();
        break;
    case tk_Ident:
        v = make_leaf(nd_Ident, tok.text);
        tok = gettok();
        expect("assign", tk_Assign);
        e = expr(0);
        t = make_node(nd_Assign, v, e);
        expect("assign", tk_Semi);
        break;
    case tk_While:
        tok = gettok();
        e = paren_expr();
        s = stmt();
        t = make_node(nd_While, e, s);
        break;
    case tk_Lbrace:         /* {stmt} */
        for (expect("Lbrace", tk_Lbrace); tok.tok != tk_Rbrace && tok.tok != tk_EOI;)
            t = make_node(nd_Sequence, t, stmt());
        expect("Lbrace", tk_Rbrace);
        break;
    case tk_EOI:
        break;
    default:
        error(tok.err_ln, tok.err_col, "expecting start of statement, found '%s'\n", atr[tok.tok].text);
    }
    return t;
}

/* Parse the whole token stream into a left-leaning chain of Sequence nodes. */
Tree *parse(void) {
    Tree *t = NULL;

    tok = gettok();
    do {
        t = make_node(nd_Sequence, t, stmt());
    } while (t != NULL && tok.tok != tk_EOI);
    return t;
}

/* Print the tree in pre-order, one node per line; a NULL subtree prints ";".
 * Output goes to dest_fp, or to stdout if dest_fp has not been set up. */
void prt_ast(Tree *t) {
    FILE *out = (dest_fp != NULL) ? dest_fp : stdout;

    if (t == NULL) {
        fprintf(out, ";\n");
        return;
    }

    fprintf(out, "%-14s ", Display_nodes[t->node_type]);
    if (t->node_type == nd_Ident || t->node_type == nd_Integer || t->node_type == nd_String) {
        fprintf(out, "%s\n", t->value);
    } else {
        fprintf(out, "\n");
        prt_ast(t->left);
        prt_ast(t->right);
    }
}

/* Set *fp to std when fn is empty, otherwise open fn with the given mode;
 * a failed open is reported through error(). */
void init_io(FILE **fp, FILE *std, const char mode[], const char fn[]) {
    if (fn[0] == '\0')
        *fp = std;
    else if ((*fp = fopen(fn, mode)) == NULL)
        error(0, 0, "Can't open %s\n", fn);
}

/* Usage: prog [token-file [ast-file]]; missing arguments mean stdin/stdout. */
int main(int argc, char *argv[]) {
    init_io(&source_fp, stdin,  "r",  argc > 1 ? argv[1] : "");
    init_io(&dest_fp,   stdout, "wb", argc > 2 ? argv[2] : "");
    prt_ast(parse());
    if (fflush(dest_fp) != 0)
        error(0, 0, "Can't write output\n");
    return 0;
}

输出:

Sequence
Sequence
Sequence
Sequence
Sequence
;
Assign
Identifier     count
Integer        1
Assign
Identifier     n
Integer        1
Assign
Identifier     limit
Integer        100
While
Less
Identifier     n
Identifier     limit
Sequence
Sequence
Sequence
Sequence
Sequence
;
Assign
Identifier     k
Integer        3
Assign
Identifier     p
Integer        1
Assign
Identifier     n
Add
Identifier     n
Integer        2
While
And
LessEqual
Multiply
Identifier     k
Identifier     k
Identifier     n
Identifier     p
Sequence
Sequence
;
Assign
Identifier     p
NotEqual
Multiply
Divide
Identifier     n
Identifier     k
Identifier     k
Identifier     n
Assign
Identifier     k
Add
Identifier     k
Integer        2
If
Identifier     p
If
Sequence
Sequence
;
Sequence
Sequence
;
Prti
Identifier     n
;
Prts
String         " is prime\n"
;
Assign
Identifier     count
Add
Identifier     count
Integer        1
;
Sequence
Sequence
Sequence
;
Prts
String         "Total primes found: "
;
Prti
Identifier     count
;
Prts
String         "\n"
;

更多推荐

语法分析器(syntax analyzer)【C实现】

本文发布于:2024-03-06 02:33:23,感谢您对本站的认可!
本文链接:https://www.elefans.com/category/jswz/34/1714125.html
版权声明:本站内容均来自互联网,仅供演示用,请勿用于商业和其他非法用途。如果侵犯了您的权益请与我们联系,我们将在24小时内删除。
本文标签:分析器   语法   analyzer   syntax

发布评论

评论列表 (有 0 条评论)
草根站长

>www.elefans.com

编程频道|电子爱好者 - 技术资讯及电子产品介绍!