LEX实验报告

编程入门行业动态更新时间:2024-10-11 09:26:30

LEX实验报告

LEX实验报告

程序设计1 实验报告

题目简介：词法分析程序的设计与实现
实验环境
实验流程
- 输出形式
- 源代码分析
- 测试流程
实验总结
附录

实验内容

实验内容：设计并实现C语言的词法分析程序，要求实现以下功能：

可以识别出用C语言编写的源程序中的每个单词符号，并以记号的形式输出每个单词符号
可以识别并跳过源程序中的注释
可以统计源程序中的语句行数，各类单词的个数，以及字符总数，并输出统计结果
检查源程序中存在的词法错误，并报告错误所在的位置
对源程序中出现的错误进行适当的恢复，使词法分析可以继续进行，对源程序进行一次扫描，即可检查并报告源程序中存在的所有词法错误

实验方法

编写LEX源程序，利用LEX编译程序自动生成词法分析程序。

实验环境

操作系统： macOS Mojave 10.14.4
lex版本：flex 2.5.35 Apple(flex-31)
yacc版本：bison (GNU Bison) 2.3

实验流程

输出形式

我们假设该词法分析程序使用表1所给出的翻译表。在分离出一个单词后，对识别出的记号以二元式的形式加以输出，形式为 <TOKEN, ATTRIBUTE>。

对于C语言的关键字，我们将每个关键字单独设为一类，所以其记号唯一代表一个关键字，不再需要属性，C语言中的关键字总结如下：

re	token	attribute	re	token	attribute
char	char	none	int	int	none
double	double	none	enum	enum	none
float	float	none	long	long	none
short	short	none	struct	struct	none
signed	signed	none	union	union	none
unsigned	unsigned	none	void	void	none
for	for	none	do	do	none
while	while	none	break	break	none
continue	continue	none	if	if	none
else	else	none	goto	goto	none
switch	switch	none	case	case	none
default	default	none	return	return	none
auto	auto	none	register	register	none
extern	extern	none	static	static	none
const	const	none	sizeof	sizeof	none
typedef	typedef	none	volatile	volatile	none

正则表达式

标识符
{letter}({letter}|{digit})*
常数
{digit}+(\.{digit}+)?((e|E)[+\-]?{digit}+)?
字符串
(\"(.)*?\")|('.?')
分隔符
[\(\);\{\}\[\],]
注释
单行 \/\/.*?
多行 \/\*[^\*\/]*?\*\/
空白符
[ \t]
换行符
[\n]
预处理
#.*
词法错误
非法标识符
wrong_identifier -> {digit}+{letter}({letter}|{digit})*
引号不匹配
quotation_not_match -> (\'|\")
注释不匹配
annotation_not_match -> (\/\*)|(\*\/)
字符错误
wrong_character -> .

翻译规则

关键字
<type, keyword, keyword_id>三元组的形式输出关键字信息。
关键字个数加一，加上相应的字符数。
封装：void KeyWord(int keyword_id);
标识符
<type, identifier, id_addr>三元组的形式输出标识符信息。
标识符个数加一，加上相应的字符数。
封装：void Identifier(char *yytext);
常数
<type, constant>二元组的形式输出常数信息。
常数个数加一，加上相应的字符数。
封装：void Number(char *yytext);
运算符
<type, operator, operator_id>三元组的形式输出运算符信息。
运算符个数加一，加上相应的字符数。
封装：void Operator(int operator_id);
字符串
<type, string>二元组的形式输出字符串信息。
字符串个数加一，加上相应的字符数。
封装：void String(char *yytext);
分隔符
<type, separator, separator_id>三元组的形式输出分隔符信息。
分隔符个数加一，加上相应的字符数。
封装：void Separator(char *yytext);
注释
<type, annotation_content>二元组的形式输出注释信息。
注释个数加一，加上相应的字符数。
封装：void SingleLineAnnotation(char *yytext);
void MultiLineAnnotation(char *yytext);
空白符
加上相应的字符数。
换行符
行数加一，加上相应的字符数。
预处理
<type, info>二元组的形式输出预处理信息。
加上相应的字符数。
词法错误
输出相关错误信息，词法错误数++，加上相应的字符数。
常数个数加一，加上相应的字符数。

测试流程

1.测试方法：
我们使用已经编写好的脚本startup.sh来进行测试。

# startup.sh 
flex lex.l
gcc -c lex.yy.c
gcc -o parser lex.yy.o -ll
./parser test.c result.txt # test.c 表示输入源，result.txt 表示输出源

2.测试结果：
首先我们在正确的C语言源程序上进行测试

#include<string.h>
#include<stdio.h>

int main()
{
int a = 0;
char ch = 'a';
for(long i = 0; i < 1e1; ++i){
printf("hello   world times: %ld ", i);
}
/* ejaig"
dfah
  j  */
/* aa */ /* bb */
// something interested.
return 0;
}

执行结果：（见result.txt）
Lexical Analysis
r ---------------------------------------------------------- r
input source: test.c, output source: result.txt
r ---------------------------------------------------------- r
line: 1 <预处理, #include<string.h>>
line: 2 <预处理, #include<stdio.h>>
line: 4 <关键字, int, 30>
line: 4 <标识符, main>
line: 4 <分隔符, (>
line: 4 <分隔符, )>
line: 5 <分隔符, {>
line: 6 <关键字, int, 30>
line: 6 <标识符, a>
line: 6 <操作符, =, 90>
line: 6 <数字, 0>
line: 6 <分隔符, ;>
line: 7 <关键字, char, 34>
line: 7 <标识符, ch>
line: 7 <操作符, =, 90>
line: 7 <字符串, 'a'>
line: 7 <分隔符, ;>
line: 8 <关键字, for, 42>
line: 8 <分隔符, (>
line: 8 <关键字, long, 33>
line: 8 <标识符, i>
line: 8 <操作符, =, 90>
line: 8 <数字, 0>
line: 8 <分隔符, ;>
line: 8 <标识符, i>
line: 8 <操作符, <, 91>
line: 8 <数字, 1e1>
line: 8 <分隔符, ;>
line: 8 <操作符, ++, 88>
line: 8 <标识符, i>
line: 8 <分隔符, )>
line: 8 <分隔符, {>
line: 9 <标识符, printf>
line: 9 <分隔符, (>
line: 9 <字符串, "hello   world times: %ld ">
line: 9 <分隔符, ,>
line: 9 <标识符, i>
line: 9 <分隔符, )>
line: 9 <分隔符, ;>
line: 10 <分隔符, }>
line: 11-13 <多行注释, /* ejaig" dfah   j  */>
line: 14-14 <多行注释, /* aa */>
line: 14-14 <多行注释, /* bb */>
line: 15 <单行注释, // something interested>
line: 16 <关键字, return, 53>
line: 16 <数字, 0>
line: 16 <分隔符, ;>
line: 17 <分隔符, }>

r ---------------------------------------------------------- r
There are 20 lines, and 132 characters
There are 7 KeyWord, 9 Identifier, 5 Number, 18 Separator
There are 1 String, 4 Annotation, 6 Operator, 2 Pretreatment
Total Errors : 3
r---------------------------------------------------------- r

接着，我们测试在有一些词法错误的c语言源程序上的结果：

#include<string.h>
#include<stdio.h>

int main()
{
/* ejaig"
dfah
  j  */
/* aa */ /* bb */
// something interested.
int a = 0;
char ch = 'a;
int 93yuan = 0;
for(long i = 0; i < 1e1; ++i){
printf("hello   world times: %ld ", i);
}

@
return 0;
}

测试结果：
Lexical Analysis
r ---------------------------------------------------------- r
input source: test.c, output source: result.txt
r ---------------------------------------------------------- r
line: 1 <预处理, #include<string.h>>
line: 2 <预处理, #include<stdio.h>>
line: 4 <关键字, int, 30>
line: 4 <标识符, main>
line: 4 <分隔符, (>
line: 4 <分隔符, )>
line: 5 <分隔符, {>
line: 6-8 <多行注释, /* ejaig" dfah   j  */>
line: 9-9 <多行注释, /* aa */>
line: 9-9 <多行注释, /* bb */>
line: 10 <单行注释, // something interested>
line: 11 <关键字, int, 30>
line: 11 <标识符, a>
line: 11 <操作符, =, 90>
line: 11 <数字, 0>
line: 11 <分隔符, ;>
line: 12 <关键字, char, 34>
line: 12 <标识符, ch>
line: 12 <操作符, =, 90>
line: 12 <ERROR, 102, quotation_not_match>
line: 12 <标识符, a>
line: 12 <分隔符, ;>
line: 13 <关键字, int, 30>
line: 13 <ERROR, 100, wrong_identifier>
line: 13 <操作符, =, 90>
line: 13 <数字, 0>
line: 13 <分隔符, ;>
line: 14 <关键字, for, 42>
line: 14 <分隔符, (>
line: 14 <关键字, long, 33>
line: 14 <标识符, i>
line: 14 <操作符, =, 90>
line: 14 <数字, 0>
line: 14 <分隔符, ;>
line: 14 <标识符, i>
line: 14 <操作符, <, 91>
line: 14 <数字, 1e1>
line: 14 <分隔符, ;>
line: 14 <操作符, ++, 88>
line: 14 <标识符, i>
line: 14 <分隔符, )>
line: 14 <分隔符, {>
line: 15 <标识符, printf>
line: 15 <分隔符, (>
line: 15 <字符串, "hello world times: %ld ">
line: 15 <分隔符, ,>
line: 15 <标识符, i>
line: 15 <分隔符, )>
line: 15 <分隔符, ;>
line: 16 <分隔符, }>
line: 18 <ERROR, 103, wrong_character>
line: 19 <关键字, return, 53>
line: 19 <数字, 0>
line: 19 <分隔符, ;>
line: 20 <分隔符, }>

实验总结

本次实验通过使用Lex编写词法分析程序，加深了我们对编译过程中词法分析阶段的理解，明白了程序如何通过词法分析将字符流转换为记号流。本次实验总共用时6h，包括熟悉Lex工具、编写实验代码、完成实验报告等工作。

本次实验内容虽然不是很难，但却切实地增强了我们编写实战代码的能力与自信，锻炼了自己上网找资料的能力，让我对编写所需代码更加有信心，更能体会到写代码的乐趣。

附录：完整LEX源代码

%{
/*
 * Prologue copied verbatim into the generated scanner: token codes,
 * global counters, output-stream state and prototypes for the helpers
 * defined in the user-code section.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>

// assign a value to each type
#define IDENTIFIER      10
#define NUMBER          11
#define SEPARATOR       12
#define STRING          13
#define ANNOTATION      14
#define PRETREATMENT    15

// assign a value to each key words.
#define INT         30
#define FLOAT       31
#define DOUBLE      32
#define LONG        33
#define CHAR        34
#define ENUM        35
#define SHORT       36
#define SIGNED      37
#define STRUCT      38
#define UNION       39
#define UNSIGNED    40
#define VOID        41
#define FOR         42
#define DO          43
#define WHILE       44
#define BREAK       45
#define CONTINUE    46
#define IF          47
#define ELSE        48
#define GOTO        49
#define SWITCH      50
#define CASE        51
#define DEFAULT     52
#define RETURN      53
#define AUTO        54
#define EXTERN      55
#define REGISTER    56
#define STATIC      57
#define CONST       58
#define SIZEOF      59
#define TYPEDEF     60
#define VOLATILE    61

// Arithmetic operation
#define ADD         80
#define SUB         81
#define MUL         82
#define DIS         83
#define SUBEQU      84
#define ADDEQU      85
#define MULEQU      86
#define DISEQU      87
#define SLFADD      88
#define SLFSUB      89
#define ASSIGN      90

// Relation operator
#define LT          91
#define LE          92
#define EQ          93
#define NE          94
#define GT          95
#define GE          96

// Errors
#define WRONG_IDENTIFIER        100
#define ANNOTATION_NOT_MATCH    101
#define QUOTATION_NOT_MATCH     102
#define WRONG_CHARACTER         103

// Global state
int LINE_COUNT = 1, CharacterCount = 0;  // line counter, character counter

// Per-category token counters
int KeyWordCount = 0, IdentifierCount = 0, SeparatorCount = 0, NumberCount = 0;
int StringCount = 0, AnnotationCount = 0, PretreatmentCount = 0, OperatorCount = 0;
int ErrorCount = 0;

// Output stream
int outSource = 0;  // set to 1 by main() when an output file argument is supplied

// File names and file pointers
char output_file_name[40];
char input_file_name[40];
FILE *fpin, *fpout;

// Helper functions, one per token category
void KeyWord(int type);
void Identifier(char *yytext);
void Number(char *yytext);
void Operator(int type);
void String(char *yytext);
void Separator(char *yytext);
void SingleLineAnnotation(char *yytext);
void MultiLineAnnotation(char *yytext);
void Pretreatment(char *yytext);

/* ERROR */
void RaiseError(int type);

/* Utility: number of non-blank characters in text */
int CountCharacter(char *text);

/* Entry point */
int main(int argc, char* argv[]);
%}

/* Blanks inside a line; a carriage return is included so CRLF input is not
   reported as an illegal character. */
delim                   [ \t\r]
/* C identifiers may contain underscores. */
letter                  [A-Za-z_]
digit                   [0-9]
identifier              {letter}({letter}|{digit})*
number                  {digit}+(\.{digit}+)?((e|E)[+\-]?{digit}+)?
/* flex has no non-greedy quantifiers, so the body of a literal is written
   as "anything but the closing quote, a backslash or a newline, or a
   backslash escape"; otherwise two literals on one line would be merged. */
string                  (\"([^\"\\\n]|\\.)*\")|('([^'\\\n]|\\.)?')
separator               [\(\);\{\}\[\],]
annotation1             \/\/.*
/* Block comment: the body may contain stars and slashes, just not a star
   directly followed by a slash. */
annotation2             \/\*([^*]|\*+[^*/])*\*+\/
whitespace              {delim}+
pretreatment            #.*
wrong_identifier        {digit}+{letter}({letter}|{digit})*
quotation_not_match     (\'|\")
/* A comment opener that annotation2 could not complete, or a stray closer. */
annotation_not_match    (\/\*)|(\*\/)
wrong_character         .
enter                   [\n]
%%
    /* Keywords: listed before {identifier} so that, on an equal-length
       match, the keyword rule wins. */
int         {KeyWord(INT);}
float       {KeyWord(FLOAT);}
double      {KeyWord(DOUBLE);}
long        {KeyWord(LONG);}
char        {KeyWord(CHAR);}
enum        {KeyWord(ENUM);}
short       {KeyWord(SHORT);}
signed      {KeyWord(SIGNED);}
struct      {KeyWord(STRUCT);}
union       {KeyWord(UNION);}
unsigned    {KeyWord(UNSIGNED);}
void        {KeyWord(VOID);}
for         {KeyWord(FOR);}
do          {KeyWord(DO);}
while       {KeyWord(WHILE);}
break       {KeyWord(BREAK);}
continue    {KeyWord(CONTINUE);}
if          {KeyWord(IF);}
else        {KeyWord(ELSE);}
goto        {KeyWord(GOTO);}
switch      {KeyWord(SWITCH);}
case        {KeyWord(CASE);}
default     {KeyWord(DEFAULT);}
return      {KeyWord(RETURN);}
auto        {KeyWord(AUTO);}
register    {KeyWord(REGISTER);}
extern      {KeyWord(EXTERN);}
static      {KeyWord(STATIC);}
const       {KeyWord(CONST);}
sizeof      {KeyWord(SIZEOF);}
typedef     {KeyWord(TYPEDEF);}
volatile    {KeyWord(VOLATILE);}

    /* Assignment and relational operators (C spells "not equal" as !=). */
\=          {Operator(ASSIGN);}
\<          {Operator(LT);}
\<\=        {Operator(LE);}
\=\=        {Operator(EQ);}
\!\=        {Operator(NE);}
\>          {Operator(GT);}
\>\=        {Operator(GE);}

    /* Arithmetic operators. */
\+          {Operator(ADD);}
\-          {Operator(SUB);}
\*          {Operator(MUL);}
\/          {Operator(DIS);}
\+\=        {Operator(ADDEQU);}
\-\=        {Operator(SUBEQU);}
\*\=        {Operator(MULEQU);}
\/\=        {Operator(DISEQU);}
\+\+        {Operator(SLFADD);}
\-\-        {Operator(SLFSUB);}

{identifier}        {Identifier(yytext);}
{number}            {Number(yytext);}
{string}            {String(yytext);}
{separator}         {Separator(yytext);}
{annotation1}       {SingleLineAnnotation(yytext);}
{annotation2}       {MultiLineAnnotation(yytext);}
{pretreatment}      {Pretreatment(yytext);}
{whitespace}        {}

    /* Lexical errors: report and keep scanning, so one pass finds them all. */
{wrong_identifier}          {RaiseError(WRONG_IDENTIFIER);}
{quotation_not_match}       {RaiseError(QUOTATION_NOT_MATCH);}
{annotation_not_match}      {RaiseError(ANNOTATION_NOT_MATCH);}
{wrong_character}           {RaiseError(WRONG_CHARACTER);}
{enter}                     {LINE_COUNT++;}
%%

/*
 * Return the number of characters in text that are not a space, a tab
 * or a newline.
 */
int CountCharacter(char *text) {
    int count = 0;
    for (const char *p = text; *p != '\0'; ++p) {
        if (*p != ' ' && *p != '\t' && *p != '\n') {
            count += 1;
        }
    }
    return count;
}

/* Map an error code to the name printed in the report; "unknown" otherwise. */
static const char *ErrorName(int type) {
    switch (type) {
    case WRONG_IDENTIFIER:     return "wrong_identifier";
    case ANNOTATION_NOT_MATCH: return "annotation_not_match";
    case QUOTATION_NOT_MATCH:  return "quotation_not_match";
    case WRONG_CHARACTER:      return "wrong_character";
    default:                   return "unknown";
    }
}

/* Report the keyword currently held in the scanner's yytext; type is its token code. */
void KeyWord(int type) {
    CharacterCount += (int)strlen(yytext);
    KeyWordCount += 1;
    fprintf(fpout, "line: %d <关键字, %s, %d>\n", LINE_COUNT, yytext, type);
}

/* Report the operator currently held in the scanner's yytext; type is its token code. */
void Operator(int type) {
    CharacterCount += (int)strlen(yytext);
    OperatorCount += 1;
    fprintf(fpout, "line: %d <操作符, %s, %d>\n", LINE_COUNT, yytext, type);
}

/* Report an identifier lexeme. */
void Identifier(char *text) {
    CharacterCount += (int)strlen(text);
    IdentifierCount += 1;
    fprintf(fpout, "line: %d <标识符, %s>\n", LINE_COUNT, text);
}

/* Report a numeric constant lexeme. */
void Number(char *text) {
    CharacterCount += (int)strlen(text);
    NumberCount += 1;
    fprintf(fpout, "line: %d <数字, %s>\n", LINE_COUNT, text);
}

/* Report a string or character literal; only non-blank characters are counted. */
void String(char *text) {
    CharacterCount += CountCharacter(text);
    StringCount += 1;
    fprintf(fpout, "line: %d <字符串, %s>\n", LINE_COUNT, text);
}

/* Report a separator lexeme. */
void Separator(char *text) {
    CharacterCount += (int)strlen(text);
    SeparatorCount += 1;
    fprintf(fpout, "line: %d <分隔符, %s>\n", LINE_COUNT, text);
}

/*
 * Report a single-line comment. The lexeme never contains the newline, so
 * it is printed in full; only a trailing carriage return (CRLF input) is
 * left out. Printing straight from the lexeme avoids a fixed-size copy
 * that long comments could overflow.
 */
void SingleLineAnnotation(char *text) {
    size_t len = strlen(text);
    if (len > 0 && text[len - 1] == '\r') {
        len -= 1;
    }
    CharacterCount += CountCharacter(text);
    AnnotationCount += 1;
    fprintf(fpout, "line: %d <单行注释, %.*s>\n", LINE_COUNT, (int)len, text);
}

/*
 * Report a block comment together with the line range it covers. Embedded
 * newlines advance LINE_COUNT and are replaced by spaces in place so the
 * report stays on one line.
 */
void MultiLineAnnotation(char *text) {
    int begin_line = LINE_COUNT;
    /* Count before the newlines are overwritten: CountCharacter skips
       newlines as well as blanks. */
    CharacterCount += CountCharacter(text);
    for (char *p = text; *p != '\0'; ++p) {
        if (*p == '\n') {
            LINE_COUNT++;
            *p = ' ';
        }
    }
    AnnotationCount += 1;
    fprintf(fpout, "line: %d-%d <多行注释, %s>\n", begin_line, LINE_COUNT, text);
}

/* Report a preprocessor line; only non-blank characters are counted. */
void Pretreatment(char *text) {
    CharacterCount += CountCharacter(text);
    PretreatmentCount += 1;
    fprintf(fpout, "line: %d <预处理, %s>\n", LINE_COUNT, text);
}

/*
 * Report a lexical error for the text currently held in yytext. The scanner
 * simply continues with the next character afterwards, which is the
 * recovery strategy.
 */
void RaiseError(int type) {
    ErrorCount += 1;
    CharacterCount += CountCharacter(yytext);
    fprintf(fpout, "line: %d <ERROR, %d, %s>\n", LINE_COUNT, type, ErrorName(type));
}

/*
 * Usage: parser input_file [output_file]
 * Scans input_file and writes the token list and statistics to output_file,
 * or to standard output when no output file is given.
 * Returns 0 on success and EXIT_FAILURE when arguments are missing or a
 * file cannot be opened.
 */
int main(int argc, char* argv[]) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s input_file [output_file]\n",
                argc > 0 ? argv[0] : "parser");
        return EXIT_FAILURE;
    }

    /* The name buffers are only used for display; the files are opened
       from argv directly so a long path is never truncated. */
    snprintf(input_file_name, sizeof input_file_name, "%s", argv[1]);

    // open the input file.
    if ((fpin = fopen(argv[1], "r")) == NULL) {
        printf("can't open the file: %s", argv[1]);
        return EXIT_FAILURE;
    }
    yyin = fpin;

    /* Default to standard output so the report is never written through a
       null stream. */
    fpout = stdout;
    snprintf(output_file_name, sizeof output_file_name, "%s", "stdout");
    if (argc >= 3) {
        outSource = 1;
        snprintf(output_file_name, sizeof output_file_name, "%s", argv[2]);
        // open the output stream.
        if ((fpout = fopen(argv[2], "w")) == NULL) {
            printf("cannot write the file\n");
            fclose(fpin);
            return EXIT_FAILURE;
        }
    }

    fprintf(fpout, "Lexical Analysis\n");
    fprintf(fpout, "----------------------------------------------------------\n");
    fprintf(fpout, "input source: %s, output source: %s\n", input_file_name, output_file_name);
    fprintf(fpout, "----------------------------------------------------------\n");

    yylex();

    fprintf(fpout, "\n");
    fprintf(fpout, "----------------------------------------------------------\n");
    fprintf(fpout, "There are %d lines, and %d characters\n", LINE_COUNT, CharacterCount);
    fprintf(fpout, "There are %d KeyWord, %d Identifier, %d Number, %d Separator\n",
            KeyWordCount, IdentifierCount, NumberCount, SeparatorCount);
    fprintf(fpout, "There are %d String, %d Annotation, %d Operator, %d Pretreatment\n",
            StringCount, AnnotationCount, OperatorCount, PretreatmentCount);
    fprintf(fpout, "Total Errors : %d\n", ErrorCount);
    fprintf(fpout, "----------------------------------------------------------\n");

    if (outSource) {
        fclose(fpout);
    }
    fclose(fpin);
    return 0;
}