title: 词法分析
tags:
- 词法分析
- 编译原理
author: 半坡上
img: 'https://cdn.jsdelivr.net/gh/yifangtan/YiPhoto@main/Photo/ok.jpeg'
toc: true
categories: 编译原理
summary: 这是编译原理中一个小实验:设计并实现一个词法分析器,相关代码如下。
abbrlink: 45309
date: 2021-12-23 18:59:01
一、实验目的
设计并实现一个词法分析器,深刻理解编译原理中词法分析器的原理。
二、实验内容
通过使用自己熟悉的语言设计并实现一个词法分析器,是此法分析器按要求的格式输出经过分析的程序段。
要求分析一下程序片段:
//example.txt文件
const a=10;
var b,c;
procedure p;
begin
c:=b+a;
end;
begin
read(b);
while b#0 do
begin
call p;writeln(2*c);read(b);
end
end.
该程序片段存放在example.txt中。
三、实验步骤
1、对PL\0文法中各类单词分类:
(1)保留字:const、var、procedure、begin、end、if、then、while、do、 read、call、write、writeln
(2)常数:由0-----9这几个数字组成
(3)标识符:由字母打头的字母和数字的字符串
(4)运算符:+,-,*,/,:=,>=,<=,#
(5)分界符:,、.、(、)、;
2、将各类单词对应到lex中:
(1)保留字:reservedWord [const|var|procedure|begin|end|if|then|while|do|read|call|write|writeln]
,
由于在lex中不区分大小写,所以将保留字写成:reservedWord
[cC][oO][nN][sS][tT]|[vV][aA][rR]|[pP][rR][oO][cC][eE][dD][uU][rR][eE]|
[bB][eE][gG][iI][nN]|[eE][nN][dD]|[iI][fF]|[tT][hH][eE][nN]|[wW][hH][iI]
[lL][eE]|[dD][oO]|[rR][eE][aA][dD]|[cC][aA][lL][lL]|[wW][rR][iI][tT][eE]
[wW][rR][iI][tT][eE][lL][nN]
(2)常数:
constant ([0-9])+ /0---9这几个数字可以重复/
(3)标识符:
identfier [A-Za-z]([A-Za-z][0-9])*
(4)运算符:
operator +|-|*|/|:=|>=|<=|#|= /在lex中,有特殊意义的运算符要加转义字符\,如+、及*、/
(5)分界符:
delimiter [,.;]
3、PL/0的语言的词法分析器要求跳过分隔符(如空格,回车,制表符),对应的lex定义为:
delim [""\n\t]
whitespace{delim}+
4、为lex制定一些规则:
{reservedWord}{count++;printf("\t%d\t(1,‘%s’)\n",count,yytext);} /*对保留字定规则,输出设置*/
{operator} {count++;printf("\t%d\t(2,‘%s’)\n",count,yytext); }
{delimiter}{count++;printf("\t%d\t(3,‘%s’)\n",count,yytext);}
{constant}{count++;printf("\t%d\t(4,‘%s’)\n",count,yytext);}
{identfier}{count++;printf("\t%d\t(5,‘%s’)\n",count,yytext);}
{whitespace} {/* do nothing*/ } /*遇到空格,什么都不做*/
四、代码设计
本程序使用c语言编写而成,采用栈存储文件数据(一开始想得数组,但又想到定义数组开辟空间不好把握,容易造成资源浪费,故而采用栈式存储,空间不够可以适量开辟空间,很大程度上缩减资源浪费。)然后就是各个函数中遍历检索所得数据,再构造一个数组,存放检索后的数据,输出即可!完整代码如下:
#include <stdio.h>
#include <stdlib.h>
#include<stdbool.h>
#include<string.h>
#define STACK_INIT_SIZE 100 /*存储空间初始分配量*/
#define STACKINCREMENT 20 /*存储空间分配增量*/
typedef struct {
char *top;
char *base;
int stacksize;
}SqStack;
int InitStack (SqStack *s) { /*构造一个空栈*/
s -> base = (char*)malloc(STACK_INIT_SIZE*sizeof(char));
s -> top = s -> base;
s -> stacksize = STACK_INIT_SIZE;
return 1;
}
int push (SqStack *s, char e) { /*插入元素e为新的栈顶元素*/
if (s -> top - s -> base >= s -> stacksize) {
s -> base = (char*)realloc(s -> base, (s -> stacksize + STACKINCREMENT)*sizeof(char));
if (!s -> base) { /*栈溢出*/
return -1;
}
s -> top = s -> base + s -> stacksize;
s -> stacksize += STACKINCREMENT;
}
*(s -> top) = e; /*先赋值再将指针加1*/
s -> top ++;
return 1;
}
int pop (SqStack *s, char *e) { /*出栈*/
if (s -> top == s -> base) { /*栈空*/
return 1;
} else {
*e=*s->base;
s->base++;
}
return 0;
}
int recover(SqStack s,int size){ /*恢复栈底指针指向*/
s.base = s.top - size;
return 0;
}
_Bool isEmpty (SqStack *s) { /*判断栈是否为空*/
if (s -> top == s -> base) {
return 1;
} else {
return 0;
}
}
int reservedWord(SqStack s,char *yytext) // 保留字
{
char remains[13][100] =
{
"const",
"var",
"procedure",
"begin",
"end",
"if",
"then",
"while",
"do",
"read",
"call",
"writeln"
};
int size = s.top - s.base;
//printf("输出文件的Size: %d\n",size);
char temp[size];
memset(temp, '\0', size);
int t = 0;
while(isEmpty(&s) == false){
char val;
pop(&s,&val);
// printf("输出栈中值:%c\n",val);
if((val>= 65 && val <= 90) || (val >= 97 && val <= 122))//判断保留字
{
// printf("输出栈中值:%c\n",val);
temp[t++] = val;
for (int a = 0; a < 13; a++)
{
if (strcmp(temp, remains[a]) == 0)
{
strcat(yytext,remains[a]);
strcat(yytext," ");
memset(temp, '\0', t);
t = 0;
}
}
}
else
{
if(strcmp(temp,"write") == 0)
{
strcat(yytext,"write");
strcat(yytext," ");
}
memset(temp, '\0', t);
t = 0;
}
}
s.base = s.top - size; //恢复指针指向;
}
void operator_(SqStack s,char *yytext){ // 运算符
char val;
while(isEmpty(&s) == false){
pop(&s,&val);
switch(val){
case '+':
strcat(yytext,"\\+");
strcat(yytext," ");
break;
case '-':
strcat(yytext,"\\-");
strcat(yytext," ");
break;
case '/':
if(*(s.base) == '*'){
strcat(yytext,"/*");
strcat(yytext," ");
s.base++;
}
else{
strcat(yytext,"\\/");
strcat(yytext," ");
}
break;
case '*':
strcat(yytext,"\\*");
strcat(yytext," ");
break;
case ':':
if(*(s.base) == '='){
strcat(yytext,":=");
strcat(yytext," ");
s.base++;
}
break;
case '>':
if(*(s.base) == '='){
strcat(yytext,">=");
strcat(yytext," ");
s.base++;
}
else{
strcat(yytext,">");
strcat(yytext," ");
}
break;
case '<':
if(*(s.base) == '='){
strcat(yytext,"<=");
strcat(yytext," ");
s.base++;
}
else{
strcat(yytext,"<");
strcat(yytext," ");
}
break;
case '#':
strcat(yytext,"#");
strcat(yytext," ");
break;
case '=':
strcat(yytext,"=");
strcat(yytext," ");
break;
}
}
}
int constant(SqStack s, char *yytext){ //数字 0 ~ 9
char val;
char a[30];
memset(a, '\0', 30);
int i = 0;
while(isEmpty(&s) == false){
pop(&s,&val);
if((val >= 48 && val <= 57)){
a[i++] = val;
a[i++] = ' ';
}
}
strcat(yytext,a);
strcat(yytext," ");
}
void identfier(SqStack s, char *yytext){ // 标识符
char remains[13][100] =
{
"const",
"var",
"procedure",
"begin",
"end",
"if",
"then",
"while",
"do",
"read",
"call",
"writeln"
};
int size = s.top - s.base;
char temp[size];
memset(temp, '\0', size);
int t = 0;
while(isEmpty(&s) == false){
char val;
pop(&s,&val);
// printf("输出栈中值:%c\n",val);
if((val>= 65 && val <= 90) || (val >= 97 && val <= 122) || (val >= 48 && val <= 57))//判断基本字和标识符
{
// printf("输出栈中值:%c\n",val);
temp[t++] = val;
for (int a = 0; a < 13; a++)
{
if (strcmp(temp, remains[a]) == 0){
memset(temp, '\0', t);
t = 0;
}
}
}
else
{
if(strcmp(temp,"write") == 0)
{
memset(temp, '\0', t);
t = 0;
}
if(temp[0] >= 48 && temp[0] <= 57){
memset(temp, '\0', t);
t = 0;
}
if(temp[0] != '\0'){
strcat(yytext,temp);
strcat(yytext," ");
}
memset(temp, '\0', t);
t = 0;
}
}
}
void delimiter(SqStack s, char *yytext){ //分隔符
char val;
while(isEmpty(&s) == false){
pop(&s,&val);
switch(val){
case ':':
strcat(yytext,":");
strcat(yytext," ");
break;
case '.':
strcat(yytext,".");
strcat(yytext," ");
break;
case ';':
strcat(yytext,";");
strcat(yytext," ");
break;
}
}
}
int yywrap()
{
return 1;
}
int yylex(SqStack s)
{
char yytext[1024];
memset(yytext, '\0', strlen(yytext));
int count = 1;
int size = s.top - s.base;
while(count < 6){
if(count == 1){
reservedWord(s,yytext);
printf("\t%d\t(1,‘ %s’)\n",count,yytext);
memset(yytext, '\0', strlen(yytext));
count++;
}
else if(count == 2){
operator_(s,yytext);
printf("\t%d\t(2,‘ %s’)\n",count,yytext);
recover(s,size);
memset(yytext, '\0', strlen(yytext));
count++;
}
else if(count == 3){
delimiter(s,yytext);
printf("\t%d\t(3,‘ %s’)\n",count,yytext);
recover(s,size);
memset(yytext, '\0', strlen(yytext));
count++;
}
else if(count == 4){
constant(s,yytext);
printf("\t%d\t(4,‘ %s’)\n",count,yytext);
recover(s,size);
memset(yytext, '\0', strlen(yytext));
count++;
}
else if(count == 5){
identfier(s,yytext);
printf("\t%d\t(5,‘ %s’)\n",count,yytext);
recover(s,size);
memset(yytext, '\0', strlen(yytext));
count++;
}
// whitespace(); /* do nothing*/
// if(count > 5)
}
printf("文件读取成功!yywrap返回值:%d\n",yywrap());
}
int main()
{
system("color 4");
SqStack s;
InitStack(&s);
char yyout;
char file[1024];
int len = 0;
FILE *yyin;
printf("词法分析器输出类型说明:\n");
printf("1:保留字\n");
printf("2:运算符\n");
printf("3:分界符\n");
printf("4:常 数\n");
printf("5:标识符\n");
printf("\n");
yyin = fopen("example.txt","r");
if(yyin == NULL){
printf("文件打开失败!");
}
else{
printf("文件打开成功!\n");
yyout = fgetc(yyin);
while(!feof(yyin)){
push(&s,yyout);
len++;
yyout = fgetc(yyin);
}
yylex(s);
}
fclose(yyin);
system("PAUSE");/*暂停*/
}
当然,代码还有很多缺点,例如,代码很长,有很多地方可以整合缩减。也有很多地方数据冗余,可能是调试的时候忘记删除了,不过输出结果还是符合实验要求的。
博主真是太厉害了!!!
叼茂SEO.bfbikes.com
叼茂SEO.bfbikes.com
叼茂SEO.bfbikes.com
不错不错,我喜欢看 https://www.jiwenlaw.com/
不错不错,我喜欢看 https://www.237fa.com/
看的我热血沸腾啊https://www.ea55.com/
不错不错,我喜欢看 www.jiwenlaw.com
想想你的文章写的特别好www.jiwenlaw.com