error in coding a lexer in c
Posted
by mekasperasky
on Stack Overflow
See other posts from Stack Overflow
or by mekasperasky
Published on 2010-04-19T17:32:37Z
Indexed on
2010/04/20
2:43 UTC
Read the original article
Hit count: 510
#include<stdio.h>
#include<ctype.h>
#include<string.h>
/* this is a lexer which recognizes constants , variables ,symbols, identifiers , functions , comments and also header files . It stores the lexemes in 3 different files . One file contains all the headers and the comments . Another file will contain all the variables , another will contain all the symbols. */
int main()
{
int i=0,j,k,count=0;
char a,b[100],c[10000],d[100];
memset ( d, 0, 100 );
j=30;
FILE *fp1,*fp2;
fp1=fopen("source.txt","r"); //the source file is opened in read only mode which will passed through the lexer
fp2=fopen("lext.txt","w");
//now lets remove all the white spaces and store the rest of the words in a file
if(fp1==NULL)
{
perror("failed to open source.txt");
//return EXIT_FAILURE;
}
i=0;
k=0;
while(!feof(fp1))
{
a=fgetc(fp1);
if(a!=' '&&a!='\n')
{
if (!isalpha(a))
{
switch(a)
{
case '+':{fprintf(fp2,"+ ----> PLUS \n");
i=0;break;}
case '-':{fprintf(fp2,"- ---> MINUS \n");
i=0;break;}
case '*':{fprintf(fp2, "* --->MULT \n");
i=0;break;}
case '/':{fprintf(fp2, "/ --->DIV \n");
i=0;break;}
//case '+=':fprintf(fp2, "%.20s\n", "ADD_ASSIGN");
//case '-=':fprintf(fp2, "%.20s\n", "SUB_ASSIGN");
case '=':{fprintf(fp2, "= ---> ASSIGN \n");
i=0;break;}
case '%':{fprintf(fp2, "% ---> MOD \n");
i=0;break;}
case '<':{fprintf(fp2, "< ---> LESSER_THAN \n");
i=0;break;}
case '>':{fprintf(fp2, "> --> GREATER_THAN \n");
i=0;break;}
//case '++':fprintf(fp2, "%.20s\n", "INCREMENT");
//case '--':fprintf(fp2, "%.20s\n", "DECREMENT");
//case '==':fprintf(fp2, "%.20s\n", "ASSIGNMENT");
case ';':{fprintf(fp2, "; --->SEMI_COLUMN \n");
i=0;break;}
case ':':{fprintf(fp2, ": --->COLUMN \n");
i=0;break;}
case '(':{fprintf(fp2, "( --->LPAR \n");
i=0;break;}
case ')':{fprintf(fp2, ") --->RPAR \n");
i=0;break;}
case '{':{fprintf(fp2, "{ --->LBRACE \n");
i=0;break;}
case '}':{fprintf(fp2, "} ---> RBRACE \n");
i=0;break;}
}
}
else
{
d[i]=a;
//printf("%c\n",d[i]);
i=i+1;
}
//}
/* we can make the lexer more complex by including even more depths of checks for the symbols*/
}
else
{
d[i+1]='\0';
printf("\n");
if((strcmp(d,"if ")==0)){fprintf(fp2,"if ----> IDENTIFIER \n");
//printf("%s \n",d);
memset ( d, 0, 100 );
//printf("%s \n",d);
count=count+1;}
else if(strcmp(d,"then")==0){fprintf(fp2,"then ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"else")==0){fprintf(fp2,"else ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"switch")==0){fprintf(fp2,"switch ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"printf")==0){fprintf(fp2,"prtintf ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"scanf")==0){fprintf(fp2,"scanf ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"NULL")==0){fprintf(fp2,"NULL ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"int")==0){fprintf(fp2,"INT ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"char")==0){fprintf(fp2,"char ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"float")==0){fprintf(fp2,"float ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"long")==0){fprintf(fp2,"long ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"double")==0){fprintf(fp2,"double ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"const")==0){fprintf(fp2,"const ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"continue")==0)fprintf(fp2,"continue ----> IDENTIFIER \n");
else if(strcmp(d,"size of")==0){fprintf(fp2,"size of ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"register")==0){fprintf(fp2,"register ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"short")==0){fprintf(fp2,"short ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"auto")==0){fprintf(fp2,"auto ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"while")==0){fprintf(fp2,"while ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"do")==0){fprintf(fp2,"do ----> IDENTIFIER \n");
count=count+1;}
else if(strcmp(d,"case")==0){fprintf(fp2,"case ----> IDENTIFIER \n");
count=count+1;}
else if (isdigit(d[i]))
{
fprintf(fp2,"%s ---->NUMBER",d);
}
else if (isalpha(a))
{
fprintf(fp2,"%s ----> Variable",d);
//printf("%s",d);
// memset ( d, 0, 100 );}
//fprintf(fp2, "s\n", b);
i=0;
k=k+1;
continue;
}
i=i+1;
k=k+1;
}
fclose(fp1);
fclose(fp2);
printf("%d",count);
return 0;
}
In this code , my source.txt has if (a+b) stored . But only ( , + and ) is getting written into lext.txt and not the identifier if or the variable a and b . Any particular reason why?
© Stack Overflow or respective owner