#include <cstdio>
#include <corecrt_malloc.h>
#include <string.h>
#include <iostream>
// Number of entries in the keywords table below.
#define NUM_OF_KWORDS 2
// Reserved words of the language being lexed. The lexer tags an identifier as
// KWORD when is_kword() returns nonzero for it; presumably is_kword() searches
// this table -- its definition is not visible here, confirm.
// NOTE(review): the elements point at string literals, which must never be
// written through. `const char*` would express that, but every user of this
// table would have to be checked before changing the type.
char* keywords[NUM_OF_KWORDS] = { "for", "do" };
// States of the lexer's finite-state machine:
//   H    - start state: skips spaces, tabs and newlines, then picks the next
//          state from the current character
//   ID   - reading an identifier or keyword
//   NM   - selected when the current character is a digit, '.', '+' or '-'
//   ASGN - a ':' was seen; the following character must be '='
//   DLM  - single-character delimiters and operators, or an unrecognised character
//   ERR  - reports an unknown character, then goes back to H
enum states { H, ID, NM, ASGN, DLM, ERR };
// Kinds of token stored in struct token:
//   KWORD - identifier for which is_kword() returns nonzero
//   IDENT - any other identifier
//   NUM   - presumably a numeric literal (TODO confirm against the NM state)
//   OPER  - ":=", '<', '>' or '='
//   DELIM - '(', ')' or ';'
enum tok_names { KWORD, IDENT, NUM, OPER, DELIM };
// One lexical token: its kind plus the text it was recognised from.
struct token
{
// Kind of the token (see enum tok_names).
enum tok_names token_name;
// Text of the token as a NUL-terminated string. The lexer allocates it with
// malloc(); who releases it is not shown in this part of the file.
char* token_value;
};
// Node of a singly linked list of tokens.
struct lexeme_table
{
// The token held by this node, stored by value.
struct token tok;
// Next node in the list.
struct lexeme_table* next;
};
// Global token list; both pointers start out NULL (empty list).
// NOTE(review): presumably lt_head is the first node and lt the most recently
// added one, maintained by add_token() -- confirm against its definition.
struct lexeme_table* lt = NULL;
struct lexeme_table* lt_head = NULL;
// Opens the text file `filename`, splits its contents into tokens and passes
// each one to add_token(). Returns -1 if the file cannot be opened or a memory
// allocation fails.
int lexer(char* filename);
// Keyword test used by the lexer: a nonzero result makes the lexer tag the
// NUL-terminated identifier `id` as KWORD instead of IDENT.
int is_kword(char* id);
// Receives a pointer to a filled-in token from the lexer. Presumably appends a
// copy to the global token list; the meaning of the return value is not
// visible here -- confirm against the definition.
int add_token(struct token* tok);
int lexer(char* filename)
{
FILE* fd;
int c, err_symbol;
struct token tok;
if ((fd = fopen(filename, "r")) == NULL)
{
printf("\nCannot open file %s.\n", filename);
return -1;
}
enum states CS = H;
c = fgetc(fd);
while (!feof(fd))
{
switch (CS)
{
case H:
{
while ((c == ' ') || (c == '\t') || (c == '\n'))
{
c = fgetc(fd);
}
if (((c >= 'A') && (c <= 'Z')) ||
((c >= 'a') && (c <= 'z')) || (c == '_'))
{
CS = ID;
}
else if (((c >= '0') && (c <= '9')) || (c == '.') ||
(c == '+') || (c == '-'))
{
CS = NM;
}
else if (c == ':')
{
CS = ASGN;
}
else {
CS = DLM;
}
break;
}// case H
case ASGN:
{
int colon = c;
c = fgetc(fd);
if (c == '=')
{
tok.token_name = OPER;
if ((tok.token_value = (char*)malloc(sizeof(2))) == NULL)
{
printf("\nMemory allocation error in function \"lexer\"\n");
return -1;
}
strcpy(tok.token_value, ":=");
add_token(&tok);
c = fgetc(fd);
CS = H;
}
else {
err_symbol = colon;
CS = ERR;
}
break;
}// case ASGN
case DLM:
{
if ((c == '(') || (c == ')') || (c == ';'))
{
tok.token_name = DELIM;
if ((tok.token_value =
(char*)malloc(sizeof(1))) == NULL)
{
printf("\nMemory allocation error in function \"lexer\"\n");
return -1;
}
sprintf(tok.token_value, "%c", c);
add_token(&tok);
c = fgetc(fd);
CS = H;
}
else if ((c == '<') || (c == '>') || (c == '='))
{
tok.token_name = OPER;
if ((tok.token_value =
(char*)malloc(sizeof(1))) == NULL)
{
printf("\nMemory allocation error in functio \"lexer\"\n");
return -1;
}
sprintf(tok.token_value, "%c", c);
add_token(&tok);
c = fgetc(fd);
CS = H;
}
else {
err_symbol = c;
c = fgetc(fd);
CS = ERR;
}// if((c == '(') || (c == ')') || (c == ';'))
break;
}// case DLM
case ERR:
{
printf("\nUnknown character: %c\n", err_symbol);
CS = H;
break;
}
case ID:
{
int size = 0;
char buf[256];
buf[size] = c;
size++;
c = fgetc(fd);
while (((c >= 'A') && (c <= 'Z')) || ((c >= 'a') &&
(c <= 'z')) || ((c >= '0') && (c <= '9')) ||
(c == '_'))
{
buf[size] = c;
size++;
c = fgetc(fd);
}
buf[size] = '\0';
if (is_kword(buf))
{
tok.token_name = KWORD;
}
else {
tok.token_name = IDENT;
}
if ((tok.token_value = (char*)malloc(strlen(buf))) == NULL)
{
printf("\nMemory allocation error in function \"lexer\"\n");
return -1;
}
strcpy(tok.token_value, buf);
add_token(&tok);
CS = H;
break;
} // case ID
} // switch
} // while
} // int lexer(...)