C++-c语言词法分析器
一、运行截图
- 对于 Test.c 的词法分析结果
- 对于词法分析器本身的源代码的分析结果
二、主要功能
经过不断的修正和测试代码,分析测试结果,该词法分析器主要实现了以下功能:
1. 识别关键字
- 实验要求:if else while do for main return int float double char。以及:
- 数据类型关键字 void unsigned long enum static short signed struct union;
- 控制语句关键字 continue break switch case default goto;
- 存储类型关键字 auto static extern register;
- 其他关键字 const volatile sizeof typedef;
- 预编译指令 #xxx。
2. 识别运算符
- 实验要求:= + - * / % < <= > >= != ==
- 算术运算符 ++ –
- 逻辑运算符 && || !
- 位运算符 & | ^ ~ << >>
- 赋值运算符 += -= *= /= %= <<= >>= &= ^= |=
- 杂项运算符 , ? : -> .
3. 识别界限符
- 实验要求:; ( ) { }
- 其他:[ ]
4. 识别常量
- 实验要求:无符号十进制整型常量,正规式为:(1-9)(0-9)*
- 无符号二进制整型常量,正规式为: 0(B|b)(0-1)*
- 无符号八进制整型常量,正规式为: 0(0-7)*
- 无符号十六进制整型常量,正规式为: 0(X|x)(0-9|a-f|A-F)*
- 字符串常量,正规式为: “(all char)*”
- 字符常量,正规式为: ‘(all char)*’
5. 识别标识符
- 实验要求:以字母开头,正规式为:letter(letter|digit)*
- 以下划线开头,正规式为:_ (letter|digit|$ | _) *
- 以美元符号开头,正规式为:$ (letter|digit|$ | _ ) *
6. 识别其他符号
- 实验要求:空格符(’ ‘),制表符(’\\t’),换行符(‘\\n’)
- 单行注释(“//”)
- 多行注释(“/*”)
- 转义字符(‘\\’)
特别的,在预处理阶段,识别单/多行注释时,遇到了一个小问题,由于要支持字符串常量和字符常量,而该常量中可能包含//,/*,导致将字符串内容识别为注释,另外,由于需要支持转义字符‘\\’的存在,正确识别的字符串,也成为了一大问题,在多次修改匹配代码后,终于将问题解决。
7. 词法高亮
利用词法分析的token结果,按不同的类型码,对源代码文件进行着色输出。
运算符和界限符:黄色。
关键字:淡蓝色。
无符号二,八,十,十六进制整型常量:淡红色。
标识符:淡绿色。
字符串常量:淡黄色。
8. 词法错误处理
当朴素匹配或者正规匹配失败,或者匹配结束后,回退字符之前,出现非法字符时,对相应的错误进行记录并在分析完毕后进行输出。
目前已处理错误:未找到匹配的界限符()]}),多行注释未找到结束符(*/),出现未定义的字符(@¥`)。
值得注意的一点,当遇到未处理的词法错误时,程序存在可能的崩溃隐患。
三、项目内容
1. 测试代码 Test.c
/* used to test keywords* //ohhhhhhhhhhhhhhh*/
void $Func$ForKey_2(int a, unsigned b)
{
start: if (1){}else{}while (1) { continue; }do{}while (1);for (;;){ }}switch (1){case 1: break;default: break;}enum { run };goto start;
}struct FStruct{int val = 0000x1234;};
union FUnion{int val = 0B1 + 0b1 + 01 + 0X1 + 0x1;};
extern i;// used to test operator
void __FuncForOperation5(const float c, long d, signed e, short f)
{typedef int Moota;register int i = 1234 + sizeof(Moota) + sizeof("//hello world//\\\\") + sizeof('\\a');volatile int j = i + 1 - 1 * 1 / 1 % 1 <= 1 < 1 > 1 >= 1 != 1 == 1;auto int m = (i++) + (++i) + (i && i || i & i & i | i ^ i << 1) + i >> 1 + ~i;m += 1;m -= 2;m *= 3;m / 4;m %= 5;m <<= 6;m >>= 7;m &= 8;m |= 9;m ^= 10;m = m ? 1 : 0;struct FStruct* Ohhh;Ohhh->val;struct FStruct Emmm;Emmm.val;
}/*
int main(void)
{$Func$ForKey_2(1, 1);__FuncForOperation5(1, 1, 1, 1);return 0;
}
2. 项目代码 LexicalAnalyzer.cpp
/ Copyright Moota, Private. All Rights Reserved. */#include <iostream>
#include <map>
#include <stack>
#include <vector>
#include <string>
#include <fstream>
#include "Windows.h"/* 词法分析器* 可以识别的单词种类包括以下部分(括号代表实验要求之外的单词)* * (1) 关键字* if else while do for main return int float double char* (数据类型关键字 void unsigned long enum static short signed struct union)* (控制语句关键字 continue break switch case default goto)* (存储类型关键字 auto static extern register)* (其他关键字 const volatile sizeof typedef)* (预编译指令 #xxx)* * (2) 运算符* = + - * / % < <= > >= != ==* (算术运算符 ++ --)* (关系运算符)* (逻辑运算符 && || !)* (位运算符 & | ^ ~ << >>)* (赋值运算符 += -= *= /= %= <<= >>= &= ^= |=)* (杂项运算符 , ? : -> .)* * (3) 界限符* ; ( ) { }* ([ ])* * (4) 常量* 正规式为 digit1 digit2*,只考虑无符号十进制整型常量,digit1包括1-9,digit2包括0-9)* (无符号二进制整型常量 0(B|b)(0-1)*)* (无符号八进制整型常量 0(0-7)*)* (无符号十六进制整型常量 0(X|x)(0-9|a-f|A-F)*)* (字符串常量 "(all char)*" )* (字符常量 '(all char)*' )* * (5) 标识符* 正规式为 letter(letter|digit)*,区分大小写* (_(letter|digit|$)*)* ($(letter|digit|$)*)* * (6) 其他符号* 空格符(' '),制表符('\\t'),换行符('\\n'),单行多行注释("//","\\/\\*") 应该在词法分析阶段被忽略*/
class FLexicalAnalyzer
{
public:FLexicalAnalyzer(){SetConsoleTitle(L"Lexical Analyzer V1.2.6");ShowWindow(GetForegroundWindow(), SW_MAXIMIZE);SetConsoleOutputCP(65001);}private:// 工具函数// 字符是否小写static inline bool IsLower(char Data){return 'a' <= Data and Data <= 'z';}// 字符是否大写static inline bool IsUpper(char Data){return 'A' <= Data and Data <= 'Z';}// 字符是否是字母字符static inline bool IsLetter(char Data){return IsLower(Data) || IsUpper(Data);}// 字符是否是数字字符static inline bool IsDigit(char Data){return '0' <= Data and Data <= '9';}// 读取文件std::string ReadFile(const std::string& FilePath){std::string Result;std::ifstream InputFile(FilePath, std::ios::in);if (!InputFile.is_open()){std::cerr << "The file failed to open, perhaps you need to change the file path." << "\\n";return Result;}char Ch;while (InputFile.peek() != EOF){InputFile.get(Ch);Result += Ch;}InputFile.close();TokenizeData.FilePath = FilePath;return Result;}// 保存文件static void SaveFile(const std::string& Data, const std::string& FilePath){std::ofstream OutputFile(FilePath, std::ios::out);OutputFile << Data;OutputFile.close();}private:std::map<std::string, int> KeywordMap; // 关键字符号表std::map<std::string, int> RegularMap; // 正规(常量,标识符)符号表std::map<std::string, int> SpecialMap; // 特殊(运算符,界限符)符号表// 单词序列数据struct FTokenData{std::string Token; // 单词符号int Code = 0; // 类别码};// 词法分析数据struct FTokenizeData{std::string FilePath; // 文件地址std::string Result; // 最终结果std::string Data; // 待分析数据size_t Size = 0; // 数据长度size_t Index = 0; // 当前字符索引char Start = ' '; // 当前字符FTokenData TokenData; // 当前符号信息std::stack<int> Delimiter[3]; // 界限符栈size_t Lines = 0; // 词法分析当前行std::vector<std::string> Errors; // 词法分析错误池std::vector<std::string> Warnings; // 词法分析警告池} TokenizeData;/* 初始化各个符号表*/void Initialize(){KeywordMap["main"] = 1;KeywordMap["if"] = 2;KeywordMap["else"] = 3;KeywordMap["while"] = 4;KeywordMap["do"] = 5;KeywordMap["for"] = 6;KeywordMap["return"] = 7;RegularMap["letter(letter|digit)*"] = 8;RegularMap["digit digit*"] = 9;RegularMap["\\"(all char)*\\""] = 200;RegularMap["\\'(all char)*\\'"] = 201;RegularMap["_(letter|digit|$)*"] = 202;RegularMap["$(letter|digit|$)*"] = 203;RegularMap["0(B|b)(0-1)*"] = 204;RegularMap["0(0-7)*"] = 205;RegularMap["0(X|x)(0-9 a-f)*"] = 206;SpecialMap["+"] = 10;SpecialMap["-"] = 11;SpecialMap["*"] = 12;SpecialMap["/"] = 13;SpecialMap["%"] = 14;SpecialMap[">"] = 15;SpecialMap[">="] = 16;SpecialMap["<"] = 17;SpecialMap["<="] = 18;SpecialMap["=="] = 19;SpecialMap["!="] = 20;SpecialMap["="] = 21;SpecialMap[";"] = 22;SpecialMap["("] = 23;SpecialMap[")"] = 24;SpecialMap["{"] = 25;SpecialMap["}"] = 26;SpecialMap["["] = 100;SpecialMap["]"] = 101;SpecialMap[","] = 102;SpecialMap[":"] = 103;SpecialMap["++"] = 104;SpecialMap["--"] = 105;SpecialMap["&&"] = 106;SpecialMap["||"] = 107;SpecialMap["!"] = 108;SpecialMap["&"] = 109;SpecialMap["|"] = 110;SpecialMap["^"] = 111;SpecialMap["~"] = 112;SpecialMap["<<"] = 113;SpecialMap[">>"] = 114;SpecialMap["+="] = 115;SpecialMap["-="] = 116;SpecialMap["*="] = 117;SpecialMap["/="] = 118;SpecialMap["%="] = 119;SpecialMap["<<="] = 120;SpecialMap[">>="] = 121;SpecialMap["&="] = 122;SpecialMap["|="] = 123;SpecialMap["^="] = 124;SpecialMap["?"] = 125;SpecialMap["->"] = 126;SpecialMap["."] = 127;KeywordMap["int"] = 27;KeywordMap["float"] = 28;KeywordMap["double"] = 29;KeywordMap["char"] = 30;KeywordMap["void"] = 31;KeywordMap["unsigned"] = 32;KeywordMap["long"] = 33;KeywordMap["const"] = 34;KeywordMap["continue"] = 35;KeywordMap["break"] = 36;KeywordMap["enum"] = 37;KeywordMap["switch"] = 38;KeywordMap["case"] = 39;KeywordMap["static"] = 40;KeywordMap["auto"] = 41;KeywordMap["short"] = 42;KeywordMap["signed"] = 43;KeywordMap["struct"] = 44;KeywordMap["union"] = 45;KeywordMap["goto"] = 46;KeywordMap["default"] = 47;KeywordMap["extern"] = 48;KeywordMap["register"] = 49;KeywordMap["volatile"] = 50;KeywordMap["sizeof"] = 51;KeywordMap["typedef"] = 52;KeywordMap["#"] = 53;std::cout << "///\\n";}/* 数据预处理* 主要处理制表符('\\t')* 空格符将在词法分析时自动进行忽略* 换行符将在词法分析时作为报错的依据*/std::string Preprocess(const std::string& Data){std::string Result;const size_t Size = Data.size();size_t Lines = 0;for (size_t i = 0; i < Size; ++i){if (Data[i] == '"'){Result += Data[i];++i;while (i < Size && Data[i] != '"'){if (Data[i] == '\\\\'){Result += Data[i];++i;}if (i < Size){Result += Data[i];++i;}}if (Data[i] == '"'){Result += Data[i];}}else if (Data[i] == '\\''){Result += Data[i];++i;while (i < Size && Data[i] != '\\''){if (Data[i] == '\\\\'){Result += Data[i];++i;}Result += Data[i];++i;}if (Data[i] == '\\''){Result += Data[i];}}else if (Data[i] == '/' && (i + 1) < Size && Data[i + 1] == '/')// 处理单行注释{while (i < Size && Data[i] != '\\n'){++i;}if (Data[i] == '\\n'){++Lines;Result += Data[i];}}else if (Data[i] == '/' && (i + 1) < Size && Data[i + 1] == '*')// 处理多行注释{i = i + 2;bool IsFound = false;while (i < Size){if (Data[i] == '\\n'){++Lines;}if (Data[i] == '*' && (i + 1) < Size && Data[i + 1] == '/'){IsFound = true;i = i + 2;break;}++i;}if (Data[i] == '\\n'){++Lines;}if (!IsFound){TokenizeData.Errors.push_back("ERR: " + TokenizeData.FilePath + " " + std::to_string(Lines) + " " + "no matching '*/' symbol was found.");}}else if (Data[i] == '\\n'){++Lines;Result += Data[i];}else if (Data[i] == '\\t')// 处理制表符{Result += ' ';}else{Result += Data[i];}}return Result;}/* 获取下一个有效字符,忽略空格符*/bool GetValidChar(){bool Result = false;while (TokenizeData.Index < TokenizeData.Size){TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];if (TokenizeData.Start != ' '){Result = true;break;}}return Result;}/* 识别以字母为开始符号的* 1. 关键字-letter(letter) 2. 识别标识符-letter(letter|digit)/void TokenizeAlpha(){do{TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];}while (IsLetter(TokenizeData.Start) || IsDigit(TokenizeData.Start) || TokenizeData.Start == '$'|| TokenizeData.Start == '_');const auto Found = KeywordMap.find(TokenizeData.TokenData.Token);if (Found != KeywordMap.end()){TokenizeData.TokenData.Code = Found->second;}else{TokenizeData.TokenData.Code = RegularMap["letter(letter|digit)*"];}TokenizeData.Index--;}/* 识别以数字为开始符号的* 1. 无符号十进制整型常量-digit digit/void TokenizeDigit(){if ('1' <= TokenizeData.Start && TokenizeData.Start <= '9'){do{TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];}while (IsDigit(TokenizeData.Start));TokenizeData.TokenData.Code = RegularMap["digit digit*"];TokenizeData.Index--;}else{TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];if (TokenizeData.Start == 'B' || TokenizeData.Start == 'b'){TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];do{TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];}while (TokenizeData.Start == 0 || TokenizeData.Start == 1);TokenizeData.TokenData.Code = RegularMap["0(B|b)(0-1)*"];TokenizeData.Index--;}else if (TokenizeData.Start == 'X' || TokenizeData.Start == 'x'){TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];do{TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];}while (IsDigit(TokenizeData.Start) || ('a' <= TokenizeData.Start and TokenizeData.Start <= 'f') || ('A' <= TokenizeData.Start and TokenizeData.Start <= 'F'));TokenizeData.TokenData.Code = RegularMap["0(X|x)(0-9 a-f)*"];TokenizeData.Index--;}else if ('0' <= TokenizeData.Start and TokenizeData.Start <= '7'){TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];do{TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];}while ('0' <= TokenizeData.Start and TokenizeData.Start <= '7');TokenizeData.TokenData.Code = RegularMap["0(0-7)*"];TokenizeData.Index--;}else{TokenizeData.TokenData.Code = RegularMap["digit digit*"];TokenizeData.Index--;}}}/* 识别以非数字非字母为开始符号的* 1. 运算符* 2. 界限符*/void TokenizeSpecial(){for (auto& Special : SpecialMap){if (TokenizeData.Start == Special.first[0]){TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.TokenData.Code = Special.second;break;}}if (TokenizeData.Start == '('){TokenizeData.Delimiter[0].push(TokenizeData.Start);}if (TokenizeData.Start == '['){TokenizeData.Delimiter[1].push(TokenizeData.Start);}else if (TokenizeData.Start == '{'){TokenizeData.Delimiter[2].push(TokenizeData.Start);}else if (TokenizeData.Start == ')'){if (!TokenizeData.Delimiter[0].empty()){TokenizeData.Delimiter[0].pop();}else{TokenizeData.Errors.push_back("ERR: " + TokenizeData.FilePath + " " + std::to_string(TokenizeData.Lines) + " there is an unmatched ')' symbol.");}}else if (TokenizeData.Start == ']'){if (!TokenizeData.Delimiter[1].empty()){TokenizeData.Delimiter[1].pop();}else{TokenizeData.Errors.push_back("ERR: " + TokenizeData.FilePath + " " + std::to_string(TokenizeData.Lines) + " there is an unmatched ']' symbol.");}}else if (TokenizeData.Start == '}'){if (!TokenizeData.Delimiter[2].empty()){TokenizeData.Delimiter[2].pop();}else{TokenizeData.Errors.push_back("ERR: " + TokenizeData.FilePath + " " + std::to_string(TokenizeData.Lines) + " there is an unmatched '}' symbol.");}}else if (TokenizeData.Start == '!'){TokenizeData.TokenData.Code = SpecialMap["!"];TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];if (TokenizeData.Start == '='){TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.TokenData.Code = SpecialMap["!="];}else{TokenizeData.Index--;}}else if (TokenizeData.Start == '='){TokenizeData.TokenData.Code = SpecialMap["="];TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];if (TokenizeData.Start == '='){TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.TokenData.Code = SpecialMap["=="];}else{TokenizeData.Index--;}}else if (TokenizeData.Start == '<'){TokenizeData.TokenData.Code = SpecialMap["<"];TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];if (TokenizeData.Start == '='){TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.TokenData.Code = SpecialMap["<="];}else if (TokenizeData.Start == '<'){TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.TokenData.Code = SpecialMap["<<"];TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];if (TokenizeData.Start == '='){TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.TokenData.Code = SpecialMap["<<="];}else{TokenizeData.Index--;}}else{TokenizeData.Index--;}}else if (TokenizeData.Start == '>'){TokenizeData.TokenData.Code = SpecialMap[">"];TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];if (TokenizeData.Start == '='){TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.TokenData.Code = SpecialMap[">="];}else if (TokenizeData.Start == '>'){TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.TokenData.Code = SpecialMap[">>"];TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];if (TokenizeData.Start == '='){TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.TokenData.Code = SpecialMap[">>="];}else{TokenizeData.Index--;}}else{TokenizeData.Index--;}}else if (TokenizeData.Start == '+'){TokenizeData.TokenData.Code = SpecialMap["+"];TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];if (TokenizeData.Start == '='){TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.TokenData.Code = SpecialMap["+="];}else if (TokenizeData.Start == '+'){TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.TokenData.Code = SpecialMap["++"];}else{TokenizeData.Index--;}}else if (TokenizeData.Start == '-'){TokenizeData.TokenData.Code = SpecialMap["-"];TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];if (TokenizeData.Start == '='){TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.TokenData.Code = SpecialMap["-="];}else if (TokenizeData.Start == '-'){TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.TokenData.Code = SpecialMap["--"];}else if (TokenizeData.Start == '>'){TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.TokenData.Code = SpecialMap["->"];}else{TokenizeData.Index--;}}else if (TokenizeData.Start == '*'){TokenizeData.TokenData.Code = SpecialMap["*"];TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];if (TokenizeData.Start == '='){TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.TokenData.Code = SpecialMap["*="];}else{TokenizeData.Index--;}}else if (TokenizeData.Start == '/'){TokenizeData.TokenData.Code = SpecialMap["/"];TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];if (TokenizeData.Start == '='){TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.TokenData.Code = SpecialMap["/="];}else{TokenizeData.Index--;}}else if (TokenizeData.Start == '%'){TokenizeData.TokenData.Code = SpecialMap["%"];TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];if (TokenizeData.Start == '='){TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.TokenData.Code = SpecialMap["%="];}else{TokenizeData.Index--;}}else if (TokenizeData.Start == '&'){TokenizeData.TokenData.Code = SpecialMap["&"];TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];if (TokenizeData.Start == '='){TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.TokenData.Code = SpecialMap["&="];}else if (TokenizeData.Start == '&'){TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.TokenData.Code = SpecialMap["&&"];}else{TokenizeData.Index--;}}else if (TokenizeData.Start == '|'){TokenizeData.TokenData.Code = SpecialMap["|"];TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];if (TokenizeData.Start == '='){TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.TokenData.Code = SpecialMap["|="];}else if (TokenizeData.Start == '|'){TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.TokenData.Code = SpecialMap["||"];}else{TokenizeData.Index--;}}else if (TokenizeData.Start == '^'){TokenizeData.TokenData.Code = SpecialMap["^"];TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];if (TokenizeData.Start == '='){TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.TokenData.Code = SpecialMap["^="];}else{TokenizeData.Index--;}}else if (TokenizeData.Start == '"'){TokenizeData.TokenData.Code = RegularMap["\\"(all char)*\\""];TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];while (true){if (TokenizeData.Start == '"'){TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];break;}if (TokenizeData.Start == '\\\\'){TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];}TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];}TokenizeData.Index--;}else if (TokenizeData.Start == '\\''){TokenizeData.TokenData.Code = RegularMap["\\'(all char)*\\'"];TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];while (true){if (TokenizeData.Start == '\\''){TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];break;}if (TokenizeData.Start == '\\\\'){TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];}TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];}TokenizeData.Index--;}else if (TokenizeData.Start == '_'){TokenizeData.TokenData.Code = RegularMap["_(letter|digit|$)*"];do{TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];}while (IsLetter(TokenizeData.Start) || IsDigit(TokenizeData.Start) || TokenizeData.Start == '$' || TokenizeData.Start == '_');TokenizeData.Index--;}else if (TokenizeData.Start == '$'){TokenizeData.TokenData.Code = RegularMap["$(letter|digit|$)*"];do{TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];}while (IsLetter(TokenizeData.Start) || IsDigit(TokenizeData.Start) || TokenizeData.Start == '$' || TokenizeData.Start == '_');TokenizeData.Index--;}else if (TokenizeData.Start == '#'){TokenizeData.TokenData.Code = KeywordMap["#"];TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];while (IsLetter(TokenizeData.Start)){TokenizeData.TokenData.Token += TokenizeData.Start;TokenizeData.Start = TokenizeData.Data[TokenizeData.Index++];}TokenizeData.Index--;}if (TokenizeData.TokenData.Code == 0){TokenizeData.TokenData.Token = TokenizeData.Start;TokenizeData.Errors.push_back("ERR: " + TokenizeData.FilePath + " " + std::to_string(TokenizeData.Lines) + " there is an undefined " +TokenizeData.TokenData.Token + " character whose ascii code is " + std::to_string(static_cast<int>(TokenizeData.TokenData.Token[0])) + ".");}}/* 单词分析*/std::string Tokenize(const std::string& Data){TokenizeData.Data = Data;TokenizeData.Index = 0;TokenizeData.Size = Data.size();while (GetValidChar()){TokenizeData.TokenData = {"", 0};if (TokenizeData.Start == '\\n'){TokenizeData.Lines++;}else if (IsLetter(TokenizeData.Start)){TokenizeAlpha();}else if (IsDigit(TokenizeData.Start)){TokenizeDigit();}else{TokenizeSpecial();}if (!TokenizeData.TokenData.Token.empty()){TokenizeData.Result += "(" + TokenizeData.TokenData.Token + "," + std::to_string(TokenizeData.TokenData.Code) + ")" + "\\n";}}return TokenizeData.Result;}/ 打印彩色字* 0=黑色 1=蓝色 2=绿色 3=湖蓝色* 4=红色 5=紫色 6=黄色 7=白色* 8=灰色 9=淡蓝色 10=淡绿色 11=淡浅绿色* 12=淡红色 13=淡紫色 14=淡黄色 15=亮白色* @param ForeColor 字体颜色* @param BackColor 字体背景颜色*/static void SetFontColor(int ForeColor, int BackColor){SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), static_cast<WORD>(ForeColor + BackColor * 0x10));}//模板一下,方便调用,T表示任何可以被cout输出的类型template <typename T>static void CoutWithColor(T t, int ForeColor = 7, int BackColor = 0){SetFontColor(ForeColor, BackColor);std::cout << t;SetFontColor(7, 0);Sleep(4);}/* 识别单词高亮*/static void Highlight(const std::string& SourceCode, const std::string& TokenData){const size_t CodeSize = SourceCode.size();const size_t ContentSize = TokenData.size();size_t Index = 0;size_t Lines = 0;CoutWithColor(std::to_string(Lines) + "\\t", 7);++Lines;for (size_t i = 0; i < CodeSize; ++i){if (SourceCode[i] == '\\n'){CoutWithColor(SourceCode[i] + std::to_string(Lines) + "\\t", 7);++Lines;}else if (SourceCode[i] == ' ' || SourceCode[i] == '\\t'){CoutWithColor(SourceCode[i], 7);}else if (SourceCode[i] == '/' && (i + 1) < CodeSize && SourceCode[i + 1] == '/'){std::string Temp;while (i < CodeSize && SourceCode[i] != '\\n'){Temp += SourceCode[i++];}i--;CoutWithColor(Temp, 2);}else if (SourceCode[i] == '/' && (i + 1) < CodeSize && SourceCode[i + 1] == '*'){std::string Temp;Temp += SourceCode[i];Temp += SourceCode[i + 1];i = i + 2;while (i < CodeSize){if (SourceCode[i] == '\\n'){CoutWithColor(Temp, 2);CoutWithColor(SourceCode[i] + std::to_string(Lines) + "\\t", 7);++Lines;++i;Temp = "";}else if (SourceCode[i] == '*' && (i + 1) < CodeSize && SourceCode[i + 1] == '/'){Temp += SourceCode[i++];Temp += SourceCode[i++];break;}else{Temp += SourceCode[i++];}}CoutWithColor(Temp, 2);i--;}else{std::string Doc;while (Index < ContentSize && TokenData[Index] != '\\n'){Doc += TokenData[Index++];}if (TokenData[Index] == '\\n'){Index++;}size_t Mid = 0;std::string Token;int Code = 0;// 寻找 , 分隔符for (size_t Pos = Doc.size() - 1; Pos != 0; --Pos){if (Doc[Pos] != ','){Mid = Pos - 1;}else{break;}}// 截取 Tokenfor (size_t Start = 1; Start < Mid; ++Start){Token += Doc[Start];}// 截取 Codefor (size_t Start = Mid + 1; Start < (Doc.size() - 1); ++Start){Code = Code * 10 + (Doc[Start] - '0');}// 判断类型颜色int ForeColor = 15;if ((10 <= Code and Code <= 26) or (100 <= Code and Code <= 127)) // 运算符和界限符{ForeColor = 6;}else if ((1 <= Code and Code <= 7) or (27 <= Code and Code <= 53)) // 关键字{ForeColor = 9;}else if (Code == 9 or (204 <= Code and Code <= 206)) // 无符号二,八,十,十六进制整型常数{ForeColor = 12;}else if (Code == 8 or Code == 202 or Code == 203) // 标识符{ForeColor = 10;}else if (Code == 200 or Code == 201) // 字符串常量{ForeColor = 14;}CoutWithColor(Token, ForeColor);i += Token.size() - 1;}}CoutWithColor("\\n", 7);}/* 显示词法分析结果*/void ShowResult() const{std::cout << "///\\n";CoutWithColor("VER: Copyright moota, private. all rights reserved.\\n", 7);CoutWithColor("INF: Lexical analysis is finished, " + std::to_string(TokenizeData.Warnings.size()) + " warnings, " +std::to_string(TokenizeData.Errors.size()) + " errors.\\n", 14);for (auto& Info : TokenizeData.Warnings){CoutWithColor(Info + "\\n", 9);}for (auto& Info : TokenizeData.Errors){CoutWithColor(Info + "\\n", 12);}}public:/* 调试词法分析器*/void Main(){// 初始词法分析器Initialize();// 读取处理文件const std::string Doc = ReadFile("Test.c");SaveFile(Doc, "TestRead.txt");// 预处理读入数据const std::string PreDoc = Preprocess(Doc);SaveFile(PreDoc, "TestPre.txt");// 识别单词符号const std::string Token = Tokenize(PreDoc);SaveFile(Token, "TestToken.txt");// 识别单词高亮Highlight(Doc, Token);// 显示词法分析结果ShowResult();getchar();}
};int main()
{FLexicalAnalyzer LexicalAnalyzer;LexicalAnalyzer.Main();return 0;
}