XCBOSA - ITX 3 سال پیش
والد
کامیت
e310654229

+ 2 - 0
Exp1_Lex.xcodeproj/project.pbxproj

@@ -25,6 +25,7 @@
 /* Begin PBXFileReference section */
 		75BCCD9A281BA169008E8808 /* Exp1_Lex */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = Exp1_Lex; sourceTree = BUILT_PRODUCTS_DIR; };
 		75BCCD9D281BA169008E8808 /* main.c */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.c; path = main.c; sourceTree = "<group>"; };
+		75BCCDA4281BA258008E8808 /* testfile.txt */ = {isa = PBXFileReference; lastKnownFileType = text; path = testfile.txt; sourceTree = "<group>"; };
 /* End PBXFileReference section */
 
 /* Begin PBXFrameworksBuildPhase section */
@@ -58,6 +59,7 @@
 			isa = PBXGroup;
 			children = (
 				75BCCD9D281BA169008E8808 /* main.c */,
+				75BCCDA4281BA258008E8808 /* testfile.txt */,
 			);
 			path = Exp1_Lex;
 			sourceTree = "<group>";

+ 8 - 0
Exp1_Lex.xcodeproj/xcuserdata/xcbosa.xcuserdatad/xcschemes/xcschememanagement.plist

@@ -10,5 +10,13 @@
 			<integer>0</integer>
 		</dict>
 	</dict>
+	<key>SuppressBuildableAutocreation</key>
+	<dict>
+		<key>75BCCD99281BA169008E8808</key>
+		<dict>
+			<key>primary</key>
+			<true/>
+		</dict>
+	</dict>
 </dict>
 </plist>

+ 408 - 2
Exp1_Lex/main.c

@@ -6,9 +6,415 @@
 //
 
 #include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define true 1   // Boolean constants used together with the int-based Bool typedef below.
+#define false 0
+
+typedef char *String;   // Alias for a mutable char pointer; note `const String` means `char *const`, not `const char *`.
+typedef int Bool;       // Carries the true/false macros defined above.
+
+/// Compares two strings for equality.
+/// @param left  First string; may be NULL.
+/// @param right Second string; may be NULL.
+/// @return true when both pointers are identical (including both NULL) or when
+///         the contents match; false otherwise.
+Bool stringEquals(String left, String right) {
+    if (left == right) {
+        // Identical addresses imply identical contents, so the character
+        // comparison can be skipped (fast path for shared constant strings).
+        return true;
+    }
+    if (left == NULL || right == NULL) {
+        // Exactly one side is NULL here; strcmp on NULL is undefined behavior.
+        return false;
+    }
+    return strcmp(left, right) == 0;
+}
+
+/// Copies a NUL-terminated string into freshly allocated heap memory.
+/// @param stackString Source string to duplicate; may be NULL.
+/// @return Heap copy that the caller owns and releases with free(), or NULL
+///         when stackString is NULL.
+/// Terminates the process with an error message if the allocation fails,
+/// instead of copying through a NULL pointer.
+String stringFromStackString(String stackString) {
+    if (stackString == NULL) {
+        return NULL;
+    }
+    size_t size = strlen(stackString) + 1;  // include the terminator
+    String str = malloc(size);
+    if (str == NULL) {
+        fprintf(stderr, "Error: out of memory while copying a string\n");
+        exit(-1);
+    }
+    memcpy(str, stackString, size);
+    return str;
+}
+
+// MARK: - Hand-buffered File wrapper, because C's FILE offers no general way to push characters back...
+
+typedef struct {
+    FILE *fp;        // underlying stream
+    char buf[1024];  // pushed-back characters, used as a stack: the last one pushed is the next one read
+    int bufSize;     // number of characters currently stored in buf
+} CachedFile;
+
+typedef struct {
+    fpos_t fpos;           // stream position captured by cached_fgetpos
+    CachedFile lastState;  // copy of the whole CachedFile taken at the same moment
+} CachedFileFPos;
+
+/// Opens a file for reading and wraps it in a CachedFile with an empty
+/// push-back buffer.
+/// @param file Path of the file to open.
+/// @return Heap-allocated CachedFile owned by the caller, or NULL when the file
+///         cannot be opened or the wrapper cannot be allocated.
+CachedFile *cached_fopen(const String file) {
+    FILE *fp = fopen(file, "r");
+    if (fp == NULL) {
+        return NULL;
+    }
+    CachedFile *cached = calloc(1, sizeof *cached);
+    if (cached == NULL) {
+        fclose(fp);  // do not leak the stream when the wrapper cannot be built
+        return NULL;
+    }
+    cached->fp = fp;
+    cached->bufSize = 0;
+    return cached;
+}
+
+/// Reads the next character, preferring characters that were pushed back.
+/// @param file Reader to take the character from.
+/// @return The most recently pushed-back character if there is one, otherwise
+///         the result of fgetc on the underlying stream narrowed to char
+///         (so end of file is returned as (char)EOF).
+char cached_fgetc(CachedFile *file) {
+    if (file->bufSize <= 0) {
+        return fgetc(file->fp);
+    }
+    file->bufSize -= 1;
+    return file->buf[file->bufSize];
+}
+
+/// Pushes a character back so that the next cached_fgetc returns it.
+/// @param ch   Character to push back.
+/// @param file Reader that receives the character.
+/// Terminates the process with an error message when the push-back buffer is
+/// already full, rather than writing past the end of buf.
+void cached_fputc(char ch, CachedFile *file) {
+    if (file->bufSize < 0 || (size_t)file->bufSize >= sizeof file->buf) {
+        fprintf(stderr, "Error: push-back buffer overflow\n");
+        exit(-1);
+    }
+    file->buf[file->bufSize] = ch;
+    file->bufSize++;
+}
+
+/// Records the current read position: the stream position plus a snapshot of
+/// the reader (including its push-back buffer).
+/// @param file Reader whose position is captured.
+/// @param pos  Receives the captured position.
+/// @return The result of fgetpos on the underlying stream.
+int cached_fgetpos(CachedFile *file, CachedFileFPos *pos) {
+    pos->lastState = *file;
+    return fgetpos(file->fp, &pos->fpos);
+}
+
+/// Rewinds the reader to a position captured by cached_fgetpos: restores the
+/// push-back buffer contents and size, then repositions the stream.
+/// @param file Reader to rewind.
+/// @param pos  Previously captured position.
+/// @return The result of fsetpos on the underlying stream.
+int cached_fsetpos(CachedFile *file, CachedFileFPos *pos) {
+    const CachedFile *saved = &pos->lastState;
+    memcpy(file->buf, saved->buf, sizeof file->buf);
+    file->bufSize = saved->bufSize;
+    return fsetpos(file->fp, &pos->fpos);
+}
+
+// MARK: - Definitions
+
+// Token-type name strings, as listed in the lab guide
+const String IDENFR = "IDENFR";
+const String INTCON = "INTCON";
+const String CHARCON = "CHARCON";
+const String STRCON = "STRCON";
+const String CONSTTK = "CONSTTK";
+const String INTTK = "INTTK";
+const String CHARTK = "CHARTK";
+const String VOIDTK = "VOIDTK";
+const String MAINTK = "MAINTK";
+const String IFTK = "IFTK";
+const String DOTK = "DOTK";
+
+const String ELSETK = "ELSETK";
+const String SWITCHTK = "SWITCHTK";
+const String CASETK = "CASETK";
+const String DEFAULTTK = "DEFAULTTK";
+const String WHILETK = "WHILETK";
+const String FORTK = "FORTK";
+const String SCANFTK = "SCANFTK";
+const String PRINTFTK = "PRINTFTK";
+const String RETURNTK = "RETURNTK";
+const String PLUS = "PLUS";
+
+const String MINU = "MINU";
+const String MULT = "MULT";
+const String DIV = "DIV";
+const String LSS = "LSS";
+const String LEQ = "LEQ";
+const String GRE = "GRE";
+const String GEQ = "GEQ";
+const String EQL = "EQL";
+const String NEQ = "NEQ";
+const String COLON = "COLON";
+
+const String ASSIGN = "ASSIGN";
+const String SEMICN = "SEMICN";
+const String COMMA = "COMMA";
+const String LPARENT = "LPARENT";
+const String RPARENT = "RPARENT";
+const String LBRACK = "LBRACK";
+const String RBRACK = "RBRACK";
+const String LBRACE = "LBRACE";
+const String RBRACE = "RBRACE";
+
+const String EOFTK = "EOF";  // token type carried by TokenEOF
+
+struct Token;
+struct TokenRule;
+
+struct Token TokenEOF;  // end-of-input token; its fields are assigned in doInitialization
+
+// Token parsing callback; returns a Token with isEmpty set when the rule cannot parse the input
+typedef struct Token(*TokenBodyProviderBlock)(CachedFile*, struct TokenRule);
+
+typedef struct TokenRule {
+    String tokenBody;  // literal text to match; NULL for rules whose function scans the text itself
+    String tokenTy;    // token-type name reported for a match; NULL marks the end of the rule table
+    // Parsing function for this rule
+    TokenBodyProviderBlock tokenBodyProvider;
+} TokenRule;
+
+typedef struct Token {
+    String tokenBody;  // text of the token
+    String tokenTy;    // token-type name
+    Bool isEmpty;      // true when no token was produced
+} Token;
+
+const Token TokenEmpty = { NULL, NULL, true };  // "no match" result returned by the lex functions
+
+/// Lexes the end-of-input token
+/// @param file File
+/// @param rule The rule that triggered this call
+Token lexEOF(CachedFile *file, TokenRule rule);
+
+/// Lexes an alphabetic keyword; it must be followed by a non-identifier character so that a prefix is not accepted as a match (int -> in)
+/// @param file File
+/// @param rule The rule that triggered this call
+Token lexWords(CachedFile *file, TokenRule rule);
+
+/// Lexes a symbol; nothing is required after it, but the rule table must list longer symbols before shorter ones so that a prefix is not matched first (<= -> <)
+/// @param file File
+/// @param rule The rule that triggered this call
+Token lexSymbols(CachedFile *file, TokenRule rule);
+
+Token lexIDENFR(CachedFile *file, TokenRule rule);
+Token lexINTCON(CachedFile *file, TokenRule rule);
+Token lexCHARCON(CachedFile *file, TokenRule rule);
+Token lexSTRCON(CachedFile *file, TokenRule rule);
+
+TokenRule *rules;  // rule table built by doInitialization; ends with an entry whose tokenTy is NULL
+
+/// Builds the global rule table and fills in TokenEOF. Call it before lexNext.
+/// Rules are tried in table order, so keywords come before the identifier
+/// rule and longer symbols come before their one-character prefixes.
+/// Terminates the process with an error message if the table cannot be
+/// allocated. Calling it again replaces (and frees) the previous table.
+void doInitialization(void) {
+    TokenRule table[] = {
+        { NULL, EOFTK, &lexEOF },
+        { NULL, INTCON, &lexINTCON },
+        { NULL, CHARCON, &lexCHARCON },
+        { NULL, STRCON, &lexSTRCON },
+        { "const", CONSTTK, &lexWords },
+        { "int", INTTK, &lexWords },
+        { "char", CHARTK, &lexWords },
+        { "void", VOIDTK, &lexWords },
+        { "main", MAINTK, &lexWords },
+        { "if", IFTK, &lexWords },
+        { "do", DOTK, &lexWords },
+        { "else", ELSETK, &lexWords },
+        // SWITCHTK and CASETK are declared with the other token types, so the
+        // keywords need rules too; otherwise they are lexed as identifiers.
+        { "switch", SWITCHTK, &lexWords },
+        { "case", CASETK, &lexWords },
+        { "default", DEFAULTTK, &lexWords },
+        { "while", WHILETK, &lexWords },
+        { "for", FORTK, &lexWords },
+        { "scanf", SCANFTK, &lexWords },
+        { "printf", PRINTFTK, &lexWords },
+        { "return", RETURNTK, &lexWords },
+        { NULL, IDENFR, &lexIDENFR },
+        { "+", PLUS, &lexSymbols },
+        { "-", MINU, &lexSymbols },
+        { "*", MULT, &lexSymbols },
+        { "/", DIV, &lexSymbols },
+        { "<=", LEQ, &lexSymbols },
+        { "<", LSS, &lexSymbols },
+        { ">=", GEQ, &lexSymbols },
+        { ">", GRE, &lexSymbols },
+        { "==", EQL, &lexSymbols },
+        { "!=", NEQ, &lexSymbols },
+        { ":", COLON, &lexSymbols },
+        { "=", ASSIGN, &lexSymbols },
+        { ";", SEMICN, &lexSymbols },
+        { ",", COMMA, &lexSymbols },
+        { "(", LPARENT, &lexSymbols },
+        { ")", RPARENT, &lexSymbols },
+        { "[", LBRACK, &lexSymbols },
+        { "]", RBRACK, &lexSymbols },
+        { "{", LBRACE, &lexSymbols },
+        { "}", RBRACE, &lexSymbols },
+        { NULL, NULL, NULL }
+    };
+    TokenRule *copy = malloc(sizeof table);
+    if (copy == NULL) {
+        fprintf(stderr, "Error: out of memory while building the rule table\n");
+        exit(-1);
+    }
+    memcpy(copy, table, sizeof table);
+    free(rules);  // drop the table from an earlier call; free(NULL) is a no-op
+    rules = copy;
+    TokenEOF.tokenTy = EOFTK;
+    TokenEOF.tokenBody = "";
+    TokenEOF.isEmpty = false;
+}
+
+/// Tells whether ch is an ASCII letter, an ASCII digit, or '_'.
+Bool isAlphaOrNumberOrUnderline(char ch) {
+    if (ch == '_') {
+        return true;
+    }
+    if (ch >= '0' && ch <= '9') {
+        return true;
+    }
+    if (ch >= 'a' && ch <= 'z') {
+        return true;
+    }
+    return ch >= 'A' && ch <= 'Z';
+}
+
+/// Tells whether ch is an ASCII letter or '_'.
+Bool isAlphaOrUnderline(char ch) {
+    if (ch == '_') {
+        return true;
+    }
+    if (ch >= 'a' && ch <= 'z') {
+        return true;
+    }
+    return ch >= 'A' && ch <= 'Z';
+}
+
+/// Tells whether ch is an ASCII digit or one of 'X', 'x', 'E', 'e', '.'.
+Bool isNumberOrXOrEOrPoint(char ch) {
+    if (ch >= '0' && ch <= '9') {
+        return true;
+    }
+    switch (ch) {
+        case 'X':
+        case 'x':
+        case 'E':
+        case 'e':
+        case '.':
+            return true;
+        default:
+            return false;
+    }
+}
+
+/// Tells whether ch is whitespace: space, line feed, carriage return, or tab.
+Bool isEmptyWord(char ch) {
+    switch (ch) {
+        case ' ':
+        case '\n':
+        case '\r':
+        case '\t':
+            return true;
+        default:
+            return false;
+    }
+}
+
+// MARK: - Lex Implementation
+
+/// Lexes the next token by trying each rule of the rule table in order.
+/// @param file File
+/// @return The token produced by the first rule that matches, or TokenEmpty
+///         when no rule matches or the rule table has not been built.
+Token lexNext(CachedFile *file) {
+    if (rules == NULL) {
+        // doInitialization has not run, so there is no table to walk.
+        return TokenEmpty;
+    }
+    // Skip leading whitespace and record the start of the token once; every
+    // failed rule rewinds to this same position, so repeating the skip and
+    // the snapshot for each rule would only redo identical work.
+    char ch = '\0';
+    while (isEmptyWord(ch = cached_fgetc(file))) ;
+    cached_fputc(ch, file);
+    CachedFileFPos start;
+    cached_fgetpos(file, &start);
+    for (TokenRule *rule = &rules[0]; rule->tokenTy; rule++) {
+        Token result = rule->tokenBodyProvider(file, *rule);
+        if (!result.isEmpty) {
+            return result;
+        }
+        cached_fsetpos(file, &start);
+    }
+    return TokenEmpty;
+}
+
+/// Recognizes the end of the input.
+/// @param file File
+/// @param rule The rule that triggered this call (unused)
+/// @return TokenEOF when the next character is '\0' or the underlying stream
+///         has reached end of file; otherwise TokenEmpty, with the character
+///         pushed back.
+Token lexEOF(CachedFile *file, TokenRule rule) {
+    char ch = cached_fgetc(file);
+    if (ch == '\0') {
+        return TokenEOF;
+    }
+    // cached_fgetc narrows fgetc's int result to char, so a real end of file
+    // shows up as (char)EOF rather than '\0'. feof distinguishes it from a
+    // data byte with the same value; the empty push-back buffer confirms
+    // nothing else is waiting to be read.
+    if (ch == (char)EOF && file->bufSize == 0 && feof(file->fp)) {
+        return TokenEOF;
+    }
+    cached_fputc(ch, file);
+    return TokenEmpty;
+}
+
+/// Matches the keyword stored in rule.tokenBody.
+/// @param file File
+/// @param rule Rule carrying the keyword text and its token type
+/// @return A token for the keyword when it is present and not followed by a
+///         letter, digit or '_'; otherwise TokenEmpty. The character after
+///         the keyword is pushed back. A '\0' read from the input is treated
+///         like any other non-matching or terminating character, so a
+///         keyword directly followed by it is still returned.
+Token lexWords(CachedFile *file, TokenRule rule) {
+    const char *expected = rule.tokenBody;
+    if (expected == NULL) {
+        return TokenEmpty;
+    }
+    for (;;) {
+        char ch = cached_fgetc(file);
+        if (*expected == '\0') {
+            // The whole keyword was consumed; ch is the lookahead. It must not
+            // continue an identifier, otherwise only a prefix matched.
+            if (isAlphaOrNumberOrUnderline(ch)) {
+                return TokenEmpty;
+            }
+            cached_fputc(ch, file);
+            Token tk = { rule.tokenBody, rule.tokenTy, false };
+            return tk;
+        }
+        if (ch != *expected) {
+            return TokenEmpty;
+        }
+        expected++;
+    }
+}
+
+/// Matches the symbol stored in rule.tokenBody.
+/// @param file File
+/// @param rule Rule carrying the symbol text and its token type
+/// @return A token for the symbol when the input starts with it, otherwise
+///         TokenEmpty. Nothing past the symbol is read, so a symbol that is
+///         the last thing in the input is still returned.
+Token lexSymbols(CachedFile *file, TokenRule rule) {
+    if (rule.tokenBody == NULL) {
+        return TokenEmpty;
+    }
+    for (const char *expected = rule.tokenBody; *expected != '\0'; expected++) {
+        if (cached_fgetc(file) != *expected) {
+            return TokenEmpty;
+        }
+    }
+    Token tk = { rule.tokenBody, rule.tokenTy, false };
+    return tk;
+}
+
+/// Lexes an identifier: a letter or '_' followed by letters, digits or '_'.
+/// @param file File
+/// @param rule The rule that triggered this call (unused)
+/// @return An IDENFR token whose body is a heap copy of the identifier, or
+///         TokenEmpty when the input does not start with a letter or '_'.
+///         The character that ends the identifier is pushed back.
+/// Terminates the process with an error message when the identifier does not
+/// fit into the local buffer, instead of writing past its end.
+Token lexIDENFR(CachedFile *file, TokenRule rule) {
+    char tmp[1024];
+    size_t len = 0;
+    char ch = cached_fgetc(file);
+    if (!isAlphaOrUnderline(ch)) {
+        return TokenEmpty;
+    }
+    while (isAlphaOrNumberOrUnderline(ch)) {
+        if (len + 1 >= sizeof tmp) {  // keep one slot for the terminator
+            fprintf(stderr, "Error: identifier is too long\n");
+            exit(-1);
+        }
+        tmp[len++] = ch;
+        ch = cached_fgetc(file);
+    }
+    cached_fputc(ch, file);
+    tmp[len] = '\0';
+    Token tk = { stringFromStackString(tmp), IDENFR, false };
+    return tk;
+}
+
+/// Lexes a numeric constant: a digit followed by any run of digits and the
+/// characters 'X', 'x', 'E', 'e', '.'.
+/// @param file File
+/// @param rule The rule that triggered this call (unused)
+/// @return An INTCON token whose body is a heap copy of the constant, or
+///         TokenEmpty when the input does not start with a digit. The
+///         character that ends the constant is pushed back.
+/// Terminates the process with an error message when the constant does not
+/// fit into the local buffer, instead of writing past its end.
+Token lexINTCON(CachedFile *file, TokenRule rule) {
+    char tmp[1024];
+    size_t len = 0;
+    char ch = cached_fgetc(file);
+    if (!(ch >= '0' && ch <= '9')) {
+        return TokenEmpty;
+    }
+    while (isNumberOrXOrEOrPoint(ch)) {
+        if (len + 1 >= sizeof tmp) {  // keep one slot for the terminator
+            fprintf(stderr, "Error: numeric constant is too long\n");
+            exit(-1);
+        }
+        tmp[len++] = ch;
+        ch = cached_fgetc(file);
+    }
+    cached_fputc(ch, file);
+    tmp[len] = '\0';
+    Token tk = { stringFromStackString(tmp), INTCON, false };
+    return tk;
+}
+
+/// Lexes a character constant enclosed in single quotes.
+/// @param file File
+/// @param rule The rule that triggered this call (unused)
+/// @return A CHARCON token whose body is a heap copy of the text between the
+///         quotes (one character, or a backslash plus the character after
+///         it), or TokenEmpty when the input does not start with a quote.
+/// Prints an error and exits when the closing quote is missing.
+Token lexCHARCON(CachedFile *file, TokenRule rule) {
+    if (cached_fgetc(file) != '\'') {
+        return TokenEmpty;
+    }
+    char tmp[1024];
+    int len = 0;
+    char first = cached_fgetc(file);
+    tmp[len++] = first;
+    if (first == '\\') {
+        // keep the escaped character together with its backslash
+        tmp[len++] = cached_fgetc(file);
+    }
+    char closing = cached_fgetc(file);
+    if (closing != '\'') {
+        printf("Error: Un breaking char constant: Expect ', but got %c", closing);
+        exit(-1);
+    }
+    tmp[len] = '\0';
+    Token tk = { stringFromStackString(tmp), CHARCON, false };
+    return tk;
+}
+
+/// Lexes a string constant enclosed in double quotes.
+/// @param file File
+/// @param rule The rule that triggered this call (unused)
+/// @return A STRCON token whose body is a heap copy of the text between the
+///         quotes (a backslash and the character after it are kept as-is),
+///         or TokenEmpty when the input does not start with a double quote.
+/// Prints an error and exits when the string is not closed before a line
+/// break, a '\0', or the end of the file, or when it does not fit into the
+/// local buffer.
+Token lexSTRCON(CachedFile *file, TokenRule rule) {
+    char tmp[1024];
+    size_t len = 0;
+    if (cached_fgetc(file) != '\"') {
+        return TokenEmpty;
+    }
+    for (;;) {
+        char ch = cached_fgetc(file);
+        if (ch == '\"') {
+            break;
+        }
+        // A real end of file arrives as (char)EOF because cached_fgetc narrows
+        // fgetc's result; without the feof check the loop would never stop.
+        Bool atEnd = (ch == (char)EOF && feof(file->fp));
+        if (ch == '\0' || ch == '\n' || atEnd) {
+            printf("Error: Un breaking string constant, expect \", but got nothing");
+            exit(-1);
+        }
+        // Room for this character, a possible escaped character, and the terminator.
+        if (len + 3 > sizeof tmp) {
+            fprintf(stderr, "Error: string constant is too long\n");
+            exit(-1);
+        }
+        tmp[len++] = ch;
+        if (ch == '\\') {
+            // Copy the escaped character too, so an escaped quote does not end the string.
+            tmp[len++] = cached_fgetc(file);
+        }
+    }
+    tmp[len] = '\0';
+    Token tk = { stringFromStackString(tmp), STRCON, false };
+    return tk;
+}
 
 int main(int argc, const char * argv[]) {
-    // insert code here...
-    printf("Hello, World!\n");
+    doInitialization();
+    CachedFile *fp = cached_fopen("testfile.txt");
+    FILE *wp = fopen("output.txt", "w+");
+    Token currentWord;
+    while (!(currentWord = lexNext(fp)).isEmpty) {
+        if (stringEquals(currentWord.tokenTy, EOFTK)) {
+            return 0;
+        }
+        fprintf(wp, "%s %s\n", currentWord.tokenTy, currentWord.tokenBody);
+    }
     return 0;
 }

+ 12 - 0
Exp1_Lex/testfile.txt

@@ -0,0 +1,12 @@
+const int const1 = 1, const2 = -100;
+const char const3 = '_';
+int change1;
+char change3;
+int gets1(int var1,int var2){
+    change1 = var1 + var2;
+    return (change1);
+}
+void main(){
+    printf("Hello World");
+    printf(gets1(10, 20));
+}