chibiccを読む~Cコンパイラコードリーディング~ ステップ8
トップページ
jupiteroak.hatenablog.com
「低レイヤを知りたい人のためのCコンパイラ作成入門」のCコンパイラを読んでいきます。
www.sigbus.info
ステップ8に該当
github.com
コンパイラのソースコード
chibicc.h
https://github.com/rui314/chibicc/commit/3396747fcbf51ae45aa162f5c9c41fa719050421#diff-d06dbb7ef5899cdf50b340464444680b13aded45363e7aba944dc3551fdf6334R1
https://github.com/rui314/chibicc/blob/3396747fcbf51ae45aa162f5c9c41fa719050421/chibicc.h#L1
#include <ctype.h> #include <stdarg.h> #include <stdbool.h> #include <stdio.h> #include <stdlib.h> #include <string.h> // // tokenize.c // typedef enum { TK_RESERVED, // Keywords or punctuators TK_NUM, // Integer literals TK_EOF, // End-of-file markers } TokenKind; // Token type typedef struct Token Token; struct Token { TokenKind kind; // Token kind Token *next; // Next token int val; // If kind is TK_NUM, its value char *str; // Token string int len; // Token length }; void error(char *fmt, ...); void error_at(char *loc, char *fmt, ...); bool consume(char *op); void expect(char *op); int expect_number(); bool at_eof(); Token *new_token(TokenKind kind, Token *cur, char *str, int len); Token *tokenize(); extern char *user_input; extern Token *token; // // parse.c // typedef enum { ND_ADD, // + ND_SUB, // - ND_MUL, // * ND_DIV, // / ND_EQ, // == ND_NE, // != ND_LT, // < ND_LE, // <= ND_NUM, // Integer } NodeKind; // AST node type typedef struct Node Node; struct Node { NodeKind kind; // Node kind Node *lhs; // Left-hand side Node *rhs; // Right-hand side int val; // Used if kind == ND_NUM }; Node *expr(); // // codegen.c // void codegen(Node *node);
main.c
https://github.com/rui314/chibicc/commit/3396747fcbf51ae45aa162f5c9c41fa719050421#diff-a0cb465674c1b01a07d361f25a0ef2b0214b7dfe9412b7777f89add956da10ecR1
https://github.com/rui314/chibicc/blob/3396747fcbf51ae45aa162f5c9c41fa719050421/main.c#L1
#include "chibicc.h" int main(int argc, char **argv) { if (argc != 2) error("%s: invalid number of arguments", argv[0]); // Tokenize and parse. user_input = argv[1]; token = tokenize(); Node *node = expr(); // Traverse the AST to emit assembly. codegen(node); return 0; }
tokenize.c
https://github.com/rui314/chibicc/commit/3396747fcbf51ae45aa162f5c9c41fa719050421#diff-289479d6df6940b25dd31a6f2da4881331f916ec642bd1ae47d4ff0a365d8e88R1
https://github.com/rui314/chibicc/blob/3396747fcbf51ae45aa162f5c9c41fa719050421/tokenize.c#L1
#include "chibicc.h" char *user_input; Token *token; // Reports an error and exit. void error(char *fmt, ...) { va_list ap; va_start(ap, fmt); vfprintf(stderr, fmt, ap); fprintf(stderr, "\n"); exit(1); } // Reports an error location and exit. void error_at(char *loc, char *fmt, ...) { va_list ap; va_start(ap, fmt); int pos = loc - user_input; fprintf(stderr, "%s\n", user_input); fprintf(stderr, "%*s", pos, ""); // print pos spaces. fprintf(stderr, "^ "); vfprintf(stderr, fmt, ap); fprintf(stderr, "\n"); exit(1); } // Consumes the current token if it matches `op`. bool consume(char *op) { if (token->kind != TK_RESERVED || strlen(op) != token->len || memcmp(token->str, op, token->len)) return false; token = token->next; return true; } // Ensure that the current token is `op`. void expect(char *op) { if (token->kind != TK_RESERVED || strlen(op) != token->len || memcmp(token->str, op, token->len)) error_at(token->str, "expected \"%s\"", op); token = token->next; } // Ensure that the current token is TK_NUM. int expect_number() { if (token->kind != TK_NUM) error_at(token->str, "expected a number"); int val = token->val; token = token->next; return val; } bool at_eof() { return token->kind == TK_EOF; } // Create a new token and add it as the next token of `cur`. Token *new_token(TokenKind kind, Token *cur, char *str, int len) { Token *tok = calloc(1, sizeof(Token)); tok->kind = kind; tok->str = str; tok->len = len; cur->next = tok; return tok; } bool startswith(char *p, char *q) { return memcmp(p, q, strlen(q)) == 0; } // Tokenize `user_input` and returns new tokens. Token *tokenize() { char *p = user_input; Token head; head.next = NULL; Token *cur = &head; while (*p) { // Skip whitespace characters. if (isspace(*p)) { p++; continue; } // Multi-letter punctuator if (startswith(p, "==") || startswith(p, "!=") || startswith(p, "<=") || startswith(p, ">=")) { cur = new_token(TK_RESERVED, cur, p, 2); p += 2; continue; } // Single-letter punctuator if (strchr("+-*/()<>", *p)) { cur = new_token(TK_RESERVED, cur, p++, 1); continue; } // Integer literal if (isdigit(*p)) { cur = new_token(TK_NUM, cur, p, 0); char *q = p; cur->val = strtol(p, &p, 10); cur->len = p - q; continue; } error_at(p, "invalid token"); } new_token(TK_EOF, cur, p, 0); return head.next; }
parse.c
https://github.com/rui314/chibicc/commit/3396747fcbf51ae45aa162f5c9c41fa719050421#diff-a07721cd062be25900bddb926de15fc103cf32ea2726d1fea286f6548b810c6aR1
https://github.com/rui314/chibicc/blob/3396747fcbf51ae45aa162f5c9c41fa719050421/parse.c#L1
#include "chibicc.h" Node *new_node(NodeKind kind) { Node *node = calloc(1, sizeof(Node)); node->kind = kind; return node; } Node *new_binary(NodeKind kind, Node *lhs, Node *rhs) { Node *node = new_node(kind); node->lhs = lhs; node->rhs = rhs; return node; } Node *new_num(int val) { Node *node = new_node(ND_NUM); node->val = val; return node; } Node *expr(); Node *equality(); Node *relational(); Node *add(); Node *mul(); Node *unary(); Node *primary(); // expr = equality Node *expr() { return equality(); } // equality = relational ("==" relational | "!=" relational)* Node *equality() { Node *node = relational(); for (;;) { if (consume("==")) node = new_binary(ND_EQ, node, relational()); else if (consume("!=")) node = new_binary(ND_NE, node, relational()); else return node; } } // relational = add ("<" add | "<=" add | ">" add | ">=" add)* Node *relational() { Node *node = add(); for (;;) { if (consume("<")) node = new_binary(ND_LT, node, add()); else if (consume("<=")) node = new_binary(ND_LE, node, add()); else if (consume(">")) node = new_binary(ND_LT, add(), node); else if (consume(">=")) node = new_binary(ND_LE, add(), node); else return node; } } // add = mul ("+" mul | "-" mul)* Node *add() { Node *node = mul(); for (;;) { if (consume("+")) node = new_binary(ND_ADD, node, mul()); else if (consume("-")) node = new_binary(ND_SUB, node, mul()); else return node; } } // mul = unary ("*" unary | "/" unary)* Node *mul() { Node *node = unary(); for (;;) { if (consume("*")) node = new_binary(ND_MUL, node, unary()); else if (consume("/")) node = new_binary(ND_DIV, node, unary()); else return node; } } // unary = ("+" | "-")? unary // | primary Node *unary() { if (consume("+")) return unary(); if (consume("-")) return new_binary(ND_SUB, new_num(0), unary()); return primary(); } // primary = "(" expr ")" | num Node *primary() { if (consume("(")) { Node *node = expr(); expect(")"); return node; } return new_num(expect_number()); }
codegen.c
https://github.com/rui314/chibicc/commit/3396747fcbf51ae45aa162f5c9c41fa719050421#diff-629fe11334ae1d560032cdb6cc6f9a4fbb0f5b1365894b6b648d6ee4d5a654beR1
https://github.com/rui314/chibicc/blob/3396747fcbf51ae45aa162f5c9c41fa719050421/codegen.c#L1
#include "chibicc.h" void gen(Node *node) { if (node->kind == ND_NUM) { printf(" push %d\n", node->val); return; } gen(node->lhs); gen(node->rhs); printf(" pop rdi\n"); printf(" pop rax\n"); switch (node->kind) { case ND_ADD: printf(" add rax, rdi\n"); break; case ND_SUB: printf(" sub rax, rdi\n"); break; case ND_MUL: printf(" imul rax, rdi\n"); break; case ND_DIV: printf(" cqo\n"); printf(" idiv rdi\n"); break; case ND_EQ: printf(" cmp rax, rdi\n"); printf(" sete al\n"); printf(" movzb rax, al\n"); break; case ND_NE: printf(" cmp rax, rdi\n"); printf(" setne al\n"); printf(" movzb rax, al\n"); break; case ND_LT: printf(" cmp rax, rdi\n"); printf(" setl al\n"); printf(" movzb rax, al\n"); break; case ND_LE: printf(" cmp rax, rdi\n"); printf(" setle al\n"); printf(" movzb rax, al\n"); break; } printf(" push rax\n"); } void codegen(Node *node) { printf(".intel_syntax noprefix\n"); printf(".global main\n"); printf("main:\n"); gen(node); // A result must be at the top of the stack, so pop it // to RAX to make it a program exit code. printf(" pop rax\n"); printf(" ret\n"); }
テストコード
test.sh
https://github.com/rui314/chibicc/blob/3396747fcbf51ae45aa162f5c9c41fa719050421/test.sh
#!/bin/bash assert() { expected="$1" input="$2" ./chibicc "$input" > tmp.s gcc -static -o tmp tmp.s ./tmp actual="$?" if [ "$actual" = "$expected" ]; then echo "$input => $actual" else echo "$input => $expected expected, but got $actual" exit 1 fi } assert 0 0 assert 42 42 assert 21 '5+20-4' assert 41 ' 12 + 34 - 5 ' assert 47 '5+6*7' assert 15 '5*(9-6)' assert 4 '(3+5)/2' assert 10 '-10+20' assert 10 '- -10' assert 10 '- - +10' assert 0 '0==1' assert 1 '42==42' assert 1 '0!=1' assert 0 '42!=42' assert 1 '0<1' assert 0 '1<1' assert 0 '2<1' assert 1 '0<=1' assert 1 '1<=1' assert 0 '2<=1' assert 1 '1>0' assert 0 '1>1' assert 0 '1>2' assert 1 '1>=0' assert 1 '1>=1' assert 0 '1>=2' echo OK
Makefile
https://github.com/rui314/chibicc/commit/3396747fcbf51ae45aa162f5c9c41fa719050421#diff-76ed074a9305c04054cdebb9e9aad2d818052b07091de1f20cad0bbac34ffb52R1
https://github.com/rui314/chibicc/blob/3396747fcbf51ae45aa162f5c9c41fa719050421/Makefile#L1
CFLAGS=-std=c11 -g -static SRCS=$(wildcard *.c) OBJS=$(SRCS:.c=.o) chibicc: $(OBJS) $(CC) -o $@ $(OBJS) $(LDFLAGS) $(OBJS): chibicc.h test: chibicc ./test.sh clean: rm -f chibicc *.o *~ tmp* .PHONY: test clean