Refactor code

Change mpc.py to auto-generate the ANTLR4 files without having to run the generator manually. Also add a small command-line utility.
Remove commented-out imports in other files, and update the README.
This commit is contained in:
Nguyễn Anh Khoa 2018-12-18 01:06:36 +07:00
parent d33f3a54ba
commit d6be336a0c
7 changed files with 136 additions and 110 deletions

View File

@ -1,8 +1,8 @@
# MP Compiler # MP Compiler
From my Principle of Programming Languages assignment, I have created a compiler for the MP language. The assignment phase is divided to 4 phases, from doing Lexer, Parser, AST generation to Static Checker and Jasmin Code generation. From my Principle of Programming Languages assignment, I have created a compiler for the MP language. The assignment is divided to 4 phases, from Lexer and Parser, to AST generation to Static Checker and Jasmin Code generation.
The assignment code structure is quite ugle, so I re-organized the code, adding some more steps to make the code look nicer and compile a \*.mp file to a jar file. The assignment code structure is quite ugly, so I re-organized the code, adding some more steps to make the code look nicer and compile a `*.mp` file to a `jar` file.
Given the mp file as follows: Given the mp file as follows:
@ -24,13 +24,73 @@ java -jar hello.jar
More documentation is being built. More documentation is being built.
## Notes ## Project Structure
```
.
├── mpc.py
├── tests
├── astgen
│   ├── ASTGeneration.py
│   ├── __init__.py
├── checker
│   ├── __init__.py
│   ├── StaticCheck.py
│   └── StaticError.py
├── codegen
│   ├── CodeGenerator.py
│   ├── CodeGenError.py
│   ├── Emitter.py
│   ├── Frame.py
│   ├── __init__.py
│   ├── MachineCode.py
├── external
│   ├── antrl4.jar
│   └── jasmin.jar
├── libs
│   ├── io.class
│   └── io.java
├── MP_specifications
│   ├── assignment1.pdf
│   ├── assignment2.pdf
│   ├── assignment3.pdf
│   ├── assignment4.pdf
│   └── MP.pdf
├── parser
│   ├── __init__.py
│   ├── lexererr.py
│   ├── MP.g4
│   ├── MP.interp
│   ├── MPLexer.interp
│   ├── MPLexer.py
│   ├── MPLexer.tokens
│   ├── MPParser.py
│   ├── MP.tokens
│   ├── MPVisitor.py
├── tools
│   ├── genANTLR4.py
│   ├── __init__.py
└── utils
    ├── AST.py
    ├── __init__.py
    ├── Utils.py
    └── Visitor.py
``````
All files are categorized and put into their own folders, each of which is turned into a package with an `__init__.py`, so they can be imported using the syntax `from package.module import module`. This makes the code easier to read and navigate than altering the system path would. The files in `parser` are created by running the ANTLR4 generator on the given `MP.g4` file. Importing this package is a little tricky because the generated files may not exist yet, so `mpc.py` catches the exception raised by the import, generates the necessary files, and then imports the package again.
Because I had serious deadlines at the end of the semester, I dropped work on ArrayCell, which will be added later. Most of the code was written by my teacher, Nguyen Hua Phung; I have no plan to refactor all of it, as the architecture is just right. So all rights go to my teacher. The only things I did were refactoring part of the code and implementing the algorithm.
Because the lexer and parser are generated by the famous `ANTLR4` engine, the environment variable `ANTLR_LIB` should hold the path to the ANTLR4 jar; otherwise, the program falls back to the ANTLR4 jar in the `external` folder. The `tests` folder currently contains manual tests only. Unit tests for individual functions may be added in the future.
## Development
### TODO
+ Finish Array in CodeGen
+ Add unit test frame
+ Add magic???
### Magic
The language specification only covers compiling a single file, but I want the compiler to work across multiple files; maybe in the distant future I will try to make this possible. Also, why not make a syntax highlighter for this language?
Before running `mpc.py`, make sure that you have generated the necessary files with ANTLR4.
```shell
python genANTLR4.py
```

View File

@ -2,19 +2,15 @@ from parser.MPVisitor import MPVisitor
from parser.MPParser import MPParser from parser.MPParser import MPParser
from functools import reduce from functools import reduce
# * is not a good use case
from utils.AST import ( from utils.AST import (
IntType, IntType,
FloatType, FloatType,
BoolType, BoolType,
StringType, StringType,
ArrayType, ArrayType,
# VoidType,
Program, Program,
# Decl,
VarDecl, VarDecl,
FuncDecl, FuncDecl,
# Stmt,
Assign, Assign,
If, If,
While, While,
@ -24,14 +20,11 @@ from utils.AST import (
Return, Return,
With, With,
CallStmt, CallStmt,
# Expr,
BinaryOp, BinaryOp,
UnaryOp, UnaryOp,
CallExpr, CallExpr,
# LHS,
Id, Id,
ArrayCell, ArrayCell,
# Literal,
IntLiteral, IntLiteral,
FloatLiteral, FloatLiteral,
StringLiteral, StringLiteral,

View File

@ -1,13 +1,11 @@
from utils.Visitor import BaseVisitor from utils.Visitor import BaseVisitor
from utils.Utils import Utils from utils.Utils import Utils
from checker.StaticError import ( from checker.StaticError import (
# Kind,
Function, Function,
Procedure, Procedure,
Variable, Variable,
Parameter, Parameter,
Identifier, Identifier,
# StaticError,
Undeclared, Undeclared,
Redeclared, Redeclared,
TypeMismatchInExpression, TypeMismatchInExpression,
@ -26,32 +24,8 @@ from utils.AST import (
StringType, StringType,
ArrayType, ArrayType,
VoidType, VoidType,
# Program,
# Decl,
# VarDecl,
FuncDecl, FuncDecl,
# Stmt,
# Assign,
# If,
# While,
# For,
# Break,
# Continue,
# Return,
# With,
# CallStmt,
# Expr,
# BinaryOp,
# UnaryOp,
CallExpr, CallExpr,
# LHS,
# Id,
# ArrayCell,
# Literal,
# IntLiteral,
# FloatLiteral,
# StringLiteral,
# BooleanLiteral
) )
from functools import reduce from functools import reduce
@ -75,9 +49,10 @@ class Symbol:
self.value = value self.value = value
def __str__(self): def __str__(self):
return 'Symbol({},{})'.format( return 'Symbol({},{},{})'.format(
self.name, self.name,
str(self.mtype) str(self.mtype),
str(self.value)
) )
@ -138,12 +113,6 @@ class StaticChecker(BaseVisitor, Utils):
if res is not None: if res is not None:
raise Redeclared(kind, symbol.name) raise Redeclared(kind, symbol.name)
def mergeGlobal2Local(self, local_scope, global_scope):
for s in global_scope:
res = self.lookup(s.name, local_scope, lambda e: e.name.lower())
if res is None:
local_scope.append(s)
def checkTypeCompatibility(self, lhs, rhs, error): def checkTypeCompatibility(self, lhs, rhs, error):
# array check # array check
if isinstance(lhs, ArrayType): if isinstance(lhs, ArrayType):

View File

@ -2,46 +2,23 @@ from utils.Utils import Utils
from utils.Visitor import BaseVisitor from utils.Visitor import BaseVisitor
from checker.StaticCheck import MType, Symbol from checker.StaticCheck import MType, Symbol
from utils.AST import ( from utils.AST import (
# Type,
IntType, IntType,
FloatType, FloatType,
BoolType, BoolType,
StringType, StringType,
# ArrayType,
VoidType, VoidType,
# Program,
# Decl,
# VarDecl,
FuncDecl, FuncDecl,
# Stmt,
Assign, Assign,
# If,
While, While,
# For,
# Break,
# Continue,
# Return,
# With,
# CallStmt,
# Expr,
BinaryOp, BinaryOp,
# UnaryOp,
# CallExpr,
# LHS,
Id, Id,
# ArrayCell,
# Literal,
IntLiteral, IntLiteral,
# FloatLiteral,
# StringLiteral,
# BooleanLiteral,
ArrayPointerType, ArrayPointerType,
ClassType ClassType
) )
from codegen.Emitter import Emitter from codegen.Emitter import Emitter
from codegen.Frame import Frame from codegen.Frame import Frame
from abc import ABC # , abstractmethod from abc import ABC
# from functools import reduce
class CodeGenerator(Utils): class CodeGenerator(Utils):

67
mpc.py
View File

@ -1,21 +1,25 @@
import sys
import os import os
import subprocess import subprocess
import click
from antlr4 import FileStream, CommonTokenStream # Token from antlr4 import FileStream, CommonTokenStream
from antlr4.error.ErrorListener import ConsoleErrorListener # ErrorListener from antlr4.error.ErrorListener import ConsoleErrorListener
from tools.genANTLR4 import generate, regenerate
try: try:
from parser.MPLexer import MPLexer as Lexer # dynamic loading of ANTLR4 files
from parser.MPParser import MPParser as Parser from parser import MPLexer, MPParser # type: ignore
except ImportError:
generate()
from parser import MPLexer, MPParser # type: ignore
from astgen.ASTGeneration import ASTGeneration from astgen.ASTGeneration import ASTGeneration
from checker.StaticCheck import StaticChecker from checker.StaticCheck import StaticChecker
from codegen.CodeGenerator import CodeGenerator from codegen.CodeGenerator import CodeGenerator
except ModuleNotFoundError:
print('Generate ANTLR4 first')
print('python genANTLR4.py')
exit(1)
Lexer = MPLexer.MPLexer # load from module
Parser = MPParser.MPParser # load from module
ANTLR_JAR = os.environ.get('ANTLR_LIB') ANTLR_JAR = os.environ.get('ANTLR_LIB')
JASMIN_JAR = './external/jasmin.jar' JASMIN_JAR = './external/jasmin.jar'
@ -43,7 +47,7 @@ class NewErrorListener(ConsoleErrorListener):
offendingSymbol.text) offendingSymbol.text)
def compile(inputfile): def compile(inputfile, output):
lexer = Lexer(FileStream(inputfile)) lexer = Lexer(FileStream(inputfile))
tokens = CommonTokenStream(lexer) tokens = CommonTokenStream(lexer)
try: try:
@ -68,14 +72,14 @@ def compile(inputfile):
checker = StaticChecker(asttree) checker = StaticChecker(asttree)
checker.check() checker.check()
path = os.path.dirname(inputfile) path = output
filename = os.path.basename(inputfile).split('.')[0] filename = os.path.basename(inputfile).split('.')[0]
jasmin_file = '{}/{}.j'.format(path, filename)
codeGen = CodeGenerator() codeGen = CodeGenerator()
codeGen.gen(asttree, path, filename) codeGen.gen(asttree, path, filename)
subprocess.call( subprocess.call(
# "java -jar " + JASMIN_JAR + " " + path + "/MPClass.j", "java -jar {} {} -d {}".format(JASMIN_JAR, jasmin_file, path),
"java -jar {} {}/{}.j -d {}".format(JASMIN_JAR, path, filename, path),
shell=True, shell=True,
stderr=subprocess.STDOUT stderr=subprocess.STDOUT
) )
@ -84,7 +88,6 @@ def compile(inputfile):
iofile.write(open('libs/io.class', 'rb').read()) iofile.write(open('libs/io.class', 'rb').read())
subprocess.call( subprocess.call(
# 'jar cvfm {}/{}.jar {}/manifest.mf {} {}.class'.format(
'jar cvfe {0}.jar {0} io.class {0}.class'.format( 'jar cvfe {0}.jar {0} io.class {0}.class'.format(
filename filename
), ),
@ -93,20 +96,42 @@ def compile(inputfile):
stderr=subprocess.STDOUT stderr=subprocess.STDOUT
) )
os.remove('{}/{}.j'.format(path, filename)) os.remove(jasmin_file)
os.remove('{}/{}.class'.format(path, filename)) os.remove('{}/{}.class'.format(path, filename))
os.remove('{}/io.class'.format(path)) os.remove('{}/io.class'.format(path))
if __name__ == "__main__": @click.command()
argv = sys.argv @click.argument('file')
if len(argv) != 2: @click.option(
exit(1) '--output', default='',
help='Where the jar file will be after compile')
@click.option(
'--regen', is_flag=True,
help='Regenerate antlr4 files and exit'
)
def main(file, output, regen):
if regen:
regenerate()
print("ANTLR4 files regenerate succesfully")
return
if output == '':
output = os.path.dirname(file)
else:
# check if directory, create
# if file path, then
pass
print("Compiling {}".format(os.path.relpath(file)))
try: try:
print("Compiling {}".format(os.path.relpath(argv[1]))) compile(file, output)
compile(argv[1])
except BaseException as e: except BaseException as e:
print(e) print(e)
exit(1) exit(1)
print("Compiled successfully") print("Compiled successfully")
if __name__ == "__main__":
main()

0
tools/__init__.py Normal file
View File

View File

@ -3,23 +3,23 @@ import subprocess
ANTLR_JAR = os.environ.get('ANTLR_LIB') ANTLR_JAR = os.environ.get('ANTLR_LIB')
JASMIN_JAR = './external/jasmin.jar'
if ANTLR_JAR is None: if ANTLR_JAR is None:
# fall back, not recommended # fall back, not recommended
ANTLR_JAR = './external/antrl4.jar' ANTLR_JAR = '../external/antrl4.jar'
files_from_antlr4 = [
'parser/MP.interp',
'parser/MPLexer.interp',
'parser/MPLexer.py',
'parser/MPLexer.tokens',
'parser/MPParser.py',
'parser/MP.tokens',
'parser/MPVisitor.py'
]
def generate(): def generate():
files_from_antlr4 = [
'MP.interp',
'MPLexer.interp',
'MPLexer.py',
'MPLexer.tokens',
'MPParser.py',
'MP.tokens',
'MPVisitor.py'
]
if all(list(map(os.path.isfile, files_from_antlr4))): if all(list(map(os.path.isfile, files_from_antlr4))):
return return
gen_antlr_class_cmd = [ gen_antlr_class_cmd = [
@ -33,5 +33,7 @@ def generate():
subprocess.run(gen_antlr_class_cmd) subprocess.run(gen_antlr_class_cmd)
if __name__ == '__main__': def regenerate():
for f in files_from_antlr4:
os.remove(f)
generate() generate()