Refactor code
Change mpc.py to auto-generate the ANTLR4 files without having to run the generator manually. Also add a small command-line utility. Remove commented-out imports in other files, and update the README.
This commit is contained in:
parent
d33f3a54ba
commit
d6be336a0c
78
README.md
78
README.md
@ -1,8 +1,8 @@
|
|||||||
# MP Compiler
|
# MP Compiler
|
||||||
|
|
||||||
From my Principle of Programming Languages assignment, I have created a compiler for the MP language. The assignment phase is divided to 4 phases, from doing Lexer, Parser, AST generation to Static Checker and Jasmin Code generation.
|
For my Principles of Programming Languages assignment, I created a compiler for the MP language. The assignment is divided into 4 phases: Lexer and Parser, AST generation, Static Checker, and Jasmin code generation.
|
||||||
|
|
||||||
The assignment code structure is quite ugle, so I re-organized the code, adding some more steps to make the code look nicer and compile a \*.mp file to a jar file.
|
The assignment code structure is quite ugly, so I re-organized the code, adding some more steps to make the code look nicer and compile a `*.mp` file to a `jar` file.
|
||||||
|
|
||||||
Given the mp file as follows:
|
Given the mp file as follows:
|
||||||
|
|
||||||
@ -24,13 +24,73 @@ java -jar hello.jar
|
|||||||
More documentation is being built.
|
More documentation is being built.
|
||||||
|
|
||||||
|
|
||||||
## Notes
|
## Project Structure
|
||||||
|
``
|
||||||
|
.
|
||||||
|
├── mpc.py
|
||||||
|
├── tests
|
||||||
|
├── astgen
|
||||||
|
│ ├── ASTGeneration.py
|
||||||
|
│ ├── __init__.py
|
||||||
|
├── checker
|
||||||
|
│ ├── __init__.py
|
||||||
|
│ ├── StaticCheck.py
|
||||||
|
│ └── StaticError.py
|
||||||
|
├── codegen
|
||||||
|
│ ├── CodeGenerator.py
|
||||||
|
│ ├── CodeGenError.py
|
||||||
|
│ ├── Emitter.py
|
||||||
|
│ ├── Frame.py
|
||||||
|
│ ├── __init__.py
|
||||||
|
│ ├── MachineCode.py
|
||||||
|
├── external
|
||||||
|
│ ├── antrl4.jar
|
||||||
|
│ └── jasmin.jar
|
||||||
|
├── libs
|
||||||
|
│ ├── io.class
|
||||||
|
│ └── io.java
|
||||||
|
├── MP_specifications
|
||||||
|
│ ├── assignment1.pdf
|
||||||
|
│ ├── assignment2.pdf
|
||||||
|
│ ├── assignment3.pdf
|
||||||
|
│ ├── assignment4.pdf
|
||||||
|
│ └── MP.pdf
|
||||||
|
├── parser
|
||||||
|
│ ├── __init__.py
|
||||||
|
│ ├── lexererr.py
|
||||||
|
│ ├── MP.g4
|
||||||
|
│ ├── MP.interp
|
||||||
|
│ ├── MPLexer.interp
|
||||||
|
│ ├── MPLexer.py
|
||||||
|
│ ├── MPLexer.tokens
|
||||||
|
│ ├── MPParser.py
|
||||||
|
│ ├── MP.tokens
|
||||||
|
│ ├── MPVisitor.py
|
||||||
|
├── tools
|
||||||
|
│ ├── genANTLR4.py
|
||||||
|
│ ├── __init__.py
|
||||||
|
└── utils
|
||||||
|
├── AST.py
|
||||||
|
├── __init__.py
|
||||||
|
├── Utils.py
|
||||||
|
└── Visitor.py
|
||||||
|
``````
|
||||||
|
All files are categorized and put into their own folders, each turned into a package by adding an `__init__.py`, and are imported using the syntax `from package.module import module`. This makes the code easier to read and navigate than altering the system path. The files in `parser` are created by running the ANTLR4 generator on the given `MP.g4` file. Importing this module is a little tricky because the generated files may not exist yet, but the exception raised on import can be caught in order to generate the necessary files and then import the module afterward.
|
||||||
|
|
||||||
Because I was having serious deadlines at the end of the semester, I drop on working on ArrayCell, which will be added later.
|
Most of the code was written by my teacher, Nguyen Hua Phung. I have no plans to refactor all of it, as the architecture is just right, so all rights go to my teacher. The only things I did were refactoring part of the code and implementing the algorithms.
|
||||||
|
|
||||||
Because the lexer and parser are given by the famous `ANTLR4` engine, there should exists a path to antlr4.jar on the environment variable `ANTLR_LIB` or else, the program will use the antlr4 file in the external folder.
|
The test folder currently only supports manual testing. Unit tests for individual functions may be added in the future.
|
||||||
|
|
||||||
|
|
||||||
|
## Development
|
||||||
|
|
||||||
|
### TODO
|
||||||
|
|
||||||
|
+ Finish Array in CodeGen
|
||||||
|
+ Add unit test frame
|
||||||
|
+ Add magic???
|
||||||
|
|
||||||
|
### Magic
|
||||||
|
|
||||||
|
The specification for this language only covers single-file compilation, but I would like to make it work with multiple files; maybe in the distant future I will try to make this possible. Also, why not make a syntax highlighter for this language?
|
||||||
|
|
||||||
Before running `mpc.py`, you must be sure that you have generate neccessary files from ANTLR4.
|
|
||||||
```shell
|
|
||||||
python genANTRL4.py
|
|
||||||
```
|
|
||||||
|
@ -2,19 +2,15 @@ from parser.MPVisitor import MPVisitor
|
|||||||
from parser.MPParser import MPParser
|
from parser.MPParser import MPParser
|
||||||
from functools import reduce
|
from functools import reduce
|
||||||
|
|
||||||
# * is not a good use case
|
|
||||||
from utils.AST import (
|
from utils.AST import (
|
||||||
IntType,
|
IntType,
|
||||||
FloatType,
|
FloatType,
|
||||||
BoolType,
|
BoolType,
|
||||||
StringType,
|
StringType,
|
||||||
ArrayType,
|
ArrayType,
|
||||||
# VoidType,
|
|
||||||
Program,
|
Program,
|
||||||
# Decl,
|
|
||||||
VarDecl,
|
VarDecl,
|
||||||
FuncDecl,
|
FuncDecl,
|
||||||
# Stmt,
|
|
||||||
Assign,
|
Assign,
|
||||||
If,
|
If,
|
||||||
While,
|
While,
|
||||||
@ -24,14 +20,11 @@ from utils.AST import (
|
|||||||
Return,
|
Return,
|
||||||
With,
|
With,
|
||||||
CallStmt,
|
CallStmt,
|
||||||
# Expr,
|
|
||||||
BinaryOp,
|
BinaryOp,
|
||||||
UnaryOp,
|
UnaryOp,
|
||||||
CallExpr,
|
CallExpr,
|
||||||
# LHS,
|
|
||||||
Id,
|
Id,
|
||||||
ArrayCell,
|
ArrayCell,
|
||||||
# Literal,
|
|
||||||
IntLiteral,
|
IntLiteral,
|
||||||
FloatLiteral,
|
FloatLiteral,
|
||||||
StringLiteral,
|
StringLiteral,
|
||||||
|
@ -1,13 +1,11 @@
|
|||||||
from utils.Visitor import BaseVisitor
|
from utils.Visitor import BaseVisitor
|
||||||
from utils.Utils import Utils
|
from utils.Utils import Utils
|
||||||
from checker.StaticError import (
|
from checker.StaticError import (
|
||||||
# Kind,
|
|
||||||
Function,
|
Function,
|
||||||
Procedure,
|
Procedure,
|
||||||
Variable,
|
Variable,
|
||||||
Parameter,
|
Parameter,
|
||||||
Identifier,
|
Identifier,
|
||||||
# StaticError,
|
|
||||||
Undeclared,
|
Undeclared,
|
||||||
Redeclared,
|
Redeclared,
|
||||||
TypeMismatchInExpression,
|
TypeMismatchInExpression,
|
||||||
@ -26,32 +24,8 @@ from utils.AST import (
|
|||||||
StringType,
|
StringType,
|
||||||
ArrayType,
|
ArrayType,
|
||||||
VoidType,
|
VoidType,
|
||||||
# Program,
|
|
||||||
# Decl,
|
|
||||||
# VarDecl,
|
|
||||||
FuncDecl,
|
FuncDecl,
|
||||||
# Stmt,
|
|
||||||
# Assign,
|
|
||||||
# If,
|
|
||||||
# While,
|
|
||||||
# For,
|
|
||||||
# Break,
|
|
||||||
# Continue,
|
|
||||||
# Return,
|
|
||||||
# With,
|
|
||||||
# CallStmt,
|
|
||||||
# Expr,
|
|
||||||
# BinaryOp,
|
|
||||||
# UnaryOp,
|
|
||||||
CallExpr,
|
CallExpr,
|
||||||
# LHS,
|
|
||||||
# Id,
|
|
||||||
# ArrayCell,
|
|
||||||
# Literal,
|
|
||||||
# IntLiteral,
|
|
||||||
# FloatLiteral,
|
|
||||||
# StringLiteral,
|
|
||||||
# BooleanLiteral
|
|
||||||
)
|
)
|
||||||
from functools import reduce
|
from functools import reduce
|
||||||
|
|
||||||
@ -75,9 +49,10 @@ class Symbol:
|
|||||||
self.value = value
|
self.value = value
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
return 'Symbol({},{})'.format(
|
return 'Symbol({},{},{})'.format(
|
||||||
self.name,
|
self.name,
|
||||||
str(self.mtype)
|
str(self.mtype),
|
||||||
|
str(self.value)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@ -138,12 +113,6 @@ class StaticChecker(BaseVisitor, Utils):
|
|||||||
if res is not None:
|
if res is not None:
|
||||||
raise Redeclared(kind, symbol.name)
|
raise Redeclared(kind, symbol.name)
|
||||||
|
|
||||||
def mergeGlobal2Local(self, local_scope, global_scope):
|
|
||||||
for s in global_scope:
|
|
||||||
res = self.lookup(s.name, local_scope, lambda e: e.name.lower())
|
|
||||||
if res is None:
|
|
||||||
local_scope.append(s)
|
|
||||||
|
|
||||||
def checkTypeCompatibility(self, lhs, rhs, error):
|
def checkTypeCompatibility(self, lhs, rhs, error):
|
||||||
# array check
|
# array check
|
||||||
if isinstance(lhs, ArrayType):
|
if isinstance(lhs, ArrayType):
|
||||||
|
@ -2,46 +2,23 @@ from utils.Utils import Utils
|
|||||||
from utils.Visitor import BaseVisitor
|
from utils.Visitor import BaseVisitor
|
||||||
from checker.StaticCheck import MType, Symbol
|
from checker.StaticCheck import MType, Symbol
|
||||||
from utils.AST import (
|
from utils.AST import (
|
||||||
# Type,
|
|
||||||
IntType,
|
IntType,
|
||||||
FloatType,
|
FloatType,
|
||||||
BoolType,
|
BoolType,
|
||||||
StringType,
|
StringType,
|
||||||
# ArrayType,
|
|
||||||
VoidType,
|
VoidType,
|
||||||
# Program,
|
|
||||||
# Decl,
|
|
||||||
# VarDecl,
|
|
||||||
FuncDecl,
|
FuncDecl,
|
||||||
# Stmt,
|
|
||||||
Assign,
|
Assign,
|
||||||
# If,
|
|
||||||
While,
|
While,
|
||||||
# For,
|
|
||||||
# Break,
|
|
||||||
# Continue,
|
|
||||||
# Return,
|
|
||||||
# With,
|
|
||||||
# CallStmt,
|
|
||||||
# Expr,
|
|
||||||
BinaryOp,
|
BinaryOp,
|
||||||
# UnaryOp,
|
|
||||||
# CallExpr,
|
|
||||||
# LHS,
|
|
||||||
Id,
|
Id,
|
||||||
# ArrayCell,
|
|
||||||
# Literal,
|
|
||||||
IntLiteral,
|
IntLiteral,
|
||||||
# FloatLiteral,
|
|
||||||
# StringLiteral,
|
|
||||||
# BooleanLiteral,
|
|
||||||
ArrayPointerType,
|
ArrayPointerType,
|
||||||
ClassType
|
ClassType
|
||||||
)
|
)
|
||||||
from codegen.Emitter import Emitter
|
from codegen.Emitter import Emitter
|
||||||
from codegen.Frame import Frame
|
from codegen.Frame import Frame
|
||||||
from abc import ABC # , abstractmethod
|
from abc import ABC
|
||||||
# from functools import reduce
|
|
||||||
|
|
||||||
|
|
||||||
class CodeGenerator(Utils):
|
class CodeGenerator(Utils):
|
||||||
|
67
mpc.py
67
mpc.py
@ -1,21 +1,25 @@
|
|||||||
import sys
|
|
||||||
import os
|
import os
|
||||||
import subprocess
|
import subprocess
|
||||||
|
import click
|
||||||
|
|
||||||
from antlr4 import FileStream, CommonTokenStream # Token
|
from antlr4 import FileStream, CommonTokenStream
|
||||||
from antlr4.error.ErrorListener import ConsoleErrorListener # ErrorListener
|
from antlr4.error.ErrorListener import ConsoleErrorListener
|
||||||
|
|
||||||
|
from tools.genANTLR4 import generate, regenerate
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from parser.MPLexer import MPLexer as Lexer
|
# dynamic loading of ANTLR4 files
|
||||||
from parser.MPParser import MPParser as Parser
|
from parser import MPLexer, MPParser # type: ignore
|
||||||
|
except ImportError:
|
||||||
|
generate()
|
||||||
|
from parser import MPLexer, MPParser # type: ignore
|
||||||
|
|
||||||
from astgen.ASTGeneration import ASTGeneration
|
from astgen.ASTGeneration import ASTGeneration
|
||||||
from checker.StaticCheck import StaticChecker
|
from checker.StaticCheck import StaticChecker
|
||||||
from codegen.CodeGenerator import CodeGenerator
|
from codegen.CodeGenerator import CodeGenerator
|
||||||
except ModuleNotFoundError:
|
|
||||||
print('Generate ANTLR4 first')
|
|
||||||
print('python genANTLR4.py')
|
|
||||||
exit(1)
|
|
||||||
|
|
||||||
|
Lexer = MPLexer.MPLexer # load from module
|
||||||
|
Parser = MPParser.MPParser # load from module
|
||||||
|
|
||||||
ANTLR_JAR = os.environ.get('ANTLR_LIB')
|
ANTLR_JAR = os.environ.get('ANTLR_LIB')
|
||||||
JASMIN_JAR = './external/jasmin.jar'
|
JASMIN_JAR = './external/jasmin.jar'
|
||||||
@ -43,7 +47,7 @@ class NewErrorListener(ConsoleErrorListener):
|
|||||||
offendingSymbol.text)
|
offendingSymbol.text)
|
||||||
|
|
||||||
|
|
||||||
def compile(inputfile):
|
def compile(inputfile, output):
|
||||||
lexer = Lexer(FileStream(inputfile))
|
lexer = Lexer(FileStream(inputfile))
|
||||||
tokens = CommonTokenStream(lexer)
|
tokens = CommonTokenStream(lexer)
|
||||||
try:
|
try:
|
||||||
@ -68,14 +72,14 @@ def compile(inputfile):
|
|||||||
checker = StaticChecker(asttree)
|
checker = StaticChecker(asttree)
|
||||||
checker.check()
|
checker.check()
|
||||||
|
|
||||||
path = os.path.dirname(inputfile)
|
path = output
|
||||||
filename = os.path.basename(inputfile).split('.')[0]
|
filename = os.path.basename(inputfile).split('.')[0]
|
||||||
|
jasmin_file = '{}/{}.j'.format(path, filename)
|
||||||
codeGen = CodeGenerator()
|
codeGen = CodeGenerator()
|
||||||
codeGen.gen(asttree, path, filename)
|
codeGen.gen(asttree, path, filename)
|
||||||
|
|
||||||
subprocess.call(
|
subprocess.call(
|
||||||
# "java -jar " + JASMIN_JAR + " " + path + "/MPClass.j",
|
"java -jar {} {} -d {}".format(JASMIN_JAR, jasmin_file, path),
|
||||||
"java -jar {} {}/{}.j -d {}".format(JASMIN_JAR, path, filename, path),
|
|
||||||
shell=True,
|
shell=True,
|
||||||
stderr=subprocess.STDOUT
|
stderr=subprocess.STDOUT
|
||||||
)
|
)
|
||||||
@ -84,7 +88,6 @@ def compile(inputfile):
|
|||||||
iofile.write(open('libs/io.class', 'rb').read())
|
iofile.write(open('libs/io.class', 'rb').read())
|
||||||
|
|
||||||
subprocess.call(
|
subprocess.call(
|
||||||
# 'jar cvfm {}/{}.jar {}/manifest.mf {} {}.class'.format(
|
|
||||||
'jar cvfe {0}.jar {0} io.class {0}.class'.format(
|
'jar cvfe {0}.jar {0} io.class {0}.class'.format(
|
||||||
filename
|
filename
|
||||||
),
|
),
|
||||||
@ -93,20 +96,42 @@ def compile(inputfile):
|
|||||||
stderr=subprocess.STDOUT
|
stderr=subprocess.STDOUT
|
||||||
)
|
)
|
||||||
|
|
||||||
os.remove('{}/{}.j'.format(path, filename))
|
os.remove(jasmin_file)
|
||||||
os.remove('{}/{}.class'.format(path, filename))
|
os.remove('{}/{}.class'.format(path, filename))
|
||||||
os.remove('{}/io.class'.format(path))
|
os.remove('{}/io.class'.format(path))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
@click.command()
|
||||||
argv = sys.argv
|
@click.argument('file')
|
||||||
if len(argv) != 2:
|
@click.option(
|
||||||
exit(1)
|
'--output', default='',
|
||||||
|
help='Where the jar file will be after compile')
|
||||||
|
@click.option(
|
||||||
|
'--regen', is_flag=True,
|
||||||
|
help='Regenerate antlr4 files and exit'
|
||||||
|
)
|
||||||
|
def main(file, output, regen):
|
||||||
|
if regen:
|
||||||
|
regenerate()
|
||||||
|
print("ANTLR4 files regenerate succesfully")
|
||||||
|
return
|
||||||
|
|
||||||
|
if output == '':
|
||||||
|
output = os.path.dirname(file)
|
||||||
|
else:
|
||||||
|
# check if directory, create
|
||||||
|
|
||||||
|
# if file path, then
|
||||||
|
pass
|
||||||
|
|
||||||
|
print("Compiling {}".format(os.path.relpath(file)))
|
||||||
try:
|
try:
|
||||||
print("Compiling {}".format(os.path.relpath(argv[1])))
|
compile(file, output)
|
||||||
compile(argv[1])
|
|
||||||
except BaseException as e:
|
except BaseException as e:
|
||||||
print(e)
|
print(e)
|
||||||
exit(1)
|
exit(1)
|
||||||
print("Compiled successfully")
|
print("Compiled successfully")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
|
0
tools/__init__.py
Normal file
0
tools/__init__.py
Normal file
@ -3,23 +3,23 @@ import subprocess
|
|||||||
|
|
||||||
|
|
||||||
ANTLR_JAR = os.environ.get('ANTLR_LIB')
|
ANTLR_JAR = os.environ.get('ANTLR_LIB')
|
||||||
JASMIN_JAR = './external/jasmin.jar'
|
|
||||||
|
|
||||||
if ANTLR_JAR is None:
|
if ANTLR_JAR is None:
|
||||||
# fall back, not recommended
|
# fall back, not recommended
|
||||||
ANTLR_JAR = './external/antrl4.jar'
|
ANTLR_JAR = '../external/antrl4.jar'
|
||||||
|
|
||||||
|
files_from_antlr4 = [
|
||||||
|
'parser/MP.interp',
|
||||||
|
'parser/MPLexer.interp',
|
||||||
|
'parser/MPLexer.py',
|
||||||
|
'parser/MPLexer.tokens',
|
||||||
|
'parser/MPParser.py',
|
||||||
|
'parser/MP.tokens',
|
||||||
|
'parser/MPVisitor.py'
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
def generate():
|
def generate():
|
||||||
files_from_antlr4 = [
|
|
||||||
'MP.interp',
|
|
||||||
'MPLexer.interp',
|
|
||||||
'MPLexer.py',
|
|
||||||
'MPLexer.tokens',
|
|
||||||
'MPParser.py',
|
|
||||||
'MP.tokens',
|
|
||||||
'MPVisitor.py'
|
|
||||||
]
|
|
||||||
if all(list(map(os.path.isfile, files_from_antlr4))):
|
if all(list(map(os.path.isfile, files_from_antlr4))):
|
||||||
return
|
return
|
||||||
gen_antlr_class_cmd = [
|
gen_antlr_class_cmd = [
|
||||||
@ -33,5 +33,7 @@ def generate():
|
|||||||
subprocess.run(gen_antlr_class_cmd)
|
subprocess.run(gen_antlr_class_cmd)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
def regenerate():
|
||||||
|
for f in files_from_antlr4:
|
||||||
|
os.remove(f)
|
||||||
generate()
|
generate()
|
Loading…
Reference in New Issue
Block a user