Refactor code
Change mpc.py to auto-generate the ANTLR4 files without having to run the generator manually. Also add a small command-line utility. Remove commented-out imports in other files, and update the README.
This commit is contained in:
parent
d33f3a54ba
commit
d6be336a0c
78
README.md
78
README.md
@ -1,8 +1,8 @@
|
||||
# MP Compiler
|
||||
|
||||
From my Principle of Programming Languages assignment, I have created a compiler for the MP language. The assignment phase is divided to 4 phases, from doing Lexer, Parser, AST generation to Static Checker and Jasmin Code generation.
|
||||
From my Principles of Programming Languages assignment, I have created a compiler for the MP language. The assignment is divided into 4 phases: Lexer and Parser, AST generation, Static Checker, and Jasmin code generation.
|
||||
|
||||
The assignment code structure is quite ugle, so I re-organized the code, adding some more steps to make the code look nicer and compile a \*.mp file to a jar file.
|
||||
The assignment code structure is quite ugly, so I re-organized the code, adding some more steps to make the code look nicer and compile a `*.mp` file to a `jar` file.
|
||||
|
||||
Given the mp file as follows:
|
||||
|
||||
@ -24,13 +24,73 @@ java -jar hello.jar
|
||||
More documentation is being built.
|
||||
|
||||
|
||||
## Notes
|
||||
## Project Structure
|
||||
```
|
||||
.
|
||||
├── mpc.py
|
||||
├── tests
|
||||
├── astgen
|
||||
│ ├── ASTGeneration.py
|
||||
│ ├── __init__.py
|
||||
├── checker
|
||||
│ ├── __init__.py
|
||||
│ ├── StaticCheck.py
|
||||
│ └── StaticError.py
|
||||
├── codegen
|
||||
│ ├── CodeGenerator.py
|
||||
│ ├── CodeGenError.py
|
||||
│ ├── Emitter.py
|
||||
│ ├── Frame.py
|
||||
│ ├── __init__.py
|
||||
│ ├── MachineCode.py
|
||||
├── external
|
||||
│ ├── antrl4.jar
|
||||
│ └── jasmin.jar
|
||||
├── libs
|
||||
│ ├── io.class
|
||||
│ └── io.java
|
||||
├── MP_specifications
|
||||
│ ├── assignment1.pdf
|
||||
│ ├── assignment2.pdf
|
||||
│ ├── assignment3.pdf
|
||||
│ ├── assignment4.pdf
|
||||
│ └── MP.pdf
|
||||
├── parser
|
||||
│ ├── __init__.py
|
||||
│ ├── lexererr.py
|
||||
│ ├── MP.g4
|
||||
│ ├── MP.interp
|
||||
│ ├── MPLexer.interp
|
||||
│ ├── MPLexer.py
|
||||
│ ├── MPLexer.tokens
|
||||
│ ├── MPParser.py
|
||||
│ ├── MP.tokens
|
||||
│ ├── MPVisitor.py
|
||||
├── tools
|
||||
│ ├── genANTLR4.py
|
||||
│ ├── __init__.py
|
||||
└── utils
|
||||
├── AST.py
|
||||
├── __init__.py
|
||||
├── Utils.py
|
||||
└── Visitor.py
|
||||
```
|
||||
All files are categorized and put into their own folders, each turned into a package by adding an `__init__.py`, and imported using the syntax `from package.module import module`. This makes the code easier to read and navigate than altering the system path would. The files in `parser` are created by running the ANTLR4 generator on the given `MP.g4` file. Importing this package is a little tricky: when the generated files are missing the import fails, but that exception can be caught in order to generate the necessary files and then import the package again.
|
||||
|
||||
Because I had serious deadlines at the end of the semester, I dropped work on ArrayCell; it will be added later.
|
||||
Most of the code was written by my teacher, Nguyen Hua Phung. I have no plan to refactor all of it, as the architecture is just right, so all rights go to my teacher. The only things I did were refactoring part of the code and implementing the algorithms.
|
||||
|
||||
Because the lexer and parser are generated by the well-known `ANTLR4` engine, the environment variable `ANTLR_LIB` should hold the path to the ANTLR4 jar; otherwise, the program falls back to the ANTLR4 jar in the `external` folder.
|
||||
The tests folder currently supports manual testing only. Unit tests for individual functions may be added in the future.
|
||||
|
||||
|
||||
## Development
|
||||
|
||||
### TODO
|
||||
|
||||
+ Finish Array in CodeGen
|
||||
+ Add unit test frame
|
||||
+ Add magic???
|
||||
|
||||
### Magic
|
||||
|
||||
The specification for this language assumes single-file compilation, but I want to make it work across multiple files; maybe in the distant future I will try to make this possible. Also, why not make a syntax highlighter for this language?
|
||||
|
||||
Before running `mpc.py`, you must be sure that you have generate neccessary files from ANTLR4.
|
||||
```shell
|
||||
python genANTRL4.py
|
||||
```
|
||||
|
@ -2,19 +2,15 @@ from parser.MPVisitor import MPVisitor
|
||||
from parser.MPParser import MPParser
|
||||
from functools import reduce
|
||||
|
||||
# * is not a good use case
|
||||
from utils.AST import (
|
||||
IntType,
|
||||
FloatType,
|
||||
BoolType,
|
||||
StringType,
|
||||
ArrayType,
|
||||
# VoidType,
|
||||
Program,
|
||||
# Decl,
|
||||
VarDecl,
|
||||
FuncDecl,
|
||||
# Stmt,
|
||||
Assign,
|
||||
If,
|
||||
While,
|
||||
@ -24,14 +20,11 @@ from utils.AST import (
|
||||
Return,
|
||||
With,
|
||||
CallStmt,
|
||||
# Expr,
|
||||
BinaryOp,
|
||||
UnaryOp,
|
||||
CallExpr,
|
||||
# LHS,
|
||||
Id,
|
||||
ArrayCell,
|
||||
# Literal,
|
||||
IntLiteral,
|
||||
FloatLiteral,
|
||||
StringLiteral,
|
||||
|
@ -1,13 +1,11 @@
|
||||
from utils.Visitor import BaseVisitor
|
||||
from utils.Utils import Utils
|
||||
from checker.StaticError import (
|
||||
# Kind,
|
||||
Function,
|
||||
Procedure,
|
||||
Variable,
|
||||
Parameter,
|
||||
Identifier,
|
||||
# StaticError,
|
||||
Undeclared,
|
||||
Redeclared,
|
||||
TypeMismatchInExpression,
|
||||
@ -26,32 +24,8 @@ from utils.AST import (
|
||||
StringType,
|
||||
ArrayType,
|
||||
VoidType,
|
||||
# Program,
|
||||
# Decl,
|
||||
# VarDecl,
|
||||
FuncDecl,
|
||||
# Stmt,
|
||||
# Assign,
|
||||
# If,
|
||||
# While,
|
||||
# For,
|
||||
# Break,
|
||||
# Continue,
|
||||
# Return,
|
||||
# With,
|
||||
# CallStmt,
|
||||
# Expr,
|
||||
# BinaryOp,
|
||||
# UnaryOp,
|
||||
CallExpr,
|
||||
# LHS,
|
||||
# Id,
|
||||
# ArrayCell,
|
||||
# Literal,
|
||||
# IntLiteral,
|
||||
# FloatLiteral,
|
||||
# StringLiteral,
|
||||
# BooleanLiteral
|
||||
)
|
||||
from functools import reduce
|
||||
|
||||
@ -75,9 +49,10 @@ class Symbol:
|
||||
self.value = value
|
||||
|
||||
def __str__(self):
|
||||
return 'Symbol({},{})'.format(
|
||||
return 'Symbol({},{},{})'.format(
|
||||
self.name,
|
||||
str(self.mtype)
|
||||
str(self.mtype),
|
||||
str(self.value)
|
||||
)
|
||||
|
||||
|
||||
@ -138,12 +113,6 @@ class StaticChecker(BaseVisitor, Utils):
|
||||
if res is not None:
|
||||
raise Redeclared(kind, symbol.name)
|
||||
|
||||
def mergeGlobal2Local(self, local_scope, global_scope):
|
||||
for s in global_scope:
|
||||
res = self.lookup(s.name, local_scope, lambda e: e.name.lower())
|
||||
if res is None:
|
||||
local_scope.append(s)
|
||||
|
||||
def checkTypeCompatibility(self, lhs, rhs, error):
|
||||
# array check
|
||||
if isinstance(lhs, ArrayType):
|
||||
|
@ -2,46 +2,23 @@ from utils.Utils import Utils
|
||||
from utils.Visitor import BaseVisitor
|
||||
from checker.StaticCheck import MType, Symbol
|
||||
from utils.AST import (
|
||||
# Type,
|
||||
IntType,
|
||||
FloatType,
|
||||
BoolType,
|
||||
StringType,
|
||||
# ArrayType,
|
||||
VoidType,
|
||||
# Program,
|
||||
# Decl,
|
||||
# VarDecl,
|
||||
FuncDecl,
|
||||
# Stmt,
|
||||
Assign,
|
||||
# If,
|
||||
While,
|
||||
# For,
|
||||
# Break,
|
||||
# Continue,
|
||||
# Return,
|
||||
# With,
|
||||
# CallStmt,
|
||||
# Expr,
|
||||
BinaryOp,
|
||||
# UnaryOp,
|
||||
# CallExpr,
|
||||
# LHS,
|
||||
Id,
|
||||
# ArrayCell,
|
||||
# Literal,
|
||||
IntLiteral,
|
||||
# FloatLiteral,
|
||||
# StringLiteral,
|
||||
# BooleanLiteral,
|
||||
ArrayPointerType,
|
||||
ClassType
|
||||
)
|
||||
from codegen.Emitter import Emitter
|
||||
from codegen.Frame import Frame
|
||||
from abc import ABC # , abstractmethod
|
||||
# from functools import reduce
|
||||
from abc import ABC
|
||||
|
||||
|
||||
class CodeGenerator(Utils):
|
||||
|
67
mpc.py
67
mpc.py
@ -1,21 +1,25 @@
|
||||
import sys
|
||||
import os
|
||||
import subprocess
|
||||
import click
|
||||
|
||||
from antlr4 import FileStream, CommonTokenStream # Token
|
||||
from antlr4.error.ErrorListener import ConsoleErrorListener # ErrorListener
|
||||
from antlr4 import FileStream, CommonTokenStream
|
||||
from antlr4.error.ErrorListener import ConsoleErrorListener
|
||||
|
||||
from tools.genANTLR4 import generate, regenerate
|
||||
|
||||
try:
|
||||
from parser.MPLexer import MPLexer as Lexer
|
||||
from parser.MPParser import MPParser as Parser
|
||||
# dynamic loading of ANTLR4 files
|
||||
from parser import MPLexer, MPParser # type: ignore
|
||||
except ImportError:
|
||||
generate()
|
||||
from parser import MPLexer, MPParser # type: ignore
|
||||
|
||||
from astgen.ASTGeneration import ASTGeneration
|
||||
from checker.StaticCheck import StaticChecker
|
||||
from codegen.CodeGenerator import CodeGenerator
|
||||
except ModuleNotFoundError:
|
||||
print('Generate ANTLR4 first')
|
||||
print('python genANTLR4.py')
|
||||
exit(1)
|
||||
|
||||
Lexer = MPLexer.MPLexer # load from module
|
||||
Parser = MPParser.MPParser # load from module
|
||||
|
||||
ANTLR_JAR = os.environ.get('ANTLR_LIB')
|
||||
JASMIN_JAR = './external/jasmin.jar'
|
||||
@ -43,7 +47,7 @@ class NewErrorListener(ConsoleErrorListener):
|
||||
offendingSymbol.text)
|
||||
|
||||
|
||||
def compile(inputfile):
|
||||
def compile(inputfile, output):
|
||||
lexer = Lexer(FileStream(inputfile))
|
||||
tokens = CommonTokenStream(lexer)
|
||||
try:
|
||||
@ -68,14 +72,14 @@ def compile(inputfile):
|
||||
checker = StaticChecker(asttree)
|
||||
checker.check()
|
||||
|
||||
path = os.path.dirname(inputfile)
|
||||
path = output
|
||||
filename = os.path.basename(inputfile).split('.')[0]
|
||||
jasmin_file = '{}/{}.j'.format(path, filename)
|
||||
codeGen = CodeGenerator()
|
||||
codeGen.gen(asttree, path, filename)
|
||||
|
||||
subprocess.call(
|
||||
# "java -jar " + JASMIN_JAR + " " + path + "/MPClass.j",
|
||||
"java -jar {} {}/{}.j -d {}".format(JASMIN_JAR, path, filename, path),
|
||||
"java -jar {} {} -d {}".format(JASMIN_JAR, jasmin_file, path),
|
||||
shell=True,
|
||||
stderr=subprocess.STDOUT
|
||||
)
|
||||
@ -84,7 +88,6 @@ def compile(inputfile):
|
||||
iofile.write(open('libs/io.class', 'rb').read())
|
||||
|
||||
subprocess.call(
|
||||
# 'jar cvfm {}/{}.jar {}/manifest.mf {} {}.class'.format(
|
||||
'jar cvfe {0}.jar {0} io.class {0}.class'.format(
|
||||
filename
|
||||
),
|
||||
@ -93,20 +96,42 @@ def compile(inputfile):
|
||||
stderr=subprocess.STDOUT
|
||||
)
|
||||
|
||||
os.remove('{}/{}.j'.format(path, filename))
|
||||
os.remove(jasmin_file)
|
||||
os.remove('{}/{}.class'.format(path, filename))
|
||||
os.remove('{}/io.class'.format(path))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
argv = sys.argv
|
||||
if len(argv) != 2:
|
||||
exit(1)
|
||||
@click.command()
|
||||
@click.argument('file')
|
||||
@click.option(
|
||||
'--output', default='',
|
||||
help='Where the jar file will be after compile')
|
||||
@click.option(
|
||||
'--regen', is_flag=True,
|
||||
help='Regenerate antlr4 files and exit'
|
||||
)
|
||||
def main(file, output, regen):
|
||||
if regen:
|
||||
regenerate()
|
||||
print("ANTLR4 files regenerate succesfully")
|
||||
return
|
||||
|
||||
if output == '':
|
||||
output = os.path.dirname(file)
|
||||
else:
|
||||
# check if directory, create
|
||||
|
||||
# if file path, then
|
||||
pass
|
||||
|
||||
print("Compiling {}".format(os.path.relpath(file)))
|
||||
try:
|
||||
print("Compiling {}".format(os.path.relpath(argv[1])))
|
||||
compile(argv[1])
|
||||
compile(file, output)
|
||||
except BaseException as e:
|
||||
print(e)
|
||||
exit(1)
|
||||
print("Compiled successfully")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
0
tools/__init__.py
Normal file
0
tools/__init__.py
Normal file
@ -3,23 +3,23 @@ import subprocess
|
||||
|
||||
|
||||
ANTLR_JAR = os.environ.get('ANTLR_LIB')
|
||||
JASMIN_JAR = './external/jasmin.jar'
|
||||
|
||||
if ANTLR_JAR is None:
|
||||
# fall back, not recommended
|
||||
ANTLR_JAR = './external/antrl4.jar'
|
||||
ANTLR_JAR = '../external/antrl4.jar'
|
||||
|
||||
files_from_antlr4 = [
|
||||
'parser/MP.interp',
|
||||
'parser/MPLexer.interp',
|
||||
'parser/MPLexer.py',
|
||||
'parser/MPLexer.tokens',
|
||||
'parser/MPParser.py',
|
||||
'parser/MP.tokens',
|
||||
'parser/MPVisitor.py'
|
||||
]
|
||||
|
||||
|
||||
def generate():
|
||||
files_from_antlr4 = [
|
||||
'MP.interp',
|
||||
'MPLexer.interp',
|
||||
'MPLexer.py',
|
||||
'MPLexer.tokens',
|
||||
'MPParser.py',
|
||||
'MP.tokens',
|
||||
'MPVisitor.py'
|
||||
]
|
||||
if all(list(map(os.path.isfile, files_from_antlr4))):
|
||||
return
|
||||
gen_antlr_class_cmd = [
|
||||
@ -33,5 +33,7 @@ def generate():
|
||||
subprocess.run(gen_antlr_class_cmd)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
def regenerate():
|
||||
for f in files_from_antlr4:
|
||||
os.remove(f)
|
||||
generate()
|
Loading…
Reference in New Issue
Block a user