update parser

This commit is contained in:
nganhkhoa 2024-02-19 02:54:02 +07:00
parent 546af734e7
commit 6a27f8fdf8
4 changed files with 516 additions and 61 deletions

View File

@ -17,54 +17,133 @@ keywords = {
| "import"
| "module"
| "export"
| "record"
| "tuple"
| "type"
| "enum"
| "auto"
}
identifier = @{ !keywords ~ ASCII_ALPHA ~ ASCII_ALPHANUMERIC* }
function_type_param = { type }
function_type_return = { type }
// An identifier may begin with a keyword only when more identifier characters
// follow it (e.g. `increment`, `is_even`); a bare keyword is never an identifier.
// In every case an identifier cannot begin with a digit.
identifier = @{ keywords ~ ("_" | ASCII_ALPHANUMERIC)+
    | !keywords ~ ("_" | ASCII_ALPHA) ~ ("_" | ASCII_ALPHANUMERIC)* }
function_type_param = { type* }
function_type_return = _{ type }
function_type = {
"fn" ~ function_type_param* ~ "=>" ~ function_type_return
"fn" ~ function_type_param ~ "=>" ~ function_type_return
}
type = {
// record name { prop: type, prop: type, prop: type }
record_type_field = { identifier ~ ":" ~ type }
record_type_fields = _{ record_type_field ~ "," ~ record_type_fields | record_type_field }
record_type = {
"record" ~ "{" ~ record_type_fields ~ "}"
}
// tuple (type, type)
tuple_type_item = _{ type }
tuple_type_item_list = _{ tuple_type_item ~ "," ~ tuple_type_item_list | tuple_type_item }
tuple_type = {
"tuple" ~ "(" ~ tuple_type_item_list ~ ")"
}
// enum name { name: type, name: type }
enum_field = { identifier ~ ":" ~ type }
enum_fields = _{ enum_field ~ "," ~ enum_fields | enum_field }
enum_type = {
"enum" ~ "{" ~ enum_fields ~ "}"
}
// allow for type alias, aka create new type
type_name = { identifier }
type_declaration = { "type" ~ type_name ~ "=" ~ type }
type_number = { "number" }
type_string = { "string" }
type_bool = { "bool" }
// for gradual typing
type_auto = { "?" | "auto" }
// Higher-kinded types will be added later.
// They are complex to parse because a type would have to
// accept other types as arguments.
type = _{
function_type
| "number"
| "bool"
| "string"
| record_type
| tuple_type
| enum_type
| type_name
| type_number
| type_bool
| type_string
}
number = @{ ASCII_DIGIT+ }
string = @{ "\"" ~ !("\"") ~ ASCII_ALPHANUMERIC* ~ "\"" }
bool = @{ "true" | "false" }
function = { "function" ~ function_param* ~ "is" ~ function_body }
function_param = { identifier }
function_body = {
binding* ~ expr
tuple = { "tuple" ~ "(" ~ tuple_item_list ~ ")" }
tuple_item_list = _{ expr ~ "," ~ tuple_item_list | expr }
record = { "record" ~ "{" ~ record_item_list ~ "}" }
record_item = { identifier ~ ":" ~ expr }
record_item_list = _{ record_item ~ "," ~ record_item_list | record_item }
function = { "function" ~ function_params ~ "is" ~ function_body }
function_params = { function_param* }
function_param = _{ identifier }
function_body = _{
bindings ~ expr
}
bindings = { binding* }
binding = {
"let" ~ identifier ~ "=" ~ expr ~ "in"
}
// will be expr in later version of the language
if_cond = { identifier }
if_branch = { identifier }
else_branch = { identifier }
if_cond = _{ expr }
if_branch = _{ expr }
else_branch = _{ expr }
if_expr = {
// "if" ~ expr ~ "then" ~ expr ~ "else" ~ expr
"if" ~ if_cond ~ "then" ~ if_branch ~ "else" ~ else_branch
}
call_function = { identifier }
call_param = { identifier | identifier ~ call_param }
call_function = _{ identifier }
call_param_list = _{ expr ~ ("," ~ expr)* }
call_param = { call_param_list? }
call_expr = {
call_function ~ "(" ~ call_param? ~ ")"
call_function ~ "(" ~ call_param ~ ")"
}
binop = _{
add
| sub
| div
| mul
| mod
| and
| or
| xor
}
add = { "+" }
sub = { "-" }
div = { "/" }
mul = { "*" }
mod = { "%" }
and = { "&" }
or = { "|" }
xor = { "^" }
// TODO: disambiguate between arithmetic and logical operators
// TODO: add comparison operators
expr = {
single_expr ~ (binop ~ single_expr)*
}
single_expr = _{
if_expr
| call_expr
| identifier
| value
| "(" ~ expr ~ ")"
}
// a function is a value only if we allow thunks
@ -74,17 +153,22 @@ value = _{
number
| string
| bool
| tuple
| record
| function
}
declaration = {
variable_declaration = {
identifier ~ ":" ~ type ~ "=" ~ expr
}
declaration = _{
type_declaration
| variable_declaration
}
module_name = @{ identifier }
module_name = _{ identifier }
export_list = _{
identifier
| identifier ~ "," ~ export_list
identifier ~ ("," ~ identifier)*
}
module_export = {
"export" ~ export_list
@ -94,24 +178,24 @@ module_export = {
// import module only name, name
// import module as change_name
import_list = _{
identifier ~ "," ~ import_list
| identifier
identifier ~ ("," ~ identifier)*
}
import_selective = { "only" ~ import_list }
import_change_name = { "as" ~ module_name }
import_types = {
import_types = _{
import_selective
| import_change_name
}
module_import = { "import" ~ module_name ~ import_types? }
module_export_import = { (module_export | module_import)* }
module_declare = {
"module" ~ module_name ~ module_export* ~ module_import*
"module" ~ module_name ~ module_export_import
}
program = {
module_declare ~ declaration*
SOI ~ module_declare ~ declaration*
}
WHITESPACE = _{ " " | NEWLINE }
COMMENT = _{ ";.*" ~ NEWLINE }
COMMENT = _{ ";" ~ (!NEWLINE ~ ANY)* ~ NEWLINE }

View File

@ -4,30 +4,365 @@ pub struct AlbireoParser;
use pest::iterators::Pair;
struct Module {
use std::vec::Vec;
use std::collections::HashMap;
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
pub struct Identifier {
name: String,
declaration: HashMap<String, Declaration>
}
struct Declaration {
type: Type,
definition: Expression,
/// A fully parsed module: its header plus every top-level declaration.
///
/// Built by `parse_module` from the pairs of a `program` rule.
#[derive(Clone, Debug)]
pub struct Module {
// Header information (name, exports, imports) taken from the `module` clause.
module: ModuleInformation,
// Top-level declarations, in the order the parser yields them.
declaration: Vec<Declaration>
}
enum Type {
/// The header of a module, as produced by `parse_module_declaration`.
#[derive(Clone, Debug)]
pub struct ModuleInformation {
// Name given after the `module` keyword.
name: Identifier,
// What the module exports; see `Export`.
export: Export,
// One entry per imported module.
import: Vec<Import>,
}
/// The export policy of a module.
#[derive(Clone, Debug)]
pub enum Export {
// Chosen by `parse_module_declaration` when no name was listed in an `export` clause.
All,
// The names collected from the module's `export` clauses.
Selective(Vec<Identifier>),
}
/// Everything a module imports from one other module.
#[derive(Clone, Debug)]
pub struct Import {
// Name of the module being imported.
from: Identifier,
rename: Identifier, // alias set by `import m as alias`; starts out equal to `from`
// Names listed by `import m only a, b`; left empty when no `only` clause was seen.
imported: Vec<Identifier>,
}
/// A top-level declaration, as produced by `parse_declaration`.
#[derive(Clone, Debug)]
pub enum Declaration {
// `type name = type`: the declared name and the type it stands for.
Type(Identifier, Type),
// `name : type = expr`: the variable name, its annotated type and its definition.
Variable(Identifier, Type, Box<Expression>),
}
#[derive(Clone, Debug)]
pub enum Type {
Identifier(Identifier),
Number,
String,
Bool,
Function(/*inputs*/Vec<Type>, /*output*/Type)
Function(/*inputs*/Vec<Type>, /*output*/Box<Type>),
Tuple(Vec<Type>),
}
// limitation as of now, everything must be passed in as identifier
enum Expression {
IfClause(String, String, String),
Application(/*function name*/String, /*params*/Vec<String>), // function call
Identifier(String),
/// The operator of an `Expression::Binary` node.
///
/// The variants mirror the grammar's `binop` alternatives:
/// `+`, `-`, `/`, `*`, `%`, `&`, `|` and `^`.
#[derive(Clone, Debug)]
pub enum BinaryOp {
Add, Sub, Div, Mul, Mod, And, Or, Xor
}
/// Converts the source text of a binary operator into its [`BinaryOp`].
///
/// Implemented as `From` rather than `Into`; the standard blanket impl keeps
/// `"+".into()` working for existing callers.
///
/// Every operator the grammar's `binop` rule can produce is covered,
/// including the bitwise operators `&`, `|` and `^`.
///
/// # Panics
///
/// Panics when `symbol` is not one of `+ - * / % & | ^`. The grammar never
/// produces any other operator text, so reaching that arm indicates a bug.
impl From<&str> for BinaryOp {
    fn from(symbol: &str) -> Self {
        match symbol {
            "+" => BinaryOp::Add,
            "-" => BinaryOp::Sub,
            "*" => BinaryOp::Mul,
            "/" => BinaryOp::Div,
            "%" => BinaryOp::Mod,
            "&" => BinaryOp::And,
            "|" => BinaryOp::Or,
            "^" => BinaryOp::Xor,
            // Planned, but not part of the grammar yet:
            // comparison (< > <= >= == !=), logical (|| &&),
            // shifts (<< >>) and power (**).
            other => unreachable!("unknown binary operator {:?}", other),
        }
    }
}
#[derive(Clone, Debug)]
pub enum Expression {
IfClause(Box<Expression>, Box<Expression>, Box<Expression>),
Application(/*function name*/Identifier, /*params*/Vec<Box<Expression>>), // function call
Identifier(Identifier),
Number(u64),
String(String),
Bool(bool),
Function(String, Vec<Binding>, Expression),
Function(/*params*/Vec<Identifier>, Vec<Binding>, Box<Expression>),
Binary(Box<Expression>, BinaryOp, Box<Expression>),
Tuple(Vec<Box<Expression>>),
}
/// A local binding of the form `let lhs = rhs in`, as produced by `parse_bindings`.
#[derive(Clone, Debug)]
pub struct Binding {
// The name being bound.
lhs: Identifier,
// The expression whose value is bound to `lhs`.
rhs: Box<Expression>,
}
/// Parses the definition (right-hand side) of a variable declaration.
///
/// Currently this simply forwards to `parse_expr`.
fn parse_definition(parsed: Pair<Rule>) -> Box<Expression> {
parse_expr(parsed)
}
fn parse_expr(parsed: Pair<Rule>) -> Box<Expression> {
use pest::pratt_parser::PrattParser;
use pest::pratt_parser::{Assoc, Op};
let pratt =
PrattParser::new()
.op(Op::infix(Rule::r#mod, Assoc::Left))
.op(Op::infix(Rule::add, Assoc::Left) | Op::infix(Rule::sub, Assoc::Left))
.op(Op::infix(Rule::mul, Assoc::Left) | Op::infix(Rule::div, Assoc::Left))
;
let expr = pratt
.map_primary(|p| {
parse_single_expr(p)
})
.map_infix(|lhs, op, rhs| {
Box::new(Expression::Binary(lhs.clone(), op.as_str().into(), rhs.clone()))
})
.parse(parsed.into_inner());
expr
}
/// Parses a single `binding` pair (`let name = expr in`) into a [`Binding`].
///
/// # Panics
///
/// Panics if the pair does not contain both a name and an expression.
fn parse_bindings(parsed: Pair<Rule>) -> Binding {
    let mut parts = parsed.into_inner();
    let lhs = parse_identifier(parts.next().unwrap());
    let rhs = parse_expr(parts.next().unwrap());
    Binding { lhs, rhs }
}
/// Parses a `function` pair into an `Expression::Function`.
///
/// The pair is expected to contain, in order, the parameter list, the local
/// bindings and the body expression. Returns `None` if any of them is missing.
fn parse_function_expr(parsed: Pair<Rule>) -> Option<Box<Expression>> {
    let mut parts = parsed.into_inner();
    // Fetch all three children before parsing any of them.
    let (params_pair, bindings_pair, body_pair) = (parts.next()?, parts.next()?, parts.next()?);

    let params = params_pair.into_inner().map(parse_identifier).collect();
    let bindings = bindings_pair.into_inner().map(parse_bindings).collect();
    let function = Expression::Function(params, bindings, parse_expr(body_pair));
    Some(Box::new(function))
}
fn parse_if_expr(parsed: Pair<Rule>) -> Option<Box<Expression>> {
let mut p = parsed.into_inner();
let condition = parse_expr(p.next()?);
let ifbranch = parse_expr(p.next()?);
let elsebranch = parse_expr(p.next()?);
Some(Box::new(Expression::IfClause(condition, ifbranch, elsebranch)))
}
fn parse_call_expr(parsed: Pair<Rule>) -> Option<Box<Expression>> {
let mut p = parsed.into_inner();
let identifier = parse_identifier(p.next()?);
let parsed_params = p.next()?;
let params = parsed_params.into_inner().map(parse_expr).collect();
Some(Box::new(Expression::Application(identifier, params)))
}
fn parse_single_expr(parsed: Pair<Rule>) -> Box<Expression> {
match parsed.as_rule() {
Rule::if_expr => {
parse_if_expr(parsed).unwrap()
},
Rule::call_expr => {
parse_call_expr(parsed).unwrap()
},
Rule::identifier => {
Box::new(Expression::Identifier(parse_identifier(parsed)))
},
Rule::number => {
let num = parsed.as_str();
Box::new(Expression::Number(num.parse().unwrap()))
},
// Rule::string => {},
Rule::bool => {
if parsed.as_str() == "false" {
Box::new(Expression::Bool(false))
} else {
Box::new(Expression::Bool(true))
}
},
Rule::tuple => {
let mut tuple = Vec::new();
let mut p = parsed.into_inner();
while let Some(e) = p.next() {
tuple.push(parse_expr(e));
}
Box::new(Expression::Tuple(tuple))
}
Rule::function => {
parse_function_expr(parsed).unwrap()
}
Rule::expr => {
let e = parsed.into_inner().next().unwrap();
parse_expr(e)
},
_ => {
println!("please implement parse expr for {:?}", parsed.as_rule());
unreachable!();
}
}
}
/// Builds an [`Identifier`] from the exact source text matched by `parsed`.
fn parse_identifier(parsed: Pair<Rule>) -> Identifier {
    let name = String::from(parsed.as_str());
    Identifier { name }
}
/// Parses a `function_type` pair (`fn params => return`) into `Type::Function`.
///
/// The first child holds the parameter types and the second is the return
/// type. Returns `None` if either child is missing.
fn parse_function_type(parsed: Pair<Rule>) -> Option<Type> {
    let mut parts = parsed.into_inner();
    // Fetch both children before parsing either of them.
    let (params_pair, return_pair) = (parts.next()?, parts.next()?);

    let inputs = params_pair.into_inner().map(parse_type).collect();
    let output = parse_type(return_pair);
    Some(Type::Function(inputs, Box::new(output)))
}
/// Converts a type pair into a [`Type`].
///
/// Handles the built-in `number`, `string` and `bool` types, named types,
/// tuple types and function types.
///
/// # Panics
///
/// Panics on any other rule (after printing its name), and on a function type
/// that lacks its parameter list or return type.
fn parse_type(parsed: Pair<Rule>) -> Type {
    let rule = parsed.as_rule();
    match rule {
        Rule::type_number => Type::Number,
        Rule::type_string => Type::String,
        Rule::type_bool => Type::Bool,
        Rule::type_name => Type::Identifier(parse_identifier(parsed)),
        Rule::tuple_type => Type::Tuple(parsed.into_inner().map(parse_type).collect()),
        Rule::function_type => parse_function_type(parsed).unwrap(),
        _ => {
            println!("implement parse type for {:?}", rule);
            unreachable!();
        }
    }
}
/// Parses one top-level declaration pair into a [`Declaration`].
///
/// A `type_declaration` yields `Declaration::Type`; a `variable_declaration`
/// yields `Declaration::Variable`.
///
/// # Panics
///
/// Panics on any other rule, or if an expected child pair is missing.
fn parse_declaration(parsed: Pair<Rule>) -> Declaration {
    let rule = parsed.as_rule();
    let mut parts = parsed.into_inner();
    match rule {
        Rule::type_declaration => {
            let name_pair = parts.next().unwrap();
            let type_pair = parts.next().unwrap();
            Declaration::Type(parse_identifier(name_pair), parse_type(type_pair))
        }
        Rule::variable_declaration => {
            let name_pair = parts.next().unwrap();
            let type_pair = parts.next().unwrap();
            let definition_pair = parts.next().unwrap();
            Declaration::Variable(
                parse_identifier(name_pair),
                parse_type(type_pair),
                parse_definition(definition_pair),
            )
        }
        _ => unreachable!(),
    }
}
/// Parses a `module_declare` pair into the module's [`ModuleInformation`].
///
/// The pair holds the module name followed by a list of `export` and `import`
/// clauses:
///
/// * every name of every `export` clause is collected; when none is listed
///   the module exports everything (`Export::All`);
/// * all `import` clauses naming the same module are merged into one
///   [`Import`]. `import m as alias` sets its `rename`, and the names of each
///   `import m only a, b` are appended to its `imported` list, so repeated
///   `only` clauses accumulate instead of replacing one another.
///
/// Imports are returned in order of first appearance in the source, which
/// keeps the result deterministic from run to run.
///
/// Returns `None` if an expected child pair is missing.
///
/// # Panics
///
/// Panics if the clause list contains a rule other than `module_export` or
/// `module_import`, or an import modifier other than `only` / `as`.
fn parse_module_declaration(parsed: Pair<Rule>) -> Option<ModuleInformation> {
    let mut parts = parsed.into_inner();
    let name = parse_identifier(parts.next()?);
    let export_import_list = parts.next()?;

    let mut export_list = Vec::new();
    // A Vec keeps source order; the handful of imports a module has makes the
    // linear lookup below negligible.
    let mut imports: Vec<Import> = Vec::new();

    for item in export_import_list.into_inner() {
        match item.as_rule() {
            Rule::module_export => {
                export_list.extend(item.into_inner().map(parse_identifier));
            }
            Rule::module_import => {
                let mut import_parts = item.into_inner();
                let from = parse_identifier(import_parts.next()?);

                // Reuse the entry of an already imported module, or create one
                // whose alias defaults to the module's own name.
                let index = match imports.iter().position(|known| known.from == from) {
                    Some(index) => index,
                    None => {
                        imports.push(Import {
                            from: from.clone(),
                            rename: from,
                            imported: Vec::new(),
                        });
                        imports.len() - 1
                    }
                };
                let import = &mut imports[index];

                if let Some(modifier) = import_parts.next() {
                    match modifier.as_rule() {
                        Rule::import_selective => {
                            import
                                .imported
                                .extend(modifier.into_inner().map(parse_identifier));
                        }
                        Rule::import_change_name => {
                            import.rename = parse_identifier(modifier.into_inner().next()?);
                        }
                        other => unreachable!("unexpected import modifier {:?}", other),
                    }
                }
            }
            other => unreachable!("unexpected rule {:?} in module header", other),
        }
    }

    let export = if export_list.is_empty() {
        Export::All
    } else {
        Export::Selective(export_list)
    };

    Some(ModuleInformation {
        name,
        export,
        import: imports,
    })
}
pub fn parse_module(parsed: Pair<Rule>) -> Option<Module> {
let mut p = parsed.into_inner();
let module = p.next()?;
let module_info = parse_module_declaration(module)?;
let mut declaration_list = Vec::new();
while let Some(declaration) = p.next() {
let declare = parse_declaration(declaration);
declaration_list.push(declare);
}
Some(Module {
module: module_info,
declaration: declaration_list,
})
}

View File

@ -1,6 +1,6 @@
mod albireo;
use crate::albireo::{AlbireoParser, Rule};
use crate::albireo::{AlbireoParser, Rule, parse_module};
use std::fs;
use pest::Parser;
@ -8,10 +8,11 @@ use pest::Parser;
fn main() {
let input = fs::read_to_string("test/simple/main.air").expect("cannot read file");
let program = AlbireoParser::parse(Rule::program, &input)
let parsed = AlbireoParser::parse(Rule::program, &input)
.expect("file format is wrong or the parser is wrong")
.next()
.expect("cannot parse input file as a Albireo program");
println!("{:?}", program);
let module = parse_module(parsed);
println!("{:?}", module);
}

View File

@ -1,9 +1,44 @@
; declare a module
module main
c : fn => bool = function is
let a = b in
c(a)
; no explicit export so everything in this module is exported
export one, two
b : bool = false
; no import
import a
import a only name, name
import a as change_name
a : number = 1
; declare a variable
one : number = 1
; declare a variable that is computed
; places a burden on the runtime
two : number = one + 1
; tt : bool = true
; types can be named, alias
; using tuple or record or enum
type coordinates_tuple = tuple (number, number)
; type coordinates_record = record { x : number , y : number }
coordinates_as_tuple : coordinates_tuple = tuple (1, 2)
; coordinates_as_record : coordinates_record = record { x : 1 , y : 2 }
; function definition is structured the same
void : fn => number = function is
let x = 1 in
1
return_false : fn => bool = function is
false
increment : fn number => number = function x is
x + 1
is_even : fn number => bool = function x is
if x % 2 then true else false
bruh : number = increment(one)