#[derive(pest_derive::Parser)] #[grammar = "zkasm.pest"] pub struct ZkasmParser; use pest::iterators::Pair; use std::fmt; use crate::execution::Execution; #[derive(Debug, Clone)] pub enum UnaryOp { Negate, Not, } impl fmt::Display for UnaryOp { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { UnaryOp::Negate => write!(f, "-"), UnaryOp::Not => write!(f, "!"), } } } #[derive(Debug, Clone, PartialEq)] pub enum BinaryOp { Add, Sub, Mul, Div, Mod, Lt, Gt, Le, Ge, Eq, Neq, LogicalXor, LogicalAnd, LogicalOr, ArithAnd, ArithOr, ShiftLeft, ShiftRight, Power, } impl fmt::Display for BinaryOp { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let s = match self { BinaryOp::Add => "+", BinaryOp::Sub => "-", BinaryOp::Mul => "*", BinaryOp::Div => "/", BinaryOp::Lt => "<", BinaryOp::Gt => ">", BinaryOp::Le => "<=", BinaryOp::Ge => ">=", BinaryOp::Eq => "==", BinaryOp::Neq => "!=", BinaryOp::ArithOr => "|", BinaryOp::ArithAnd => "&", BinaryOp::LogicalOr => "||", BinaryOp::LogicalAnd => "&&", BinaryOp::ShiftLeft => "<<", BinaryOp::ShiftRight => ">>", BinaryOp::Power => "**", BinaryOp::Mod => "%", _ => { unreachable!() } }; write!(f, "{}", s) } } impl Into for String { fn into(self) -> BinaryOp { match self.as_str() { "+" => BinaryOp::Add, "-" => BinaryOp::Sub, "*" => BinaryOp::Mul, "/" => BinaryOp::Div, "%" => BinaryOp::Mod, "<" => BinaryOp::Lt, ">" => BinaryOp::Gt, "<=" => BinaryOp::Le, ">=" => BinaryOp::Ge, "==" => BinaryOp::Eq, "!=" => BinaryOp::Neq, "|" => BinaryOp::ArithOr, "&" => BinaryOp::ArithAnd, "||" => BinaryOp::LogicalOr, "&&" => BinaryOp::LogicalAnd, "<<" => BinaryOp::ShiftLeft, ">>" => BinaryOp::ShiftRight, "**" => BinaryOp::Power, _ => { unreachable!() } } } } #[derive(Debug, Clone)] pub enum Expr { Unary(UnaryOp, Box), Binary(Box, BinaryOp, Box), Tenary(Box, Box, Box), FreeInput(String), Register(Register), Value(u64), ValueComplex(String), // for hex number NameVariable(String), Increment(Box), Decrement(Box), } impl fmt::Display for Expr { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Expr::Unary(op, expr) => { write!(f, "{}({})", op, expr) } Expr::Binary(lhs, op, rhs) => { write!(f, "({} {} {})", lhs, op, rhs) } Expr::Tenary(expr, ifbranch, elsebranch) => { write!(f, "({} ? {} : {})", expr, ifbranch, elsebranch) } Expr::Register(r) => { write!(f, "{}", r.name()) } Expr::Value(v) => { write!(f, "{}", v) } Expr::ValueComplex(v) => { if v.starts_with("0x") { match u64::from_str_radix(v.strip_prefix("0x").unwrap(), 16).ok() { Some(s) => write!(f, "{}/* {} */", s, v), None => write!(f, "{}", v), } } else { write!(f, "{}", v) } } Expr::NameVariable(v) => { write!(f, "{}", v) } Expr::FreeInput(free) => { write!(f, "{}", free) } Expr::Increment(e) => { write!(f, "({})++", e) } Expr::Decrement(e) => { write!(f, "({})--", e) } } } } #[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)] pub enum Register { A, B, C, D, E, SP, RR, SR, PC, CTX, RCX, GAS, HASHPOS, HASH_LEFT, HASH_RIGHT, OLD_ROOT, NEW_ROOT, VALUE_LOW, VALUE_HIGH, SIBLING_VALUE_HASH, RKEY, SIBLING_RKEY, RKEY_BIT, LEVEL, } impl fmt::Display for Register { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { _ if self.is_special() => write!(f, "{}", self.name()), Register::A => write!(f, "A"), Register::B => write!(f, "B"), Register::C => write!(f, "C"), Register::D => write!(f, "D"), Register::E => write!(f, "E"), Register::SP => write!(f, "SP"), Register::SR => write!(f, "SR"), Register::RR => write!(f, "RR"), Register::PC => write!(f, "PC"), Register::GAS => write!(f, "GAS"), Register::CTX => write!(f, "CTX"), Register::RCX => write!(f, "RCX"), Register::HASHPOS => write!(f, "HASHPOS"), Register::HASH_LEFT => write!(f, "HASH_LEFT"), Register::HASH_RIGHT => write!(f, "HASH_RIGHT"), Register::OLD_ROOT => write!(f, "OLD_ROOT"), Register::NEW_ROOT => write!(f, "NEW_ROOT"), Register::VALUE_LOW => write!(f, "VALUE_LOW"), Register::VALUE_HIGH => write!(f, "VALUE_HIGH"), Register::SIBLING_VALUE_HASH => write!(f, "SIBLING_VALUE_HASH"), Register::RKEY => write!(f, "RKEY"), Register::SIBLING_RKEY => write!(f, "SIBLING_RKEY"), Register::RKEY_BIT => write!(f, "RKEY_BIT"), Register::LEVEL => write!(f, "LEVEL"), } } } impl Register { pub fn is_special(self) -> bool { match self { Register::A | Register::B | Register::C | Register::D | Register::E => false, _ => true, } } pub fn name(self) -> String { match self { Register::SR => "StateRoot".to_string(), Register::RR => "ReturnRegister".to_string(), Register::GAS => "GAS".to_string(), Register::HASHPOS => "HASHPOS".to_string(), Register::CTX => "CONTEXT".to_string(), Register::PC => "EVM(ProgramCounter)".to_string(), Register::SP => "EVM(StackPointer)".to_string(), _ => format!("{:?}", self) } } pub fn from_name(name: &str) -> Self { match name { "A" => Register::A, "B" => Register::B, "C" => Register::C, "D" => Register::D, "E" => Register::E, "SP" => Register::SP, "RR" => Register::RR, "SR" => Register::SR, "PC" => Register::PC, "CTX" => Register::CTX, "RCX" => Register::RCX, "GAS" => Register::GAS, "HASHPOS" => Register::HASHPOS, "HASH_LEFT" => Register::HASH_LEFT, "HASH_RIGHT" => Register::HASH_RIGHT, "OLD_ROOT" => Register::OLD_ROOT, "NEW_ROOT" => Register::NEW_ROOT, "VALUE_LOW" => Register::VALUE_LOW, "VALUE_HIGH" => Register::VALUE_HIGH, "SIBLING_VALUE_HASH" => Register::SIBLING_VALUE_HASH, "RKEY" => Register::RKEY, "SIBLING_RKEY" => Register::SIBLING_RKEY, "RKEY_BIT" => Register::RKEY_BIT, "LEVEL" => Register::LEVEL, _ => { unreachable!() } } } } #[derive(Debug, Clone)] pub enum AccessPlace { Memory, Stack, System, } impl fmt::Display for AccessPlace { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { AccessPlace::Memory => write!(f, "MEM"), AccessPlace::Stack => write!(f, "STACK"), AccessPlace::System => write!(f, "SYS"), } } } #[derive(Debug, Clone)] pub enum InstructionOpcodeParam { NameVariable(String), Accessor(AccessPlace, Expr), Calculated(Expr), } impl fmt::Display for InstructionOpcodeParam { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { InstructionOpcodeParam::NameVariable(name) => { write!(f, "{}", name) } InstructionOpcodeParam::Accessor(access, expr) => { write!(f, "{}:{}", access, expr) } InstructionOpcodeParam::Calculated(expr) => { write!(f, "{}", expr) } } } } #[derive(Debug)] pub struct InstructionOpcode { pub name: String, pub params: Vec, } impl InstructionOpcode { pub fn get_single(&self) -> InstructionOpcodeParam { self.params[0].clone() } pub fn get_at(&self, idx: usize) -> Option { self.params.get(idx).map(|x| x.clone()) } } #[derive(Debug)] pub enum Instruction { Expr(Expr), Assignment(Expr, Vec), Opcode(InstructionOpcode), Compound(Box, Vec), } #[derive(Debug)] pub struct Subroutine { pub name: String, pub instructions: Vec, } #[derive(Debug)] pub enum Definition { VariableGlobal(String, Option), VariableCTX(String, Option), Subroutine(Subroutine), Include(String), } // temporarily use display as a decompile printer impl fmt::Display for Definition { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { Definition::VariableCTX(var, Some(array)) => { write!(f, "VAR CTX {}[{}]", var, array) } Definition::VariableGlobal(var, Some(array)) => { write!(f, "VAR GLOBAL {}[{}]", var, array) } Definition::VariableCTX(var, None) => { write!(f, "VAR CTX {}", var) } Definition::VariableGlobal(var, None) => { write!(f, "VAR GLOBAL {}", var) } Definition::Include(include) => { write!(f, "INCLUDE {}", include) } Definition::Subroutine(subroutine) => { // subroutine will be decompiled let mut run = Execution::new(); run.start(subroutine); Ok(()) } } } } #[derive(Debug)] pub struct Program { pub constants: Vec<(String, String)>, pub definitions: Vec, } impl Program { pub fn decompile(self) { for c in self.constants { println!("CONST {} = {}", c.0, c.1); } for def in self.definitions { println!("{}", def); } } } fn parse_long_expr(expr: Pair) -> Expr { use pest::pratt_parser::PrattParser; use pest::pratt_parser::{Assoc, Op}; assert!(expr.as_rule() == Rule::expr); let pratt = PrattParser::new() .op(Op::infix(Rule::r#mod, Assoc::Left)) .op(Op::infix(Rule::add, Assoc::Left) | Op::infix(Rule::sub, Assoc::Left)) .op(Op::infix(Rule::mul, Assoc::Left) | Op::infix(Rule::div, Assoc::Left)) .op(Op::infix(Rule::and, Assoc::Left) | Op::infix(Rule::or, Assoc::Left) | Op::infix(Rule::xor, Assoc::Left)) .op(Op::infix(Rule::shl, Assoc::Left) | Op::infix(Rule::shr, Assoc::Left)) .op(Op::infix(Rule::power, Assoc::Left)) .op(Op::prefix(Rule::not) | Op::prefix(Rule::negate)) .op(Op::postfix(Rule::increment) | Op::postfix(Rule::decrement)) ; // println!("{:?}", expr); pratt .map_primary(|primary| match primary.as_rule() { Rule::expr => { // ( expr ) parse_expr(primary) } _ => { parse_expr_atomic(primary) } }) .map_prefix(|op, rhs| { match op.as_str() { "-" => { Expr::Unary(UnaryOp::Negate, Box::new(rhs)) } "!" => { Expr::Unary(UnaryOp::Not, Box::new(rhs)) } _ => unreachable!() }}) .map_infix(|lhs, op, rhs| { let binop: BinaryOp = op.as_str().to_string().into(); Expr::Binary(Box::new(lhs.clone()), binop, Box::new(rhs.clone())) }) .map_postfix(|expr, op| { match op.as_rule() { Rule::decrement => Expr::Decrement(Box::new(expr.clone())), Rule::increment => Expr::Increment(Box::new(expr.clone())), _ => unreachable!() } }) .parse(expr.into_inner()) } fn parse_expr_atomic(expr: Pair) -> Expr { match expr.as_rule() { Rule::register => Expr::Register(Register::from_name(expr.as_str())), Rule::number => { match expr.as_str().to_string().parse::().ok() { Some(v) => Expr::Value(v), None => Expr::ValueComplex(expr.as_str().into()), } } Rule::freeinput => Expr::FreeInput(expr.as_str().into()), Rule::special_variable => { let v = expr.as_str(); match v { "A" | "B" | "C" | "D" | "E" | "SR" | "RR" | "SP" | "CTX" | "HASHPOS" => { Expr::Register(Register::from_name(v)) } _ => Expr::NameVariable(v.into()), } } Rule::constid => { let v = expr.as_str(); match v { "A" | "B" | "C" | "D" | "E" | "SR" | "RR" | "SP" | "CTX" | "HASHPOS" => { Expr::Register(Register::from_name(v)) } _ => Expr::NameVariable(v.into()), } } Rule::reference => { let v = expr.as_str(); Expr::NameVariable(v.into()) } _ => { println!("parse atomic {:?}", expr); unreachable!(); } } } pub fn parse_expr(inst: Pair) -> Expr { match inst.as_rule() { Rule::expr => { parse_long_expr(inst) // let mut peak = inst.clone().into_inner(); // if peak.len() > 1 { // return parse_long_expr(inst); // } // let expr = peak.next().unwrap(); // parse_expr_atomic(expr) } Rule::negate_expr => { let mut p = inst.into_inner(); let op = match p.next().unwrap().as_str() { "-" => UnaryOp::Negate, "!" => UnaryOp::Not, _ => unreachable!() }; let expr = parse_expr(p.next().unwrap()); Expr::Unary(op, Box::new(expr)) } Rule::tenary_expr => { let mut p = inst.into_inner(); let condition = parse_expr(p.next().unwrap()); let ifbranch = parse_expr(p.next().unwrap()); let elsebranch = parse_expr(p.next().unwrap()); Expr::Tenary(Box::new(condition), Box::new(ifbranch), Box::new(elsebranch)) } Rule::register | Rule::number | Rule::freeinput | Rule::special_variable | Rule::constid | Rule::reference => { parse_expr_atomic(inst) } _ => { unreachable!(); } } } pub fn parse_param(param: Pair) -> InstructionOpcodeParam { match param.as_rule() { Rule::identifier => InstructionOpcodeParam::NameVariable(param.as_str().into()), Rule::memory_access => { let mut p = param.into_inner(); let scope = match p.next().unwrap().as_str() { "MEM" => AccessPlace::Memory, "STACK" => AccessPlace::Stack, "SYS" => AccessPlace::System, _ => unreachable!(), }; let expr = parse_expr(p.next().unwrap()); InstructionOpcodeParam::Accessor(scope, expr) } Rule::expr => { let e = parse_expr(param); if let Expr::NameVariable(name) = e { InstructionOpcodeParam::NameVariable(name) } else { InstructionOpcodeParam::Calculated(e) } } _ => { unreachable!(); } } } // parse a single instruction, could be type 1, 2 or 3 pub fn parse_instruction(instruction: Pair) -> Instruction { // println!("parsing inst {:?}", instruction); match instruction.as_rule() { Rule::instruction_type1 => { let inst = instruction.into_inner().next().unwrap(); if inst.as_rule() == Rule::expr { Instruction::Expr(parse_expr(inst)) } else if inst.as_rule() == Rule::negate_expr { Instruction::Expr(parse_expr(inst)) } else if inst.as_rule() == Rule::assignment { let mut p = inst.into_inner(); let expr = parse_expr(p.next().unwrap()); let registers: Vec = p.map(|r| Register::from_name(r.as_str())).collect(); Instruction::Assignment(expr, registers) } else { println!("parsing inst {:?}", inst); unreachable!() } // println!("typ1 {:?}", inst) } // instruction_type2 is a list of instruction_right and will be resolved using parse_instruction_list Rule::instruction_right => { let mut inst = instruction.into_inner(); let name = inst.next().unwrap().as_str(); let mut params: Vec = Vec::new(); while let Some(param) = inst.next() { let mut p: Vec = param.into_inner().map(parse_param).collect(); params.append(&mut p); } Instruction::Opcode(InstructionOpcode { name: name.into(), params, }) } Rule::instruction_type3 => { let mut insts = instruction.into_inner(); let lhs = parse_instruction(insts.next().unwrap()); let rhs = parse_instruction_list(insts.next().unwrap()); Instruction::Compound(Box::new(lhs), rhs) } _ => { unreachable!() } } } // parse a list of instruction pub fn parse_instruction_list(instruction_list: Pair) -> Vec { let mut parsed_instructions = Vec::new(); for instruction in instruction_list.into_inner() { if instruction.as_rule() == Rule::instruction_type2 { // could be many or 1 for inst in instruction.into_inner() { parsed_instructions.push(parse_instruction(inst)); } } else { parsed_instructions.push(parse_instruction(instruction)); } } return parsed_instructions; }