zkasm/src/zkasm.rs
2024-06-18 22:05:01 +07:00

624 lines
19 KiB
Rust

/// Parser type for zkasm sources. Its implementation is generated by
/// `pest_derive` from the grammar file named in the `grammar` attribute below.
#[derive(pest_derive::Parser)]
#[grammar = "zkasm.pest"]
pub struct ZkasmParser;
use pest::iterators::Pair;
use std::fmt;
use crate::execution::Execution;
/// Prefix operators that take a single operand.
#[derive(Debug, Clone)]
pub enum UnaryOp {
    /// Rendered as `-`.
    Negate,
    /// Rendered as `!`.
    Not,
}

impl fmt::Display for UnaryOp {
    /// Writes the operator's one-character symbol.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let symbol = match self {
            UnaryOp::Negate => "-",
            UnaryOp::Not => "!",
        };
        f.write_str(symbol)
    }
}
/// Infix operators that can appear between two expressions.
#[derive(Debug, Clone, PartialEq)]
pub enum BinaryOp {
    Add,
    Sub,
    Mul,
    Div,
    Mod,
    Lt,
    Gt,
    Le,
    Ge,
    Eq,
    Neq,
    LogicalXor,
    LogicalAnd,
    LogicalOr,
    ArithAnd,
    ArithOr,
    ShiftLeft,
    ShiftRight,
    Power,
}

impl fmt::Display for BinaryOp {
    /// Writes the operator's source symbol.
    ///
    /// The match is exhaustive on purpose: adding a variant without giving it a
    /// symbol is a compile error instead of a runtime panic.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let symbol = match self {
            BinaryOp::Add => "+",
            BinaryOp::Sub => "-",
            BinaryOp::Mul => "*",
            BinaryOp::Div => "/",
            BinaryOp::Mod => "%",
            BinaryOp::Lt => "<",
            BinaryOp::Gt => ">",
            BinaryOp::Le => "<=",
            BinaryOp::Ge => ">=",
            BinaryOp::Eq => "==",
            BinaryOp::Neq => "!=",
            // NOTE(review): `^` is assumed to be the token matched by the
            // grammar's `xor` rule -- confirm against zkasm.pest.
            BinaryOp::LogicalXor => "^",
            BinaryOp::LogicalAnd => "&&",
            BinaryOp::LogicalOr => "||",
            BinaryOp::ArithAnd => "&",
            BinaryOp::ArithOr => "|",
            BinaryOp::ShiftLeft => "<<",
            BinaryOp::ShiftRight => ">>",
            BinaryOp::Power => "**",
        };
        f.write_str(symbol)
    }
}

impl From<String> for BinaryOp {
    /// Converts an operator symbol into its `BinaryOp`.
    ///
    /// Implemented as `From` so that the standard blanket impl keeps
    /// `String::into()` working for existing callers.
    ///
    /// # Panics
    ///
    /// Panics if `symbol` is not one of the operator symbols listed below.
    fn from(symbol: String) -> Self {
        match symbol.as_str() {
            "+" => BinaryOp::Add,
            "-" => BinaryOp::Sub,
            "*" => BinaryOp::Mul,
            "/" => BinaryOp::Div,
            "%" => BinaryOp::Mod,
            "<" => BinaryOp::Lt,
            ">" => BinaryOp::Gt,
            "<=" => BinaryOp::Le,
            ">=" => BinaryOp::Ge,
            "==" => BinaryOp::Eq,
            "!=" => BinaryOp::Neq,
            "|" => BinaryOp::ArithOr,
            "&" => BinaryOp::ArithAnd,
            "||" => BinaryOp::LogicalOr,
            "&&" => BinaryOp::LogicalAnd,
            "<<" => BinaryOp::ShiftLeft,
            ">>" => BinaryOp::ShiftRight,
            "**" => BinaryOp::Power,
            // NOTE(review): see the matching note in `Display` about `^`.
            "^" => BinaryOp::LogicalXor,
            other => unreachable!("unknown binary operator `{}`", other),
        }
    }
}
/// Expression tree built by the expression parsers in this file.
#[derive(Debug, Clone)]
pub enum Expr {
    /// Prefix operator and its operand.
    Unary(UnaryOp, Box<Expr>),
    /// Left operand, operator, right operand.
    Binary(Box<Expr>, BinaryOp, Box<Expr>),
    /// Condition, value when the condition holds, value otherwise.
    Tenary(Box<Expr>, Box<Expr>, Box<Expr>),
    /// Free-input text kept exactly as it appeared in the source.
    FreeInput(String),
    /// A register operand.
    Register(Register),
    /// Numeric literal that parsed as a `u64`.
    Value(u64),
    /// Numeric literal kept as text (for hex numbers).
    ValueComplex(String),
    /// A name that is not treated as a register.
    NameVariable(String),
    /// Postfix `++` applied to the inner expression.
    Increment(Box<Expr>),
    /// Postfix `--` applied to the inner expression.
    Decrement(Box<Expr>),
}

impl fmt::Display for Expr {
    /// Renders the expression with explicit parentheses around compound nodes.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Expr::Value(number) => write!(f, "{}", number),
            Expr::ValueComplex(text) => {
                // A `0x` literal small enough for `u64` is shown in decimal with
                // the original spelling kept in a trailing comment.
                let decoded = text
                    .strip_prefix("0x")
                    .and_then(|digits| u64::from_str_radix(digits, 16).ok());
                match decoded {
                    Some(number) => write!(f, "{}/* {} */", number, text),
                    None => write!(f, "{}", text),
                }
            }
            Expr::NameVariable(text) | Expr::FreeInput(text) => f.write_str(text),
            Expr::Register(register) => f.write_str(&register.name()),
            Expr::Unary(op, operand) => write!(f, "{}({})", op, operand),
            Expr::Binary(left, op, right) => write!(f, "({} {} {})", left, op, right),
            Expr::Tenary(condition, when_true, when_false) => {
                write!(f, "({} ? {} : {})", condition, when_true, when_false)
            }
            Expr::Increment(inner) => write!(f, "({})++", inner),
            Expr::Decrement(inner) => write!(f, "({})--", inner),
        }
    }
}
/// Registers that can be named in an expression or an assignment.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum Register {
    A,
    B,
    C,
    D,
    E,
    SP,
    RR,
    SR,
    PC,
    CTX,
    RCX,
    GAS,
    HASHPOS,
    HASH_LEFT,
    HASH_RIGHT,
    OLD_ROOT,
    NEW_ROOT,
    VALUE_LOW,
    VALUE_HIGH,
    SIBLING_VALUE_HASH,
    RKEY,
    SIBLING_RKEY,
    RKEY_BIT,
    LEVEL,
}

impl fmt::Display for Register {
    /// Writes the same text as [`Register::name`].
    ///
    /// Special registers print their `name()`; `A`..`E` print their variant
    /// identifier, which is also what `name()` yields for them.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        f.write_str(&self.name())
    }
}

impl Register {
    /// Returns `false` for `A`, `B`, `C`, `D` and `E`, `true` for every other register.
    pub fn is_special(self) -> bool {
        !matches!(
            self,
            Register::A | Register::B | Register::C | Register::D | Register::E
        )
    }

    /// Returns the display name: a descriptive alias for a handful of
    /// registers, the variant identifier for the rest.
    pub fn name(self) -> String {
        let alias = match self {
            Register::SR => "StateRoot",
            Register::RR => "ReturnRegister",
            Register::GAS => "GAS",
            Register::HASHPOS => "HASHPOS",
            Register::CTX => "CONTEXT",
            Register::PC => "EVM(ProgramCounter)",
            Register::SP => "EVM(StackPointer)",
            _ => return format!("{:?}", self),
        };
        alias.to_string()
    }

    /// Looks a register up by its source identifier (e.g. `"SP"`, `"HASH_LEFT"`).
    ///
    /// # Panics
    ///
    /// Panics when `name` is not one of the identifiers in the table below.
    pub fn from_name(name: &str) -> Self {
        const BY_NAME: &[(&str, Register)] = &[
            ("A", Register::A),
            ("B", Register::B),
            ("C", Register::C),
            ("D", Register::D),
            ("E", Register::E),
            ("SP", Register::SP),
            ("RR", Register::RR),
            ("SR", Register::SR),
            ("PC", Register::PC),
            ("CTX", Register::CTX),
            ("RCX", Register::RCX),
            ("GAS", Register::GAS),
            ("HASHPOS", Register::HASHPOS),
            ("HASH_LEFT", Register::HASH_LEFT),
            ("HASH_RIGHT", Register::HASH_RIGHT),
            ("OLD_ROOT", Register::OLD_ROOT),
            ("NEW_ROOT", Register::NEW_ROOT),
            ("VALUE_LOW", Register::VALUE_LOW),
            ("VALUE_HIGH", Register::VALUE_HIGH),
            ("SIBLING_VALUE_HASH", Register::SIBLING_VALUE_HASH),
            ("RKEY", Register::RKEY),
            ("SIBLING_RKEY", Register::SIBLING_RKEY),
            ("RKEY_BIT", Register::RKEY_BIT),
            ("LEVEL", Register::LEVEL),
        ];
        match BY_NAME.iter().find(|entry| entry.0 == name) {
            Some(entry) => entry.1,
            None => unreachable!(),
        }
    }
}
/// The storage area named in a memory-access parameter.
#[derive(Debug, Clone)]
pub enum AccessPlace {
    /// Written as `MEM`.
    Memory,
    /// Written as `STACK`.
    Stack,
    /// Written as `SYS`.
    System,
}

impl fmt::Display for AccessPlace {
    /// Writes the keyword used for this area in the source.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let keyword = match self {
            AccessPlace::Memory => "MEM",
            AccessPlace::Stack => "STACK",
            AccessPlace::System => "SYS",
        };
        f.write_str(keyword)
    }
}
/// A single parameter of an opcode instruction.
#[derive(Debug, Clone)]
pub enum InstructionOpcodeParam {
    /// A bare name.
    NameVariable(String),
    /// An access into one of the `AccessPlace` areas at the given expression.
    Accessor(AccessPlace, Expr),
    /// Any other expression.
    Calculated(Expr),
}

impl fmt::Display for InstructionOpcodeParam {
    /// Names and expressions print as themselves; accessors print as `PLACE:expr`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Self::NameVariable(name) => f.write_str(name),
            Self::Accessor(place, address) => write!(f, "{}:{}", place, address),
            Self::Calculated(value) => write!(f, "{}", value),
        }
    }
}
/// An opcode name together with its parsed parameters.
#[derive(Debug)]
pub struct InstructionOpcode {
    pub name: String,
    pub params: Vec<InstructionOpcodeParam>,
}

impl InstructionOpcode {
    /// Returns a copy of the first parameter.
    ///
    /// # Panics
    ///
    /// Panics when the opcode has no parameters.
    pub fn get_single(&self) -> InstructionOpcodeParam {
        let first = &self.params[0];
        first.clone()
    }

    /// Returns a copy of the parameter at `idx`, or `None` when `idx` is out of range.
    pub fn get_at(&self, idx: usize) -> Option<InstructionOpcodeParam> {
        self.params.get(idx).cloned()
    }
}
/// One parsed instruction, as produced by `parse_instruction`.
#[derive(Debug)]
pub enum Instruction {
// A bare expression (built from an `expr` or `negate_expr` pair).
Expr(Expr),
// The expression of an `assignment` pair followed by the registers listed after it.
Assignment(Expr, Vec<Register>),
// An opcode name with its parameters (built from an `instruction_right` pair).
Opcode(InstructionOpcode),
// A leading instruction plus the instruction list that follows it
// (built from an `instruction_type3` pair).
Compound(Box<Instruction>, Vec<Instruction>),
}
/// A named sequence of instructions.
#[derive(Debug)]
pub struct Subroutine {
// Name of the subroutine.
pub name: String,
// Instructions in the order they are stored.
pub instructions: Vec<Instruction>,
}
/// A top-level item of a program.
#[derive(Debug)]
pub enum Definition {
    /// `VAR GLOBAL` declaration: name and optional bracketed expression.
    VariableGlobal(String, Option<Expr>),
    /// `VAR CTX` declaration: name and optional bracketed expression.
    VariableCTX(String, Option<Expr>),
    /// A subroutine body.
    Subroutine(Subroutine),
    /// An `INCLUDE` of the given text.
    Include(String),
}

// Display is temporarily used as the decompile printer.
impl fmt::Display for Definition {
    /// Writes variable and include definitions as text. A subroutine writes
    /// nothing to `f`; it is instead handed to a fresh `Execution`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Definition::VariableGlobal(name, length) => match length {
                Some(array) => write!(f, "VAR GLOBAL {}[{}]", name, array),
                None => write!(f, "VAR GLOBAL {}", name),
            },
            Definition::VariableCTX(name, length) => match length {
                Some(array) => write!(f, "VAR CTX {}[{}]", name, array),
                None => write!(f, "VAR CTX {}", name),
            },
            Definition::Include(path) => write!(f, "INCLUDE {}", path),
            Definition::Subroutine(body) => {
                // The subroutine is decompiled by running it through `Execution`.
                let mut execution = Execution::new();
                execution.start(body);
                Ok(())
            }
        }
    }
}
/// A whole program: its constants (name, value) and its definitions.
#[derive(Debug)]
pub struct Program {
    pub constants: Vec<(String, String)>,
    pub definitions: Vec<Definition>,
}

impl Program {
    /// Prints every constant as `CONST name = value`, then prints each
    /// definition through its `Display` impl, one per line, to stdout.
    pub fn decompile(self) {
        let Program {
            constants,
            definitions,
        } = self;
        for (name, value) in constants {
            println!("CONST {} = {}", name, value);
        }
        for definition in definitions {
            println!("{}", definition);
        }
    }
}
/// Builds an [`Expr`] from an `expr` pair by running pest's Pratt parser over
/// the pair's children.
///
/// Nested `expr` children (parenthesised sub-expressions) go back through
/// [`parse_expr`]; every other primary goes to [`parse_expr_atomic`].
///
/// # Panics
///
/// Panics if `expr` is not a `Rule::expr` pair, or if a prefix, infix or
/// postfix token is not one the closures below know how to convert.
fn parse_long_expr(expr: Pair<Rule>) -> Expr {
    use pest::pratt_parser::PrattParser;
    use pest::pratt_parser::{Assoc, Op};

    assert!(expr.as_rule() == Rule::expr);

    // Per the pest documentation, each `.op(..)` call binds tighter than the
    // one before it, so `mod` is the loosest operator here and the postfix
    // operators are the tightest.
    // NOTE(review): the comparison and logical operators known to `BinaryOp`
    // are not registered here -- confirm the grammar never emits them as infix
    // tokens inside `expr`.
    let pratt = PrattParser::new()
        .op(Op::infix(Rule::r#mod, Assoc::Left))
        .op(Op::infix(Rule::add, Assoc::Left) | Op::infix(Rule::sub, Assoc::Left))
        .op(Op::infix(Rule::mul, Assoc::Left) | Op::infix(Rule::div, Assoc::Left))
        .op(Op::infix(Rule::and, Assoc::Left)
            | Op::infix(Rule::or, Assoc::Left)
            | Op::infix(Rule::xor, Assoc::Left))
        .op(Op::infix(Rule::shl, Assoc::Left) | Op::infix(Rule::shr, Assoc::Left))
        .op(Op::infix(Rule::power, Assoc::Left))
        .op(Op::prefix(Rule::not) | Op::prefix(Rule::negate))
        .op(Op::postfix(Rule::increment) | Op::postfix(Rule::decrement));

    pratt
        .map_primary(|primary| match primary.as_rule() {
            // ( expr )
            Rule::expr => parse_expr(primary),
            _ => parse_expr_atomic(primary),
        })
        .map_prefix(|op, rhs| {
            let unary = match op.as_str() {
                "-" => UnaryOp::Negate,
                "!" => UnaryOp::Not,
                _ => unreachable!(),
            };
            Expr::Unary(unary, Box::new(rhs))
        })
        .map_infix(|lhs, op, rhs| {
            let binop: BinaryOp = op.as_str().to_string().into();
            // `lhs` and `rhs` are owned by the closure, so they are moved into
            // the node rather than deep-cloned.
            Expr::Binary(Box::new(lhs), binop, Box::new(rhs))
        })
        .map_postfix(|operand, op| match op.as_rule() {
            Rule::decrement => Expr::Decrement(Box::new(operand)),
            Rule::increment => Expr::Increment(Box::new(operand)),
            _ => unreachable!(),
        })
        .parse(expr.into_inner())
}
/// Converts a leaf pair (register, number, free input, name or reference)
/// into the matching [`Expr`] variant.
///
/// # Panics
///
/// Prints the offending pair and panics when its rule is not one of the leaf
/// rules handled below.
fn parse_expr_atomic(expr: Pair<Rule>) -> Expr {
    let text = expr.as_str();
    match expr.as_rule() {
        Rule::register => Expr::Register(Register::from_name(text)),
        // Numbers that do not fit a decimal `u64` are kept as text.
        Rule::number => match text.parse::<u64>() {
            Ok(value) => Expr::Value(value),
            Err(_) => Expr::ValueComplex(text.into()),
        },
        Rule::freeinput => Expr::FreeInput(text.into()),
        // Both rules share one treatment: a fixed set of identifiers is turned
        // into registers, anything else stays a plain name.
        Rule::special_variable | Rule::constid => {
            let names_register = matches!(
                text,
                "A" | "B" | "C" | "D" | "E" | "SR" | "RR" | "SP" | "CTX" | "HASHPOS"
            );
            if names_register {
                Expr::Register(Register::from_name(text))
            } else {
                Expr::NameVariable(text.into())
            }
        }
        Rule::reference => Expr::NameVariable(text.into()),
        _ => {
            println!("parse atomic {:?}", expr);
            unreachable!();
        }
    }
}
/// Converts any expression-like pair into an [`Expr`].
///
/// `expr` pairs go through the Pratt parser, `negate_expr` and `tenary_expr`
/// are assembled here, and leaf rules are delegated to `parse_expr_atomic`.
///
/// # Panics
///
/// Panics on any other rule, or when an expected child pair is missing.
pub fn parse_expr(inst: Pair<Rule>) -> Expr {
    match inst.as_rule() {
        Rule::expr => parse_long_expr(inst),
        Rule::negate_expr => {
            // Children: the operator token, then its operand.
            let mut parts = inst.into_inner();
            let operator = parts.next().unwrap();
            let op = match operator.as_str() {
                "-" => UnaryOp::Negate,
                "!" => UnaryOp::Not,
                _ => unreachable!(),
            };
            let operand = parts.next().unwrap();
            Expr::Unary(op, Box::new(parse_expr(operand)))
        }
        Rule::tenary_expr => {
            // Children in order: condition, if-branch, else-branch.
            let mut parts = inst.into_inner();
            let mut next_expr = || parse_expr(parts.next().unwrap());
            let condition = next_expr();
            let ifbranch = next_expr();
            let elsebranch = next_expr();
            Expr::Tenary(
                Box::new(condition),
                Box::new(ifbranch),
                Box::new(elsebranch),
            )
        }
        Rule::register
        | Rule::number
        | Rule::freeinput
        | Rule::special_variable
        | Rule::constid
        | Rule::reference => parse_expr_atomic(inst),
        _ => unreachable!(),
    }
}
/// Converts one opcode parameter pair into an [`InstructionOpcodeParam`].
///
/// An `expr` that turns out to be a plain name is reported as `NameVariable`
/// rather than `Calculated`.
///
/// # Panics
///
/// Panics on an unexpected rule, an unknown access keyword, or a missing child.
pub fn parse_param(param: Pair<Rule>) -> InstructionOpcodeParam {
    match param.as_rule() {
        Rule::identifier => InstructionOpcodeParam::NameVariable(param.as_str().into()),
        Rule::memory_access => {
            // Children: the area keyword, then the expression inside it.
            let mut parts = param.into_inner();
            let place = match parts.next().unwrap().as_str() {
                "MEM" => AccessPlace::Memory,
                "STACK" => AccessPlace::Stack,
                "SYS" => AccessPlace::System,
                _ => unreachable!(),
            };
            let address = parts.next().unwrap();
            InstructionOpcodeParam::Accessor(place, parse_expr(address))
        }
        Rule::expr => match parse_expr(param) {
            Expr::NameVariable(name) => InstructionOpcodeParam::NameVariable(name),
            other => InstructionOpcodeParam::Calculated(other),
        },
        _ => unreachable!(),
    }
}
/// Parses a single instruction pair: a type 1 instruction, an
/// `instruction_right` opcode, or a type 3 instruction.
///
/// `instruction_type2` is a list of `instruction_right` pairs and is unpacked
/// by `parse_instruction_list`, not here.
///
/// # Panics
///
/// Panics on any other rule or when an expected child pair is missing.
pub fn parse_instruction(instruction: Pair<Rule>) -> Instruction {
    match instruction.as_rule() {
        Rule::instruction_type1 => {
            let body = instruction.into_inner().next().unwrap();
            match body.as_rule() {
                Rule::expr | Rule::negate_expr => Instruction::Expr(parse_expr(body)),
                Rule::assignment => {
                    // First child is the expression, the rest are register names.
                    let mut parts = body.into_inner();
                    let value = parse_expr(parts.next().unwrap());
                    let targets: Vec<Register> = parts
                        .map(|reg| Register::from_name(reg.as_str()))
                        .collect();
                    Instruction::Assignment(value, targets)
                }
                _ => {
                    println!("parsing inst {:?}", body);
                    unreachable!()
                }
            }
        }
        Rule::instruction_right => {
            // First child is the opcode name; each remaining child wraps the
            // parameter pairs, which are flattened into one list.
            let mut parts = instruction.into_inner();
            let name = parts.next().unwrap().as_str();
            let params: Vec<InstructionOpcodeParam> = parts
                .flat_map(|group| group.into_inner().map(parse_param))
                .collect();
            Instruction::Opcode(InstructionOpcode {
                name: name.into(),
                params,
            })
        }
        Rule::instruction_type3 => {
            let mut parts = instruction.into_inner();
            let head = parse_instruction(parts.next().unwrap());
            let tail = parse_instruction_list(parts.next().unwrap());
            Instruction::Compound(Box::new(head), tail)
        }
        _ => unreachable!(),
    }
}
// parse a list of instruction
pub fn parse_instruction_list(instruction_list: Pair<Rule>) -> Vec<Instruction> {
let mut parsed_instructions = Vec::new();
for instruction in instruction_list.into_inner() {
if instruction.as_rule() == Rule::instruction_type2 {
// could be many or 1
for inst in instruction.into_inner() {
parsed_instructions.push(parse_instruction(inst));
}
} else {
parsed_instructions.push(parse_instruction(instruction));
}
}
return parsed_instructions;
}