You can use a very high-level language like ML, Python, Ruby, or Perl to quickly write compilers for little languages that directly translate abstract syntax trees into machine language lists.
Here is a compiler for a little language of plus expressions over numbers and identifiers, with assignments and while statements.
(*
* A little compiler from a trivial imperative language to a small
* single-accumulator machine.
*
* Source Language
* ---------------
* EXP -> NML | ID | EXP "+" ID
* BEX -> EXP "=" ID | "not" BEX
* COM -> "skip"
* | ID ":=" EXP
* | COM ";" COM
* | "while" BEX "do" COM "end"
*)
type num = int
type ide = string
datatype exp = (* Arithmetic Expressions *)
Lit of num
| Ident of ide
| Plus of exp * ide
datatype bex = (* Boolean Expressions *)
Eq of exp * ide
| Not of bex
datatype com = (* Commands *)
Skip
| Assn of ide * exp
| Seq of com list
| While of bex * com
(*
* Target Language
* ---------------
* The target language is an assembly language for a machine with a
* single accumulator and eight simple instructions.
*)
datatype inst =
LDI of num (* Load Immediate *)
| LD of ide (* Load *)
| ST of ide (* Store *)
| NOT (* Not *)
| EQ of ide (* Equal *)
| ADD of ide (* Add *)
| JMP of num (* Jump *)
| JZR of num (* Jump if zero *)
(*
* The Compiler
* ------------
* KE e loc the code from compiling expression e at address loc
* KB b loc the code from compiling boolexp b at address loc
* KC c loc the code from compiling command c at address loc
*)
fun KE (Lit n) loc = [LDI n]
| KE (Ident x) loc = [LD x]
| KE (Plus(e,x)) loc = KE e loc @ [ADD x]
fun KB (Eq(e,x)) loc = KE e loc @ [EQ x]
| KB (Not b) loc = KB b loc @ [NOT]
fun KC (Skip) loc = []
| KC (Assn(x,e)) loc = KE e loc @ [ST x]
| KC (Seq []) loc = []
| KC (Seq (c::cs)) loc =
let val f = KC c loc in
f @ KC (Seq cs) (loc + length f)
end
| KC (While(b,c)) loc =
let val f1 = KB b loc in
let val f2 = KC c (loc + length f1 + 1) in
f1 @ [JZR (loc + length f1 + 1 + length f2 + 1)] @ f2 @ [JMP loc]
end
end;
(*
* Pretty-Printing a target program
*)
fun mkstr (LDI n) = "LDI " ^ makestring n
| mkstr (LD x) = "LD " ^ x
| mkstr (ST x) = "ST " ^ x
| mkstr (NOT) = "NOT "
| mkstr (EQ x) = "EQ " ^ x
| mkstr (ADD x) = "ADD " ^ x
| mkstr (JMP n) = "JMP " ^ makestring n
| mkstr (JZR n) = "JZR " ^ makestring n;
fun printinst h loc =
TextIO.output(TextIO.stdOut, makestring loc ^ "\t" ^ mkstr h ^ "\n");
fun pprint [] loc = ()
| pprint (h::t) loc = (printinst h loc; pprint t (loc + 1));
(*
* An Example Program
* ------------------
* x := 5;
* skip;
* while not 8 = y do
* z := (19 + x) + y;
* end;
* x := z;
*)
val prog1 =
Seq [
Assn("x", Lit 5),
Skip,
While(
Not(Eq(Lit 8, "y")),
Assn("z",Plus(Plus(Lit 19, "x"), "y"))),
Assn("x", Ident "z")];
(*
* Top-level call to invoke the compiler
*)
fun compile c loc = pprint (KC c loc) loc
Now lets add multiplication and input/output.
(*
* A little compiler from a trivial imperative language to a small
* single-accumulator machine. The source language has addition
* and multiplication operators, assignments, while loops, and a
* couple I/O statements.
*
* Source Language
* ---------------
* EXP -> NML | ID | EXP "+" ID | EXP "*" ID | "read"
* BEX -> EXP "=" ID | "not" BEX
* COM -> "skip"
* | "write" EXP
* | ID ":=" EXP
* | COM ";" COM
* | "while" BEX "do" COM "end"
*)
type num = int
type ide = string
datatype exp =
Lit of num
| Ident of ide
| Plus of exp * ide
| Times of exp * ide
| Read
datatype bex =
Eq of exp * ide
| Not of bex
datatype com =
Skip
| Write of exp
| Assn of ide * exp
| Seq of com * com
| While of bex * com
(*
* Target Language
* ---------------
* The target language is an assembly language for a machine with a
* single accumulator and eleven simple instructions.
*)
datatype inst =
LDI of num (* Load Immediate *)
| LD of ide (* Load *)
| ST of ide (* Store *)
| NOT (* Not *)
| EQ of ide (* Equal *)
| ADD of ide (* Add *)
| MUL of ide (* Multiply *)
| JMP of num (* Jump *)
| JZR of num (* Jump if zero *)
| IN (* Input *)
| OUT (* Output *)
(*
* The Compiler
* ------------
* KE e loc the code from compiling expression e at address loc
* KB b loc the code from compiling boolexp b at address loc
* KC c loc the code from compiling command c at address loc
*)
fun KE (Lit n) loc = [LDI n]
| KE (Ident x) loc = [LD x]
| KE (Plus(e,x)) loc = KE e loc @ [ADD x]
| KE (Times(e,x)) loc = KE e loc @ [MUL x]
| KE (Read) loc = [IN]
fun KB (Eq(e,x)) loc = KE e loc @ [EQ x]
| KB (Not b) loc = KB b loc @ [NOT]
fun KC (Skip) loc = []
| KC (Write e) loc = KE e loc @ [OUT]
| KC (Assn(x,e)) loc = KE e loc @ [ST x]
| KC (Seq(c1,c2)) loc =
let val f = KC c1 loc in
f @ KC c2 (loc + length f)
end
| KC (While(b,c)) loc =
let val f1 = KB b loc in
let val f2 = KC c (loc + length f1 + 1) in
f1 @ [JZR (loc + length f1 + 1 + length f2 + 1)] @ f2 @ [JMP loc]
end
end;
(*
* Pretty-Printing a target program
*)
fun mkstr (LDI n) = "LDI " ^ makestring n
| mkstr (LD x) = "LD " ^ x
| mkstr (ST x) = "ST " ^ x
| mkstr (NOT) = "NOT "
| mkstr (EQ x) = "EQ " ^ x
| mkstr (ADD x) = "ADD " ^ x
| mkstr (MUL x) = "MUL " ^ x
| mkstr (JMP n) = "JMP " ^ makestring n
| mkstr (JZR n) = "JZR " ^ makestring n
| mkstr (IN) = "IN "
| mkstr (OUT) = "OUT "
fun printinst h loc =
TextIO.output(TextIO.stdOut, makestring loc ^ "\t" ^ mkstr h ^ "\n");
fun pprint [] loc = ()
| pprint (h::t) loc = (printinst h loc; pprint t (loc + 1))
(*
* An Example Program
* ------------------
* i := read;
* product := 1;
* while not 0 = i do
* product := product * i;
* i := i - 1
* end;
* write product
*)
val prog1 =
Seq (
Assn("i",Read),
Seq (
Assn("product",Lit 1),
Seq (
While(
Not(Eq(Lit 0,"i")),
Seq (
Assn("product",Times(Ident "product","i")),
Assn("i",Plus(Lit(~1),"i")))),
Write(Ident "product"))));
(*
* Top-level call to invoke the compiler
*)
fun compile c loc = pprint (KC c loc) loc
This is the same source language as before, but now the target architecture uses relative jumps.
(*
* A little compiler from a trivial imperative language to a small
* single-accumulator machine. The source language has addition
* and multiplication operators, assignments, while loops, and a
* couple I/O statements.
*
* Source Language
* ---------------
* EXP -> NML | ID | EXP "+" ID | EXP "*" ID | "read"
* BEX -> EXP "=" ID | "not" BEX
* COM -> "skip"
* | "write" EXP
* | ID ":=" EXP
* | COM ";" COM
* | "while" BEX "do" COM "end"
*)
type num = int
type ide = string
datatype exp =
Lit of num
| Ident of ide
| Plus of exp * ide
| Times of exp * ide
| Read
datatype bex =
Eq of exp * ide
| Not of bex
datatype com =
Skip
| Write of exp
| Assn of ide * exp
| Seq of com * com
| While of bex * com
(*
* Target Language
* ---------------
* The target language is an assembly language for a machine with a
* single accumulator and eleven simple instructions.
*)
datatype inst =
LDI of num (* Load Immediate *)
| LD of ide (* Load *)
| ST of ide (* Store *)
| NOT (* Not *)
| EQ of ide (* Equal *)
| ADD of ide (* Add *)
| MUL of ide (* Multiply *)
| JMP of num (* Jump *)
| JZR of num (* Jump if zero *)
| IN (* Input *)
| OUT (* Output *)
(*
* The Compiler
* ------------
* KE e loc the code from compiling expression e at address loc
* KB b loc the code from compiling boolexp b at address loc
* KC c loc the code from compiling command c at address loc
*)
fun KE (Lit n) = [LDI n]
| KE (Ident x) = [LD x]
| KE (Plus(e,x)) = (KE e) @ [ADD x]
| KE (Times(e,x)) = (KE e) @ [MUL x]
| KE (Read) = [IN]
fun KB (Eq(e,x)) = (KE e) @ [EQ x]
| KB (Not b) = (KB b) @ [NOT]
fun KC (Skip) = []
| KC (Assn(x,e)) = (KE e) @ [ST x]
| KC (Write e) = (KE e) @ [OUT]
| KC (Seq(c1,c2)) = (KC c1) @ (KC c2)
| KC (While(b,c)) =
let val test = (KB b) and body = (KC c) in
test @ [JZR (length body + 2)] @
body @ [JMP (~(length test + length body + 1))]
end
(*
* Pretty-Printing a target program
*)
fun mkstr (LDI n) = "LDI " ^ makestring n
| mkstr (LD x) = "LD " ^ x
| mkstr (ST x) = "ST " ^ x
| mkstr (NOT) = "NOT "
| mkstr (EQ x) = "EQ " ^ x
| mkstr (ADD x) = "ADD " ^ x
| mkstr (MUL x) = "MUL " ^ x
| mkstr (JMP n) = "JMP " ^ makestring n
| mkstr (JZR n) = "JZR " ^ makestring n
| mkstr (IN) = "IN "
| mkstr (OUT) = "OUT "
fun printinst h loc =
TextIO.output(TextIO.stdOut, makestring loc ^ "\t" ^ mkstr h ^ "\n");
fun pprint [] loc = ()
| pprint (h::t) loc = (printinst h loc; pprint t (loc + 1))
(*
* An Example Program
* ------------------
* i := read;
* product := 1;
* while not 0 = i do
* product := product * i;
* i := i - 1
* end;
* write product
*)
val prog1 =
Seq (
Assn("i",Read),
Seq (
Assn("product",Lit 1),
Seq (
While(
Not(Eq(Lit 0,"i")),
Seq (
Assn("product",Times(Ident "product","i")),
Assn("i",Plus(Lit(~1),"i")))),
Write(Ident "product"))));
(*
* Top-level call to invoke the compiler
*)
fun compile c loc = pprint (KC c) loc