Subato

Ein kleiner Compiler

In diesem Semester haben Sie viel über hierarchische Datenstrukturen aber auch über das Speichermodell von Programmiersprachen gelernt. In dieser abschließenden Projektaufgabe verbinden wir alle diese Erkenntnisse zu einem kleinen Compiler und Interpreter einer eigenen Programmiersprache.

Die Quellprache hat als einzigen Datentyp 64bit Zahlen. Sie kennt Funktionen, Schleifen, Bedingungen, lokale Variablen und arithmetische Ausdrücke.

Der generierte Assembler ist GNU Assembler und kann zusammen mit C Programmen compiliert werden.

Der Interpreter hat eine Read-Eval-Print-Loop.

Also erhalten Sie guten Einblick in die Funktionsweise einer Programmiersprache.

Sie können mit der main-Methode der Schnittstelle AST alle Funktionalität ausprobieren. Dafür liegt die kleine Quelltextdatei t1.ls vor. Zum Erzeugen von Assembler können Sie folgende Programmaufrufe tätigen:

java  name/panitz/longStack/AST  t1.ls
gcc testCode.c t1.s
./a.out

Für den Interpreter und den Pretty-Printer können Sie folgenden Aufruf machen:

java  name/panitz/longStack/AST  -i t1.ls

Studieren Sie das Aufgaben-Papier über den Compilerbau und lösen Sie die dort enthaltenen Aufgaben.


package name.panitz.longStack; import java.io.IOException; import java.io.StringWriter; import java.io.Writer; import java.io.FileWriter; import java.io.FileReader; import java.io.InputStreamReader; import java.io.BufferedReader; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeSet; import java.util.function.BinaryOperator; import java.util.function.Function; import java.util.function.Consumer; @SuppressWarnings("unchecked") public interface AST { public static enum BinOP{ add ((x,y)->x+y,"+"), sub ((x,y)->x-y,"-"), mult((x,y)->x*y,"*"), eq ((x,y)->x==y?1L:0L,"="); BinOP(BinaryOperator<Long> op,String name){ this.op = op; this.name = name; } BinaryOperator<Long> op; String name; } public static record IntLiteral(long n) implements AST{} public static record Var(String name) implements AST{} public static record Assign(Var v, AST right) implements AST{} public static record OpExpr(AST left, BinOP op,AST right) implements AST{} public static record IfExpr(AST cond, AST alt1, AST alt2) implements AST{} public static record WhileExpr(AST cond, AST body) implements AST{} public static record Sequence(List<AST> sts) implements AST{} public static record FunCall(String name, List<AST> args) implements AST{} static record Fun(String name, List<String> args, AST body){} static Fun factorial= new Fun("fac",List.of("x"), new Sequence(List.of (new Assign(new Var("r"), new IntLiteral(1)) ,new WhileExpr (new Var("x") ,new Sequence(List.of (new Assign (new Var("r") ,new OpExpr(new Var("r"), BinOP.mult,new Var("x"))) ,new Assign (new Var("x") ,new OpExpr(new Var("x"), BinOP.sub, new IntLiteral(1))) )) ) ,new Var("r") )) ); static Fun factorialRek = new Fun("f",List.of("x"), new IfExpr(new OpExpr(new Var("x"),BinOP.eq,new IntLiteral(0)) , new IntLiteral(1) , new OpExpr(new Var("x"),BinOP.mult ,new FunCall("f" , List.of(new OpExpr(new Var("x") , BinOP.sub, new IntLiteral(1))))))); static Fun minus = new Fun("minus" ,List.of("x","y") ,new OpExpr(new Var("x"), BinOP.sub, new Var("y"))); static Fun fib = new Fun("fib",List.of("x") ,new IfExpr(new OpExpr(new IntLiteral(0),BinOP.eq,new Var("x")) ,new IntLiteral(0) ,new IfExpr(new OpExpr(new IntLiteral(1),BinOP.eq,new Var("x")) ,new IntLiteral(1) ,new OpExpr (new FunCall("fib" ,List.of (new OpExpr(new Var("x"),BinOP.sub, new IntLiteral(2)))) ,BinOP.add ,new FunCall("fib" ,List.of (new OpExpr(new Var("x"),BinOP.sub, new IntLiteral(1)))) )))); default <R> R match(PatternCase<? extends AST,R>... pts){ for (var pt:pts) { if (pt.matches(this)) return pt.eval(this); } throw new RuntimeException("unmatched pattern case: "+this); } static interface PatternCase<C,R>{ boolean matches(Object o); R eval(Object o); } static <C,R> PatternCase<C,R> mkPattern(Class<C> cl,Function<C, R> f){ return new PatternCase<C,R>() { public boolean matches(Object o) {return cl.isInstance(o);} public R eval(Object o){return f.apply(cl.cast(o));} }; } static <R> PatternCase<OpExpr,R> $OpExpr(Function<OpExpr, R> f) { return mkPattern(OpExpr.class, f); } static<R>PatternCase<IntLiteral,R> $IntLiteral(Function<IntLiteral,R>f){ return mkPattern(IntLiteral.class, f); } static <R> PatternCase<Var,R> $Var(Function<Var, R> f){ return mkPattern(Var.class, f); } static <R> PatternCase<Assign,R> $Assign(Function<Assign, R> f){ return mkPattern(Assign.class, f); } static <R> PatternCase<IfExpr ,R> $IfExpr(Function<IfExpr, R> f){ return mkPattern(IfExpr.class, f); } static <R> PatternCase<WhileExpr,R> $WhileExpr(Function<WhileExpr, R> f){ return mkPattern(WhileExpr.class, f); } static <R> PatternCase<FunCall,R> $FunCall(Function<FunCall, R> f){ return mkPattern(FunCall.class, f); } static <R> PatternCase<Sequence,R> $Sequence(Function<Sequence, R> f) { return mkPattern(Sequence.class, f); } static <R> PatternCase<AST,R> $(Function<AST, R> f) { return mkPattern(AST.class, f); } default String whatAreYou() { return match ($Var (v -> "Variable mit Namen "+v.name+"") ,$IntLiteral(il-> "Literal mit Wert "+il.n) ,$FunCall (fc-> "Aufruf der Funktion: "+fc.name) ,$ (x -> "Irgend ein anderer Baumknoten") ); } static PatternCase<OpExpr,Void> _OpExpr(Consumer<OpExpr> f) { return mkPattern(OpExpr.class, x->{f.accept(x);return null;}); } static PatternCase<IntLiteral,Void> _IntLiteral(Consumer<IntLiteral> f){ return mkPattern(IntLiteral.class, x->{f.accept(x);return null;}); } static PatternCase<Var,Void> _Var(Consumer<Var> f){ return mkPattern(Var.class, x->{f.accept(x);return null;}); } static PatternCase<Assign,Void> _Assign(Consumer<Assign> f){ return mkPattern(Assign.class, x->{f.accept(x);return null;}); } static PatternCase<IfExpr ,Void> _IfExpr(Consumer<IfExpr> f){ return mkPattern(IfExpr.class, x->{f.accept(x);return null;}); } static PatternCase<WhileExpr,Void> _WhileExpr(Consumer<WhileExpr> f){ return mkPattern(WhileExpr.class, x->{f.accept(x);return null;}); } static PatternCase<FunCall,Void> _FunCall(Consumer<FunCall> f){ return mkPattern(FunCall.class, x->{f.accept(x);return null;}); } static PatternCase<Sequence,Void> _Sequence(Consumer<Sequence> f) { return mkPattern(Sequence.class, x->{f.accept(x);return null;}); } static PatternCase<AST,Void> __(Consumer<AST> f) { return mkPattern(AST.class, x->{f.accept(x);return null;}); } static class ExWriter{ Writer w; String indent = ""; void addIndent(){indent=indent+" ";} void subIndent(){indent=indent.substring(2);} void nl(){write("\n"+indent);} int lbl=0; int next(){return lbl++;} public ExWriter(Writer w) { this.w = w; } void lnwrite(Object o) { nl(); write(o); } void write(Object o) { try { w.write(o+""); } catch (IOException e) { throw new RuntimeException(e); } } } default String show() { var r = new ExWriter(new StringWriter()); show(r); return r.w.toString(); } static String show(Fun fd) { var r = new ExWriter(new StringWriter()); r.write("fun "); r.write(fd.name); r.write("("); var first=true; for (var arg:fd.args){ if (first) first=false;else r.write(", "); r.write(arg); } r.write(") = "); r.addIndent(); r.nl(); fd.body.show(r); return r.w.toString(); } default Void show(ExWriter r) { return match (_Var (v -> {r.write(v.name);}) ,_IntLiteral(il-> {r.write(il.n+"");}) ,__ (x -> {r.write("not yet implemented. Show: "+x);}) ); } default long ev() {return ev(List.of());} default long ev(List<Fun> funs) { HashMap<String, Fun> fs = new HashMap<>(); funs.forEach(fun->fs.put(fun.name, fun)); return ev(fs,new HashMap<>()); } default long ev(Map<String,Fun> fs, Map<String, Long> env) { return match ($IntLiteral(il -> il.n) ,$Var (v -> env.get(v.name)) ,$OpExpr (ae -> 42L) ,$IfExpr (ie -> 42L) ,$Assign (as -> {return 42L;}) ,$WhileExpr (we -> {return 0L;}) ,$Sequence (sq -> { var r = 42L; /*TO DO*/; return r; }) ,$FunCall (fc -> { return 42L; }) ); } String[] rs = {"%rdi","%rsi","%rdx","%rcx","%r8","%r9"}; default Set<String> getVars(){ var r = new TreeSet<String>(); getVars(r); return r; } default void getVars(TreeSet<String> r) { match (_Assign (as -> {r.add(as.v.name);as.right.getVars(r);}) ,__ (x -> {}) ); } static void asm(Fun f,ExWriter r) { r.nl(); r.lnwrite(".globl "+f.name); r.lnwrite(".type "+f.name+", @function"); r.lnwrite(f.name+":"); r.addIndent(); r.lnwrite("pushq %rbp"); r.lnwrite("movq %rsp, %rbp"); var registerArgs = Math.min(rs.length, f.args.size()); var env = new HashMap<String,Integer>(); var sp=-8; for (var i=0;i<registerArgs;i++) { r.lnwrite("movq "+rs[i]+", "+sp+"(%rbp)"); env.put(f.args.get(i), sp); sp = sp-8; } var vs = f.body.getVars(); vs.removeAll(f.args); for (var v:vs) { env.put(v, sp); sp = sp-8; } r.lnwrite("subq $"+(-sp)+", %rsp"); sp = 16; for (var i=registerArgs;i<f.args.size();i++) { env.put(f.args.get(i), sp); sp+=8; } f.body.asm(Map.of(),env,r); r.lnwrite("movq %rbp, %rsp"); r.lnwrite("popq %rbp"); r.lnwrite("ret"); r.subIndent(); } default String asm() { var r = new ExWriter(new StringWriter()); asm(new HashMap<>(),new HashMap<>(),r); return r.w.toString(); } static String asm(List<Fun> fs) { var r = new ExWriter(new StringWriter()); fs.forEach(f->asm(f,r)); return r.w.toString(); } default void asm(Map<String,Fun> fs,Map<String, Integer> e, ExWriter r){ match (_IntLiteral(il -> {r.lnwrite("movq $"+il.n+", %rax");}) ,_Var (v -> {r.lnwrite("movq "+e.get(v.name)+"(%rbp), %rax");}) ,_FunCall (fc -> {for (int i=0;i<Math.min(5, fc.args.size());i++) { fc.args.get(i).asm(fs, e, r); r.lnwrite("movq %rax, "+rs[i]); } ;fc.args.stream().skip(rs.length).forEach(arg -> { arg.asm(fs, e, r); r.lnwrite("pushq %rax"); }); ;r.lnwrite("call "+fc.name) ;}) ); } static String[] help = {"<expr> evaluate <expr>" ,"':q' quits the interpreter" ,"':defs' System.out.println();shows defined function names" ,"':s <funname>' prints function definition of <funname>" ,"':? this help"}; static void printHelp(){ for (var h:help)System.out.println(h); } public static void main(String[] args) throws Exception { if (args.length==0){ System.out.println ("usage: java name.panitz.longStack.AST [-i] filename"); System.out.println (" where -i starts interpreter " +"otherwise assembler code is generated"); return; } var interpreter = args[0].equals("-i"); var funDefs = LongStackParser.parseFunDefs(args[0].equals("-i")?args[1]:args[0]); if (interpreter){ var in = new BufferedReader(new InputStreamReader(System.in)); System.out.println("TUGS (Tiny Usable Great System) :? for help"); while (true){ System.out.print("> "); var ln = in.readLine(); if (ln.equals(":q")) break; if (ln.equals(":?")){printHelp();continue;} if (ln.equals(":defs")){ funDefs.parallelStream() .forEach(fd->System.out.println(fd.name)); continue; } var showFunction = ln.startsWith(":s "); try{ if (showFunction){ var funname = ln.substring(2).trim(); Fun fundef = funDefs.stream() .reduce (null ,(r,fd)->(r==null&&fd.name.equals(funname))?fd:r ,(fd1,fd2)->fd1==null?fd2:fd1); if (fundef!=null) System.out.println(show(fundef)); else System.out.println("unknown function: "+funname); }else{ System.out.println (LongStackParser.parseExpr(ln).ev(funDefs)); } }catch(Exception e){ System.out.println(e); } } }else{ var out = new FileWriter (args[0].substring(0,args[0].lastIndexOf('.'))+".s"); var o = new ExWriter(out); for (var fun:funDefs) asm(fun,o); o.nl(); out.close(); } } }
java
You are not logged in and therefore you cannot submit a solution.