OCamlLangImpl4.rst 34 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915
  1. ==============================================
  2. Kaleidoscope: Adding JIT and Optimizer Support
  3. ==============================================
  4. .. contents::
  5. :local:
  6. Chapter 4 Introduction
  7. ======================
  8. Welcome to Chapter 4 of the "`Implementing a language with
  9. LLVM <index.html>`_" tutorial. Chapters 1-3 described the implementation
  10. of a simple language and added support for generating LLVM IR. This
  11. chapter describes two new techniques: adding optimizer support to your
  12. language, and adding JIT compiler support. These additions will
  13. demonstrate how to get nice, efficient code for the Kaleidoscope
  14. language.
  15. Trivial Constant Folding
  16. ========================
  17. **Note:** the default ``IRBuilder`` now always includes the constant
  18. folding optimisations below.
  19. Our demonstration for Chapter 3 is elegant and easy to extend.
  20. Unfortunately, it does not produce wonderful code. For example, when
  21. compiling simple code, we don't get obvious optimizations:
  22. ::
  23. ready> def test(x) 1+2+x;
  24. Read function definition:
  25. define double @test(double %x) {
  26. entry:
  27. %addtmp = fadd double 1.000000e+00, 2.000000e+00
  28. %addtmp1 = fadd double %addtmp, %x
  29. ret double %addtmp1
  30. }
  31. This code is a very, very literal transcription of the AST built by
  32. parsing the input. As such, this transcription lacks optimizations like
  33. constant folding (we'd like to get "``add x, 3.0``" in the example
  34. above) as well as other more important optimizations. Constant folding,
  35. in particular, is a very common and very important optimization: so much
  36. so that many language implementors implement constant folding support in
  37. their AST representation.
  38. With LLVM, you don't need this support in the AST. Since all calls to
  39. build LLVM IR go through the LLVM builder, it would be nice if the
  40. builder itself checked to see if there was a constant folding
  41. opportunity when you call it. If so, it could just do the constant fold
  42. and return the constant instead of creating an instruction. This is
  43. exactly what the ``LLVMFoldingBuilder`` class does.
  44. All we did was switch from ``LLVMBuilder`` to ``LLVMFoldingBuilder``.
  45. Though we change no other code, we now have all of our instructions
  46. implicitly constant folded without us having to do anything about it.
  47. For example, the input above now compiles to:
  48. ::
  49. ready> def test(x) 1+2+x;
  50. Read function definition:
  51. define double @test(double %x) {
  52. entry:
  53. %addtmp = fadd double 3.000000e+00, %x
  54. ret double %addtmp
  55. }
  56. Well, that was easy :). In practice, we recommend always using
  57. ``LLVMFoldingBuilder`` when generating code like this. It has no
  58. "syntactic overhead" for its use (you don't have to uglify your compiler
  59. with constant checks everywhere) and it can dramatically reduce the
  60. amount of LLVM IR that is generated in some cases (particular for
  61. languages with a macro preprocessor or that use a lot of constants).
  62. On the other hand, the ``LLVMFoldingBuilder`` is limited by the fact
  63. that it does all of its analysis inline with the code as it is built. If
  64. you take a slightly more complex example:
  65. ::
  66. ready> def test(x) (1+2+x)*(x+(1+2));
  67. ready> Read function definition:
  68. define double @test(double %x) {
  69. entry:
  70. %addtmp = fadd double 3.000000e+00, %x
  71. %addtmp1 = fadd double %x, 3.000000e+00
  72. %multmp = fmul double %addtmp, %addtmp1
  73. ret double %multmp
  74. }
  75. In this case, the LHS and RHS of the multiplication are the same value.
  76. We'd really like to see this generate "``tmp = x+3; result = tmp*tmp;``"
  77. instead of computing "``x*3``" twice.
  78. Unfortunately, no amount of local analysis will be able to detect and
  79. correct this. This requires two transformations: reassociation of
  80. expressions (to make the add's lexically identical) and Common
  81. Subexpression Elimination (CSE) to delete the redundant add instruction.
  82. Fortunately, LLVM provides a broad range of optimizations that you can
  83. use, in the form of "passes".
  84. LLVM Optimization Passes
  85. ========================
  86. LLVM provides many optimization passes, which do many different sorts of
  87. things and have different tradeoffs. Unlike other systems, LLVM doesn't
  88. hold to the mistaken notion that one set of optimizations is right for
  89. all languages and for all situations. LLVM allows a compiler implementor
  90. to make complete decisions about what optimizations to use, in which
  91. order, and in what situation.
  92. As a concrete example, LLVM supports both "whole module" passes, which
  93. look across as large of body of code as they can (often a whole file,
  94. but if run at link time, this can be a substantial portion of the whole
  95. program). It also supports and includes "per-function" passes which just
  96. operate on a single function at a time, without looking at other
  97. functions. For more information on passes and how they are run, see the
  98. `How to Write a Pass <../WritingAnLLVMPass.html>`_ document and the
  99. `List of LLVM Passes <../Passes.html>`_.
  100. For Kaleidoscope, we are currently generating functions on the fly, one
  101. at a time, as the user types them in. We aren't shooting for the
  102. ultimate optimization experience in this setting, but we also want to
  103. catch the easy and quick stuff where possible. As such, we will choose
  104. to run a few per-function optimizations as the user types the function
  105. in. If we wanted to make a "static Kaleidoscope compiler", we would use
  106. exactly the code we have now, except that we would defer running the
  107. optimizer until the entire file has been parsed.
  108. In order to get per-function optimizations going, we need to set up a
  109. `Llvm.PassManager <../WritingAnLLVMPass.html#what-passmanager-does>`_ to hold and
  110. organize the LLVM optimizations that we want to run. Once we have that,
  111. we can add a set of optimizations to run. The code looks like this:
  112. .. code-block:: ocaml
  113. (* Create the JIT. *)
  114. let the_execution_engine = ExecutionEngine.create Codegen.the_module in
  115. let the_fpm = PassManager.create_function Codegen.the_module in
  116. (* Set up the optimizer pipeline. Start with registering info about how the
  117. * target lays out data structures. *)
  118. DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
  119. (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
  120. add_instruction_combining the_fpm;
  121. (* reassociate expressions. *)
  122. add_reassociation the_fpm;
  123. (* Eliminate Common SubExpressions. *)
  124. add_gvn the_fpm;
  125. (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
  126. add_cfg_simplification the_fpm;
  127. ignore (PassManager.initialize the_fpm);
  128. (* Run the main "interpreter loop" now. *)
  129. Toplevel.main_loop the_fpm the_execution_engine stream;
  130. The meat of the matter here, is the definition of "``the_fpm``". It
  131. requires a pointer to the ``the_module`` to construct itself. Once it is
  132. set up, we use a series of "add" calls to add a bunch of LLVM passes.
  133. The first pass is basically boilerplate, it adds a pass so that later
  134. optimizations know how the data structures in the program are laid out.
  135. The "``the_execution_engine``" variable is related to the JIT, which we
  136. will get to in the next section.
  137. In this case, we choose to add 4 optimization passes. The passes we
  138. chose here are a pretty standard set of "cleanup" optimizations that are
  139. useful for a wide variety of code. I won't delve into what they do but,
  140. believe me, they are a good starting place :).
  141. Once the ``Llvm.PassManager.`` is set up, we need to make use of it. We
  142. do this by running it after our newly created function is constructed
  143. (in ``Codegen.codegen_func``), but before it is returned to the client:
  144. .. code-block:: ocaml
  145. let codegen_func the_fpm = function
  146. ...
  147. try
  148. let ret_val = codegen_expr body in
  149. (* Finish off the function. *)
  150. let _ = build_ret ret_val builder in
  151. (* Validate the generated code, checking for consistency. *)
  152. Llvm_analysis.assert_valid_function the_function;
  153. (* Optimize the function. *)
  154. let _ = PassManager.run_function the_function the_fpm in
  155. the_function
  156. As you can see, this is pretty straightforward. The ``the_fpm``
  157. optimizes and updates the LLVM Function\* in place, improving
  158. (hopefully) its body. With this in place, we can try our test above
  159. again:
  160. ::
  161. ready> def test(x) (1+2+x)*(x+(1+2));
  162. ready> Read function definition:
  163. define double @test(double %x) {
  164. entry:
  165. %addtmp = fadd double %x, 3.000000e+00
  166. %multmp = fmul double %addtmp, %addtmp
  167. ret double %multmp
  168. }
  169. As expected, we now get our nicely optimized code, saving a floating
  170. point add instruction from every execution of this function.
  171. LLVM provides a wide variety of optimizations that can be used in
  172. certain circumstances. Some `documentation about the various
  173. passes <../Passes.html>`_ is available, but it isn't very complete.
  174. Another good source of ideas can come from looking at the passes that
  175. ``Clang`` runs to get started. The "``opt``" tool allows you to
  176. experiment with passes from the command line, so you can see if they do
  177. anything.
  178. Now that we have reasonable code coming out of our front-end, lets talk
  179. about executing it!
  180. Adding a JIT Compiler
  181. =====================
  182. Code that is available in LLVM IR can have a wide variety of tools
  183. applied to it. For example, you can run optimizations on it (as we did
  184. above), you can dump it out in textual or binary forms, you can compile
  185. the code to an assembly file (.s) for some target, or you can JIT
  186. compile it. The nice thing about the LLVM IR representation is that it
  187. is the "common currency" between many different parts of the compiler.
  188. In this section, we'll add JIT compiler support to our interpreter. The
  189. basic idea that we want for Kaleidoscope is to have the user enter
  190. function bodies as they do now, but immediately evaluate the top-level
  191. expressions they type in. For example, if they type in "1 + 2;", we
  192. should evaluate and print out 3. If they define a function, they should
  193. be able to call it from the command line.
  194. In order to do this, we first declare and initialize the JIT. This is
  195. done by adding a global variable and a call in ``main``:
  196. .. code-block:: ocaml
  197. ...
  198. let main () =
  199. ...
  200. (* Create the JIT. *)
  201. let the_execution_engine = ExecutionEngine.create Codegen.the_module in
  202. ...
  203. This creates an abstract "Execution Engine" which can be either a JIT
  204. compiler or the LLVM interpreter. LLVM will automatically pick a JIT
  205. compiler for you if one is available for your platform, otherwise it
  206. will fall back to the interpreter.
  207. Once the ``Llvm_executionengine.ExecutionEngine.t`` is created, the JIT
  208. is ready to be used. There are a variety of APIs that are useful, but
  209. the simplest one is the
  210. "``Llvm_executionengine.ExecutionEngine.run_function``" function. This
  211. method JIT compiles the specified LLVM Function and returns a function
  212. pointer to the generated machine code. In our case, this means that we
  213. can change the code that parses a top-level expression to look like
  214. this:
  215. .. code-block:: ocaml
  216. (* Evaluate a top-level expression into an anonymous function. *)
  217. let e = Parser.parse_toplevel stream in
  218. print_endline "parsed a top-level expr";
  219. let the_function = Codegen.codegen_func the_fpm e in
  220. dump_value the_function;
  221. (* JIT the function, returning a function pointer. *)
  222. let result = ExecutionEngine.run_function the_function [||]
  223. the_execution_engine in
  224. print_string "Evaluated to ";
  225. print_float (GenericValue.as_float Codegen.double_type result);
  226. print_newline ();
  227. Recall that we compile top-level expressions into a self-contained LLVM
  228. function that takes no arguments and returns the computed double.
  229. Because the LLVM JIT compiler matches the native platform ABI, this
  230. means that you can just cast the result pointer to a function pointer of
  231. that type and call it directly. This means, there is no difference
  232. between JIT compiled code and native machine code that is statically
  233. linked into your application.
  234. With just these two changes, lets see how Kaleidoscope works now!
  235. ::
  236. ready> 4+5;
  237. define double @""() {
  238. entry:
  239. ret double 9.000000e+00
  240. }
  241. Evaluated to 9.000000
  242. Well this looks like it is basically working. The dump of the function
  243. shows the "no argument function that always returns double" that we
  244. synthesize for each top level expression that is typed in. This
  245. demonstrates very basic functionality, but can we do more?
  246. ::
  247. ready> def testfunc(x y) x + y*2;
  248. Read function definition:
  249. define double @testfunc(double %x, double %y) {
  250. entry:
  251. %multmp = fmul double %y, 2.000000e+00
  252. %addtmp = fadd double %multmp, %x
  253. ret double %addtmp
  254. }
  255. ready> testfunc(4, 10);
  256. define double @""() {
  257. entry:
  258. %calltmp = call double @testfunc(double 4.000000e+00, double 1.000000e+01)
  259. ret double %calltmp
  260. }
  261. Evaluated to 24.000000
  262. This illustrates that we can now call user code, but there is something
  263. a bit subtle going on here. Note that we only invoke the JIT on the
  264. anonymous functions that *call testfunc*, but we never invoked it on
  265. *testfunc* itself. What actually happened here is that the JIT scanned
  266. for all non-JIT'd functions transitively called from the anonymous
  267. function and compiled all of them before returning from
  268. ``run_function``.
  269. The JIT provides a number of other more advanced interfaces for things
  270. like freeing allocated machine code, rejit'ing functions to update them,
  271. etc. However, even with this simple code, we get some surprisingly
  272. powerful capabilities - check this out (I removed the dump of the
  273. anonymous functions, you should get the idea by now :) :
  274. ::
  275. ready> extern sin(x);
  276. Read extern:
  277. declare double @sin(double)
  278. ready> extern cos(x);
  279. Read extern:
  280. declare double @cos(double)
  281. ready> sin(1.0);
  282. Evaluated to 0.841471
  283. ready> def foo(x) sin(x)*sin(x) + cos(x)*cos(x);
  284. Read function definition:
  285. define double @foo(double %x) {
  286. entry:
  287. %calltmp = call double @sin(double %x)
  288. %multmp = fmul double %calltmp, %calltmp
  289. %calltmp2 = call double @cos(double %x)
  290. %multmp4 = fmul double %calltmp2, %calltmp2
  291. %addtmp = fadd double %multmp, %multmp4
  292. ret double %addtmp
  293. }
  294. ready> foo(4.0);
  295. Evaluated to 1.000000
  296. Whoa, how does the JIT know about sin and cos? The answer is
  297. surprisingly simple: in this example, the JIT started execution of a
  298. function and got to a function call. It realized that the function was
  299. not yet JIT compiled and invoked the standard set of routines to resolve
  300. the function. In this case, there is no body defined for the function,
  301. so the JIT ended up calling "``dlsym("sin")``" on the Kaleidoscope
  302. process itself. Since "``sin``" is defined within the JIT's address
  303. space, it simply patches up calls in the module to call the libm version
  304. of ``sin`` directly.
  305. The LLVM JIT provides a number of interfaces (look in the
  306. ``llvm_executionengine.mli`` file) for controlling how unknown functions
  307. get resolved. It allows you to establish explicit mappings between IR
  308. objects and addresses (useful for LLVM global variables that you want to
  309. map to static tables, for example), allows you to dynamically decide on
  310. the fly based on the function name, and even allows you to have the JIT
  311. compile functions lazily the first time they're called.
  312. One interesting application of this is that we can now extend the
  313. language by writing arbitrary C code to implement operations. For
  314. example, if we add:
  315. .. code-block:: c++
  316. /* putchard - putchar that takes a double and returns 0. */
  317. extern "C"
  318. double putchard(double X) {
  319. putchar((char)X);
  320. return 0;
  321. }
  322. Now we can produce simple output to the console by using things like:
  323. "``extern putchard(x); putchard(120);``", which prints a lowercase 'x'
  324. on the console (120 is the ASCII code for 'x'). Similar code could be
  325. used to implement file I/O, console input, and many other capabilities
  326. in Kaleidoscope.
  327. This completes the JIT and optimizer chapter of the Kaleidoscope
  328. tutorial. At this point, we can compile a non-Turing-complete
  329. programming language, optimize and JIT compile it in a user-driven way.
  330. Next up we'll look into `extending the language with control flow
  331. constructs <OCamlLangImpl5.html>`_, tackling some interesting LLVM IR
  332. issues along the way.
  333. Full Code Listing
  334. =================
  335. Here is the complete code listing for our running example, enhanced with
  336. the LLVM JIT and optimizer. To build this example, use:
  337. .. code-block:: bash
  338. # Compile
  339. ocamlbuild toy.byte
  340. # Run
  341. ./toy.byte
  342. Here is the code:
  343. \_tags:
  344. ::
  345. <{lexer,parser}.ml>: use_camlp4, pp(camlp4of)
  346. <*.{byte,native}>: g++, use_llvm, use_llvm_analysis
  347. <*.{byte,native}>: use_llvm_executionengine, use_llvm_target
  348. <*.{byte,native}>: use_llvm_scalar_opts, use_bindings
  349. myocamlbuild.ml:
  350. .. code-block:: ocaml
  351. open Ocamlbuild_plugin;;
  352. ocaml_lib ~extern:true "llvm";;
  353. ocaml_lib ~extern:true "llvm_analysis";;
  354. ocaml_lib ~extern:true "llvm_executionengine";;
  355. ocaml_lib ~extern:true "llvm_target";;
  356. ocaml_lib ~extern:true "llvm_scalar_opts";;
  357. flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"]);;
  358. dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];;
  359. token.ml:
  360. .. code-block:: ocaml
  361. (*===----------------------------------------------------------------------===
  362. * Lexer Tokens
  363. *===----------------------------------------------------------------------===*)
  364. (* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of
  365. * these others for known things. *)
  366. type token =
  367. (* commands *)
  368. | Def | Extern
  369. (* primary *)
  370. | Ident of string | Number of float
  371. (* unknown *)
  372. | Kwd of char
  373. lexer.ml:
  374. .. code-block:: ocaml
  375. (*===----------------------------------------------------------------------===
  376. * Lexer
  377. *===----------------------------------------------------------------------===*)
  378. let rec lex = parser
  379. (* Skip any whitespace. *)
  380. | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream
  381. (* identifier: [a-zA-Z][a-zA-Z0-9] *)
  382. | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] ->
  383. let buffer = Buffer.create 1 in
  384. Buffer.add_char buffer c;
  385. lex_ident buffer stream
  386. (* number: [0-9.]+ *)
  387. | [< ' ('0' .. '9' as c); stream >] ->
  388. let buffer = Buffer.create 1 in
  389. Buffer.add_char buffer c;
  390. lex_number buffer stream
  391. (* Comment until end of line. *)
  392. | [< ' ('#'); stream >] ->
  393. lex_comment stream
  394. (* Otherwise, just return the character as its ascii value. *)
  395. | [< 'c; stream >] ->
  396. [< 'Token.Kwd c; lex stream >]
  397. (* end of stream. *)
  398. | [< >] -> [< >]
  399. and lex_number buffer = parser
  400. | [< ' ('0' .. '9' | '.' as c); stream >] ->
  401. Buffer.add_char buffer c;
  402. lex_number buffer stream
  403. | [< stream=lex >] ->
  404. [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >]
  405. and lex_ident buffer = parser
  406. | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] ->
  407. Buffer.add_char buffer c;
  408. lex_ident buffer stream
  409. | [< stream=lex >] ->
  410. match Buffer.contents buffer with
  411. | "def" -> [< 'Token.Def; stream >]
  412. | "extern" -> [< 'Token.Extern; stream >]
  413. | id -> [< 'Token.Ident id; stream >]
  414. and lex_comment = parser
  415. | [< ' ('\n'); stream=lex >] -> stream
  416. | [< 'c; e=lex_comment >] -> e
  417. | [< >] -> [< >]
  418. ast.ml:
  419. .. code-block:: ocaml
  420. (*===----------------------------------------------------------------------===
  421. * Abstract Syntax Tree (aka Parse Tree)
  422. *===----------------------------------------------------------------------===*)
  423. (* expr - Base type for all expression nodes. *)
  424. type expr =
  425. (* variant for numeric literals like "1.0". *)
  426. | Number of float
  427. (* variant for referencing a variable, like "a". *)
  428. | Variable of string
  429. (* variant for a binary operator. *)
  430. | Binary of char * expr * expr
  431. (* variant for function calls. *)
  432. | Call of string * expr array
  433. (* proto - This type represents the "prototype" for a function, which captures
  434. * its name, and its argument names (thus implicitly the number of arguments the
  435. * function takes). *)
  436. type proto = Prototype of string * string array
  437. (* func - This type represents a function definition itself. *)
  438. type func = Function of proto * expr
  439. parser.ml:
  440. .. code-block:: ocaml
  441. (*===---------------------------------------------------------------------===
  442. * Parser
  443. *===---------------------------------------------------------------------===*)
  444. (* binop_precedence - This holds the precedence for each binary operator that is
  445. * defined *)
  446. let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
  447. (* precedence - Get the precedence of the pending binary operator token. *)
  448. let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1
  449. (* primary
  450. * ::= identifier
  451. * ::= numberexpr
  452. * ::= parenexpr *)
  453. let rec parse_primary = parser
  454. (* numberexpr ::= number *)
  455. | [< 'Token.Number n >] -> Ast.Number n
  456. (* parenexpr ::= '(' expression ')' *)
  457. | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e
  458. (* identifierexpr
  459. * ::= identifier
  460. * ::= identifier '(' argumentexpr ')' *)
  461. | [< 'Token.Ident id; stream >] ->
  462. let rec parse_args accumulator = parser
  463. | [< e=parse_expr; stream >] ->
  464. begin parser
  465. | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e
  466. | [< >] -> e :: accumulator
  467. end stream
  468. | [< >] -> accumulator
  469. in
  470. let rec parse_ident id = parser
  471. (* Call. *)
  472. | [< 'Token.Kwd '(';
  473. args=parse_args [];
  474. 'Token.Kwd ')' ?? "expected ')'">] ->
  475. Ast.Call (id, Array.of_list (List.rev args))
  476. (* Simple variable ref. *)
  477. | [< >] -> Ast.Variable id
  478. in
  479. parse_ident id stream
  480. | [< >] -> raise (Stream.Error "unknown token when expecting an expression.")
  481. (* binoprhs
  482. * ::= ('+' primary)* *)
  483. and parse_bin_rhs expr_prec lhs stream =
  484. match Stream.peek stream with
  485. (* If this is a binop, find its precedence. *)
  486. | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c ->
  487. let token_prec = precedence c in
  488. (* If this is a binop that binds at least as tightly as the current binop,
  489. * consume it, otherwise we are done. *)
  490. if token_prec < expr_prec then lhs else begin
  491. (* Eat the binop. *)
  492. Stream.junk stream;
  493. (* Parse the primary expression after the binary operator. *)
  494. let rhs = parse_primary stream in
  495. (* Okay, we know this is a binop. *)
  496. let rhs =
  497. match Stream.peek stream with
  498. | Some (Token.Kwd c2) ->
  499. (* If BinOp binds less tightly with rhs than the operator after
  500. * rhs, let the pending operator take rhs as its lhs. *)
  501. let next_prec = precedence c2 in
  502. if token_prec < next_prec
  503. then parse_bin_rhs (token_prec + 1) rhs stream
  504. else rhs
  505. | _ -> rhs
  506. in
  507. (* Merge lhs/rhs. *)
  508. let lhs = Ast.Binary (c, lhs, rhs) in
  509. parse_bin_rhs expr_prec lhs stream
  510. end
  511. | _ -> lhs
  512. (* expression
  513. * ::= primary binoprhs *)
  514. and parse_expr = parser
  515. | [< lhs=parse_primary; stream >] -> parse_bin_rhs 0 lhs stream
  516. (* prototype
  517. * ::= id '(' id* ')' *)
  518. let parse_prototype =
  519. let rec parse_args accumulator = parser
  520. | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
  521. | [< >] -> accumulator
  522. in
  523. parser
  524. | [< 'Token.Ident id;
  525. 'Token.Kwd '(' ?? "expected '(' in prototype";
  526. args=parse_args [];
  527. 'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
  528. (* success. *)
  529. Ast.Prototype (id, Array.of_list (List.rev args))
  530. | [< >] ->
  531. raise (Stream.Error "expected function name in prototype")
  532. (* definition ::= 'def' prototype expression *)
  533. let parse_definition = parser
  534. | [< 'Token.Def; p=parse_prototype; e=parse_expr >] ->
  535. Ast.Function (p, e)
  536. (* toplevelexpr ::= expression *)
  537. let parse_toplevel = parser
  538. | [< e=parse_expr >] ->
  539. (* Make an anonymous proto. *)
  540. Ast.Function (Ast.Prototype ("", [||]), e)
  541. (* external ::= 'extern' prototype *)
  542. let parse_extern = parser
  543. | [< 'Token.Extern; e=parse_prototype >] -> e
  544. codegen.ml:
  545. .. code-block:: ocaml
  546. (*===----------------------------------------------------------------------===
  547. * Code Generation
  548. *===----------------------------------------------------------------------===*)
  549. open Llvm
  550. exception Error of string
  551. let context = global_context ()
  552. let the_module = create_module context "my cool jit"
  553. let builder = builder context
  554. let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
  555. let double_type = double_type context
  556. let rec codegen_expr = function
  557. | Ast.Number n -> const_float double_type n
  558. | Ast.Variable name ->
  559. (try Hashtbl.find named_values name with
  560. | Not_found -> raise (Error "unknown variable name"))
  561. | Ast.Binary (op, lhs, rhs) ->
  562. let lhs_val = codegen_expr lhs in
  563. let rhs_val = codegen_expr rhs in
  564. begin
  565. match op with
  566. | '+' -> build_add lhs_val rhs_val "addtmp" builder
  567. | '-' -> build_sub lhs_val rhs_val "subtmp" builder
  568. | '*' -> build_mul lhs_val rhs_val "multmp" builder
  569. | '<' ->
  570. (* Convert bool 0/1 to double 0.0 or 1.0 *)
  571. let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
  572. build_uitofp i double_type "booltmp" builder
  573. | _ -> raise (Error "invalid binary operator")
  574. end
  575. | Ast.Call (callee, args) ->
  576. (* Look up the name in the module table. *)
  577. let callee =
  578. match lookup_function callee the_module with
  579. | Some callee -> callee
  580. | None -> raise (Error "unknown function referenced")
  581. in
  582. let params = params callee in
  583. (* If argument mismatch error. *)
  584. if Array.length params == Array.length args then () else
  585. raise (Error "incorrect # arguments passed");
  586. let args = Array.map codegen_expr args in
  587. build_call callee args "calltmp" builder
  588. let codegen_proto = function
  589. | Ast.Prototype (name, args) ->
  590. (* Make the function type: double(double,double) etc. *)
  591. let doubles = Array.make (Array.length args) double_type in
  592. let ft = function_type double_type doubles in
  593. let f =
  594. match lookup_function name the_module with
  595. | None -> declare_function name ft the_module
  596. (* If 'f' conflicted, there was already something named 'name'. If it
  597. * has a body, don't allow redefinition or reextern. *)
  598. | Some f ->
  599. (* If 'f' already has a body, reject this. *)
  600. if block_begin f <> At_end f then
  601. raise (Error "redefinition of function");
  602. (* If 'f' took a different number of arguments, reject. *)
  603. if element_type (type_of f) <> ft then
  604. raise (Error "redefinition of function with different # args");
  605. f
  606. in
  607. (* Set names for all arguments. *)
  608. Array.iteri (fun i a ->
  609. let n = args.(i) in
  610. set_value_name n a;
  611. Hashtbl.add named_values n a;
  612. ) (params f);
  613. f
  614. let codegen_func the_fpm = function
  615. | Ast.Function (proto, body) ->
  616. Hashtbl.clear named_values;
  617. let the_function = codegen_proto proto in
  618. (* Create a new basic block to start insertion into. *)
  619. let bb = append_block context "entry" the_function in
  620. position_at_end bb builder;
  621. try
  622. let ret_val = codegen_expr body in
  623. (* Finish off the function. *)
  624. let _ = build_ret ret_val builder in
  625. (* Validate the generated code, checking for consistency. *)
  626. Llvm_analysis.assert_valid_function the_function;
  627. (* Optimize the function. *)
  628. let _ = PassManager.run_function the_function the_fpm in
  629. the_function
  630. with e ->
  631. delete_function the_function;
  632. raise e
  633. toplevel.ml:
  634. .. code-block:: ocaml
  635. (*===----------------------------------------------------------------------===
  636. * Top-Level parsing and JIT Driver
  637. *===----------------------------------------------------------------------===*)
  638. open Llvm
  639. open Llvm_executionengine
  640. (* top ::= definition | external | expression | ';' *)
  641. let rec main_loop the_fpm the_execution_engine stream =
  642. match Stream.peek stream with
  643. | None -> ()
  644. (* ignore top-level semicolons. *)
  645. | Some (Token.Kwd ';') ->
  646. Stream.junk stream;
  647. main_loop the_fpm the_execution_engine stream
  648. | Some token ->
  649. begin
  650. try match token with
  651. | Token.Def ->
  652. let e = Parser.parse_definition stream in
  653. print_endline "parsed a function definition.";
  654. dump_value (Codegen.codegen_func the_fpm e);
  655. | Token.Extern ->
  656. let e = Parser.parse_extern stream in
  657. print_endline "parsed an extern.";
  658. dump_value (Codegen.codegen_proto e);
  659. | _ ->
  660. (* Evaluate a top-level expression into an anonymous function. *)
  661. let e = Parser.parse_toplevel stream in
  662. print_endline "parsed a top-level expr";
  663. let the_function = Codegen.codegen_func the_fpm e in
  664. dump_value the_function;
  665. (* JIT the function, returning a function pointer. *)
  666. let result = ExecutionEngine.run_function the_function [||]
  667. the_execution_engine in
  668. print_string "Evaluated to ";
  669. print_float (GenericValue.as_float Codegen.double_type result);
  670. print_newline ();
  671. with Stream.Error s | Codegen.Error s ->
  672. (* Skip token for error recovery. *)
  673. Stream.junk stream;
  674. print_endline s;
  675. end;
  676. print_string "ready> "; flush stdout;
  677. main_loop the_fpm the_execution_engine stream
  678. toy.ml:
  679. .. code-block:: ocaml
  680. (*===----------------------------------------------------------------------===
  681. * Main driver code.
  682. *===----------------------------------------------------------------------===*)
  683. open Llvm
  684. open Llvm_executionengine
  685. open Llvm_target
  686. open Llvm_scalar_opts
  687. let main () =
  688. ignore (initialize_native_target ());
  689. (* Install standard binary operators.
  690. * 1 is the lowest precedence. *)
  691. Hashtbl.add Parser.binop_precedence '<' 10;
  692. Hashtbl.add Parser.binop_precedence '+' 20;
  693. Hashtbl.add Parser.binop_precedence '-' 20;
  694. Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *)
  695. (* Prime the first token. *)
  696. print_string "ready> "; flush stdout;
  697. let stream = Lexer.lex (Stream.of_channel stdin) in
  698. (* Create the JIT. *)
  699. let the_execution_engine = ExecutionEngine.create Codegen.the_module in
  700. let the_fpm = PassManager.create_function Codegen.the_module in
  701. (* Set up the optimizer pipeline. Start with registering info about how the
  702. * target lays out data structures. *)
  703. DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
  704. (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
  705. add_instruction_combination the_fpm;
  706. (* reassociate expressions. *)
  707. add_reassociation the_fpm;
  708. (* Eliminate Common SubExpressions. *)
  709. add_gvn the_fpm;
  710. (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
  711. add_cfg_simplification the_fpm;
  712. ignore (PassManager.initialize the_fpm);
  713. (* Run the main "interpreter loop" now. *)
  714. Toplevel.main_loop the_fpm the_execution_engine stream;
  715. (* Print out all the generated code. *)
  716. dump_module Codegen.the_module
  717. ;;
  718. main ()
  719. bindings.c
  720. .. code-block:: c
  721. #include <stdio.h>
  722. /* putchard - putchar that takes a double and returns 0. */
  723. extern double putchard(double X) {
  724. putchar((char)X);
  725. return 0;
  726. }
  727. `Next: Extending the language: control flow <OCamlLangImpl5.html>`_