OCamlLangImpl6.rst 58 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441
  1. ============================================================
  2. Kaleidoscope: Extending the Language: User-defined Operators
  3. ============================================================
  4. .. contents::
  5. :local:
  6. Chapter 6 Introduction
  7. ======================
  8. Welcome to Chapter 6 of the "`Implementing a language with
  9. LLVM <index.html>`_" tutorial. At this point in our tutorial, we now
  10. have a fully functional language that is fairly minimal, but also
  11. useful. There is still one big problem with it, however. Our language
  12. doesn't have many useful operators (like division, logical negation, or
  13. even any comparisons besides less-than).
  14. This chapter of the tutorial takes a wild digression into adding
  15. user-defined operators to the simple and beautiful Kaleidoscope
  16. language. This digression now gives us a simple and ugly language in
  17. some ways, but also a powerful one at the same time. One of the great
  18. things about creating your own language is that you get to decide what
  19. is good or bad. In this tutorial we'll assume that it is okay to use
  20. this as a way to show some interesting parsing techniques.
  21. At the end of this tutorial, we'll run through an example Kaleidoscope
  22. application that `renders the Mandelbrot set <#kicking-the-tires>`_. This gives an
  23. example of what you can build with Kaleidoscope and its feature set.
  24. User-defined Operators: the Idea
  25. ================================
  26. The "operator overloading" that we will add to Kaleidoscope is more
  27. general than languages like C++. In C++, you are only allowed to
  28. redefine existing operators: you can't programmatically change the
  29. grammar, introduce new operators, change precedence levels, etc. In this
  30. chapter, we will add this capability to Kaleidoscope, which will let the
  31. user round out the set of operators that are supported.
  32. The point of going into user-defined operators in a tutorial like this
  33. is to show the power and flexibility of using a hand-written parser.
  34. Thus far, the parser we have been implementing uses recursive descent
  35. for most parts of the grammar and operator precedence parsing for the
  36. expressions. See `Chapter 2 <OCamlLangImpl2.html>`_ for details. Without
  37. using operator precedence parsing, it would be very difficult to allow
  38. the programmer to introduce new operators into the grammar: the grammar
  39. is dynamically extensible as the JIT runs.
  40. The two specific features we'll add are programmable unary operators
  41. (right now, Kaleidoscope has no unary operators at all) as well as
  42. binary operators. An example of this is:
  43. ::
  44. # Logical unary not.
  45. def unary!(v)
  46. if v then
  47. 0
  48. else
  49. 1;
  50. # Define > with the same precedence as <.
  51. def binary> 10 (LHS RHS)
  52. RHS < LHS;
  53. # Binary "logical or", (note that it does not "short circuit")
  54. def binary| 5 (LHS RHS)
  55. if LHS then
  56. 1
  57. else if RHS then
  58. 1
  59. else
  60. 0;
  61. # Define = with slightly lower precedence than relationals.
  62. def binary= 9 (LHS RHS)
  63. !(LHS < RHS | LHS > RHS);
  64. Many languages aspire to being able to implement their standard runtime
  65. library in the language itself. In Kaleidoscope, we can implement
  66. significant parts of the language in the library!
  67. We will break down implementation of these features into two parts:
  68. implementing support for user-defined binary operators and adding unary
  69. operators.
  70. User-defined Binary Operators
  71. =============================
  72. Adding support for user-defined binary operators is pretty simple with
  73. our current framework. We'll first add support for the unary/binary
  74. keywords:
  75. .. code-block:: ocaml
  76. type token =
  77. ...
  78. (* operators *)
  79. | Binary | Unary
  80. ...
  81. and lex_ident buffer = parser
  82. ...
  83. | "for" -> [< 'Token.For; stream >]
  84. | "in" -> [< 'Token.In; stream >]
  85. | "binary" -> [< 'Token.Binary; stream >]
  86. | "unary" -> [< 'Token.Unary; stream >]
  87. This just adds lexer support for the unary and binary keywords, like we
  88. did in `previous chapters <OCamlLangImpl5.html#lexer-extensions-for-if-then-else>`_. One nice
  89. thing about our current AST, is that we represent binary operators with
  90. full generalisation by using their ASCII code as the opcode. For our
  91. extended operators, we'll use this same representation, so we don't need
  92. any new AST or parser support.
  93. On the other hand, we have to be able to represent the definitions of
  94. these new operators, in the "def binary\| 5" part of the function
  95. definition. In our grammar so far, the "name" for the function
  96. definition is parsed as the "prototype" production and into the
  97. ``Ast.Prototype`` AST node. To represent our new user-defined operators
  98. as prototypes, we have to extend the ``Ast.Prototype`` AST node like
  99. this:
  100. .. code-block:: ocaml
  101. (* proto - This type represents the "prototype" for a function, which captures
  102. * its name, and its argument names (thus implicitly the number of arguments the
  103. * function takes). *)
  104. type proto =
  105. | Prototype of string * string array
  106. | BinOpPrototype of string * string array * int
  107. Basically, in addition to knowing a name for the prototype, we now keep
  108. track of whether it was an operator, and if it was, what precedence
  109. level the operator is at. The precedence is only used for binary
  110. operators (as you'll see below, it just doesn't apply for unary
  111. operators). Now that we have a way to represent the prototype for a
  112. user-defined operator, we need to parse it:
  113. .. code-block:: ocaml
  114. (* prototype
  115. * ::= id '(' id* ')'
  116. * ::= binary LETTER number? (id, id)
  117. * ::= unary LETTER number? (id) *)
  118. let parse_prototype =
  119. let rec parse_args accumulator = parser
  120. | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
  121. | [< >] -> accumulator
  122. in
  123. let parse_operator = parser
  124. | [< 'Token.Unary >] -> "unary", 1
  125. | [< 'Token.Binary >] -> "binary", 2
  126. in
  127. let parse_binary_precedence = parser
  128. | [< 'Token.Number n >] -> int_of_float n
  129. | [< >] -> 30
  130. in
  131. parser
  132. | [< 'Token.Ident id;
  133. 'Token.Kwd '(' ?? "expected '(' in prototype";
  134. args=parse_args [];
  135. 'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
  136. (* success. *)
  137. Ast.Prototype (id, Array.of_list (List.rev args))
  138. | [< (prefix, kind)=parse_operator;
  139. 'Token.Kwd op ?? "expected an operator";
  140. (* Read the precedence if present. *)
  141. binary_precedence=parse_binary_precedence;
  142. 'Token.Kwd '(' ?? "expected '(' in prototype";
  143. args=parse_args [];
  144. 'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
  145. let name = prefix ^ (String.make 1 op) in
  146. let args = Array.of_list (List.rev args) in
  147. (* Verify right number of arguments for operator. *)
  148. if Array.length args != kind
  149. then raise (Stream.Error "invalid number of operands for operator")
  150. else
  151. if kind == 1 then
  152. Ast.Prototype (name, args)
  153. else
  154. Ast.BinOpPrototype (name, args, binary_precedence)
  155. | [< >] ->
  156. raise (Stream.Error "expected function name in prototype")
  157. This is all fairly straightforward parsing code, and we have already
  158. seen a lot of similar code in the past. One interesting part about the
  159. code above is the couple lines that set up ``name`` for binary
  160. operators. This builds names like "binary@" for a newly defined "@"
  161. operator. This then takes advantage of the fact that symbol names in the
  162. LLVM symbol table are allowed to have any character in them, including
  163. embedded nul characters.
  164. The next interesting thing to add, is codegen support for these binary
  165. operators. Given our current structure, this is a simple addition of a
  166. default case for our existing binary operator node:
  167. .. code-block:: ocaml
  168. let codegen_expr = function
  169. ...
  170. | Ast.Binary (op, lhs, rhs) ->
  171. let lhs_val = codegen_expr lhs in
  172. let rhs_val = codegen_expr rhs in
  173. begin
  174. match op with
  175. | '+' -> build_add lhs_val rhs_val "addtmp" builder
  176. | '-' -> build_sub lhs_val rhs_val "subtmp" builder
  177. | '*' -> build_mul lhs_val rhs_val "multmp" builder
  178. | '<' ->
  179. (* Convert bool 0/1 to double 0.0 or 1.0 *)
  180. let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
  181. build_uitofp i double_type "booltmp" builder
  182. | _ ->
  183. (* If it wasn't a builtin binary operator, it must be a user defined
  184. * one. Emit a call to it. *)
  185. let callee = "binary" ^ (String.make 1 op) in
  186. let callee =
  187. match lookup_function callee the_module with
  188. | Some callee -> callee
  189. | None -> raise (Error "binary operator not found!")
  190. in
  191. build_call callee [|lhs_val; rhs_val|] "binop" builder
  192. end
  193. As you can see above, the new code is actually really simple. It just
  194. does a lookup for the appropriate operator in the symbol table and
  195. generates a function call to it. Since user-defined operators are just
  196. built as normal functions (because the "prototype" boils down to a
  197. function with the right name) everything falls into place.
  198. The final piece of code we are missing, is a bit of top level magic:
  199. .. code-block:: ocaml
  200. let codegen_func the_fpm = function
  201. | Ast.Function (proto, body) ->
  202. Hashtbl.clear named_values;
  203. let the_function = codegen_proto proto in
  204. (* If this is an operator, install it. *)
  205. begin match proto with
  206. | Ast.BinOpPrototype (name, args, prec) ->
  207. let op = name.[String.length name - 1] in
  208. Hashtbl.add Parser.binop_precedence op prec;
  209. | _ -> ()
  210. end;
  211. (* Create a new basic block to start insertion into. *)
  212. let bb = append_block context "entry" the_function in
  213. position_at_end bb builder;
  214. ...
  215. Basically, before codegening a function, if it is a user-defined
  216. operator, we register it in the precedence table. This allows the binary
  217. operator parsing logic we already have in place to handle it. Since we
  218. are working on a fully-general operator precedence parser, this is all
  219. we need to do to "extend the grammar".
  220. Now we have useful user-defined binary operators. This builds a lot on
  221. the previous framework we built for other operators. Adding unary
  222. operators is a bit more challenging, because we don't have any framework
  223. for it yet - lets see what it takes.
  224. User-defined Unary Operators
  225. ============================
  226. Since we don't currently support unary operators in the Kaleidoscope
  227. language, we'll need to add everything to support them. Above, we added
  228. simple support for the 'unary' keyword to the lexer. In addition to
  229. that, we need an AST node:
  230. .. code-block:: ocaml
  231. type expr =
  232. ...
  233. (* variant for a unary operator. *)
  234. | Unary of char * expr
  235. ...
  236. This AST node is very simple and obvious by now. It directly mirrors the
  237. binary operator AST node, except that it only has one child. With this,
  238. we need to add the parsing logic. Parsing a unary operator is pretty
  239. simple: we'll add a new function to do it:
  240. .. code-block:: ocaml
  241. (* unary
  242. * ::= primary
  243. * ::= '!' unary *)
  244. and parse_unary = parser
  245. (* If this is a unary operator, read it. *)
  246. | [< 'Token.Kwd op when op != '(' && op != ')'; operand=parse_expr >] ->
  247. Ast.Unary (op, operand)
  248. (* If the current token is not an operator, it must be a primary expr. *)
  249. | [< stream >] -> parse_primary stream
  250. The grammar we add is pretty straightforward here. If we see a unary
  251. operator when parsing a primary operator, we eat the operator as a
  252. prefix and parse the remaining piece as another unary operator. This
  253. allows us to handle multiple unary operators (e.g. "!!x"). Note that
  254. unary operators can't have ambiguous parses like binary operators can,
  255. so there is no need for precedence information.
  256. The problem with this function, is that we need to call ParseUnary from
  257. somewhere. To do this, we change previous callers of ParsePrimary to
  258. call ``parse_unary`` instead:
  259. .. code-block:: ocaml
  260. (* binoprhs
  261. * ::= ('+' primary)* *)
  262. and parse_bin_rhs expr_prec lhs stream =
  263. ...
  264. (* Parse the unary expression after the binary operator. *)
  265. let rhs = parse_unary stream in
  266. ...
  267. ...
  268. (* expression
  269. * ::= primary binoprhs *)
  270. and parse_expr = parser
  271. | [< lhs=parse_unary; stream >] -> parse_bin_rhs 0 lhs stream
  272. With these two simple changes, we are now able to parse unary operators
  273. and build the AST for them. Next up, we need to add parser support for
  274. prototypes, to parse the unary operator prototype. We extend the binary
  275. operator code above with:
  276. .. code-block:: ocaml
  277. (* prototype
  278. * ::= id '(' id* ')'
  279. * ::= binary LETTER number? (id, id)
  280. * ::= unary LETTER number? (id) *)
  281. let parse_prototype =
  282. let rec parse_args accumulator = parser
  283. | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
  284. | [< >] -> accumulator
  285. in
  286. let parse_operator = parser
  287. | [< 'Token.Unary >] -> "unary", 1
  288. | [< 'Token.Binary >] -> "binary", 2
  289. in
  290. let parse_binary_precedence = parser
  291. | [< 'Token.Number n >] -> int_of_float n
  292. | [< >] -> 30
  293. in
  294. parser
  295. | [< 'Token.Ident id;
  296. 'Token.Kwd '(' ?? "expected '(' in prototype";
  297. args=parse_args [];
  298. 'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
  299. (* success. *)
  300. Ast.Prototype (id, Array.of_list (List.rev args))
  301. | [< (prefix, kind)=parse_operator;
  302. 'Token.Kwd op ?? "expected an operator";
  303. (* Read the precedence if present. *)
  304. binary_precedence=parse_binary_precedence;
  305. 'Token.Kwd '(' ?? "expected '(' in prototype";
  306. args=parse_args [];
  307. 'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
  308. let name = prefix ^ (String.make 1 op) in
  309. let args = Array.of_list (List.rev args) in
  310. (* Verify right number of arguments for operator. *)
  311. if Array.length args != kind
  312. then raise (Stream.Error "invalid number of operands for operator")
  313. else
  314. if kind == 1 then
  315. Ast.Prototype (name, args)
  316. else
  317. Ast.BinOpPrototype (name, args, binary_precedence)
  318. | [< >] ->
  319. raise (Stream.Error "expected function name in prototype")
  320. As with binary operators, we name unary operators with a name that
  321. includes the operator character. This assists us at code generation
  322. time. Speaking of, the final piece we need to add is codegen support for
  323. unary operators. It looks like this:
  324. .. code-block:: ocaml
  325. let rec codegen_expr = function
  326. ...
  327. | Ast.Unary (op, operand) ->
  328. let operand = codegen_expr operand in
  329. let callee = "unary" ^ (String.make 1 op) in
  330. let callee =
  331. match lookup_function callee the_module with
  332. | Some callee -> callee
  333. | None -> raise (Error "unknown unary operator")
  334. in
  335. build_call callee [|operand|] "unop" builder
  336. This code is similar to, but simpler than, the code for binary
  337. operators. It is simpler primarily because it doesn't need to handle any
  338. predefined operators.
  339. Kicking the Tires
  340. =================
  341. It is somewhat hard to believe, but with a few simple extensions we've
  342. covered in the last chapters, we have grown a real-ish language. With
  343. this, we can do a lot of interesting things, including I/O, math, and a
  344. bunch of other things. For example, we can now add a nice sequencing
  345. operator (printd is defined to print out the specified value and a
  346. newline):
  347. ::
  348. ready> extern printd(x);
  349. Read extern: declare double @printd(double)
  350. ready> def binary : 1 (x y) 0; # Low-precedence operator that ignores operands.
  351. ..
  352. ready> printd(123) : printd(456) : printd(789);
  353. 123.000000
  354. 456.000000
  355. 789.000000
  356. Evaluated to 0.000000
  357. We can also define a bunch of other "primitive" operations, such as:
  358. ::
  359. # Logical unary not.
  360. def unary!(v)
  361. if v then
  362. 0
  363. else
  364. 1;
  365. # Unary negate.
  366. def unary-(v)
  367. 0-v;
  368. # Define > with the same precedence as <.
  369. def binary> 10 (LHS RHS)
  370. RHS < LHS;
  371. # Binary logical or, which does not short circuit.
  372. def binary| 5 (LHS RHS)
  373. if LHS then
  374. 1
  375. else if RHS then
  376. 1
  377. else
  378. 0;
  379. # Binary logical and, which does not short circuit.
  380. def binary& 6 (LHS RHS)
  381. if !LHS then
  382. 0
  383. else
  384. !!RHS;
  385. # Define = with slightly lower precedence than relationals.
  386. def binary = 9 (LHS RHS)
  387. !(LHS < RHS | LHS > RHS);
  388. Given the previous if/then/else support, we can also define interesting
  389. functions for I/O. For example, the following prints out a character
  390. whose "density" reflects the value passed in: the lower the value, the
  391. denser the character:
  392. ::
  393. ready>
  394. extern putchard(char)
  395. def printdensity(d)
  396. if d > 8 then
  397. putchard(32) # ' '
  398. else if d > 4 then
  399. putchard(46) # '.'
  400. else if d > 2 then
  401. putchard(43) # '+'
  402. else
  403. putchard(42); # '*'
  404. ...
  405. ready> printdensity(1): printdensity(2): printdensity(3) :
  406. printdensity(4): printdensity(5): printdensity(9): putchard(10);
  407. *++..
  408. Evaluated to 0.000000
  409. Based on these simple primitive operations, we can start to define more
  410. interesting things. For example, here's a little function that solves
  411. for the number of iterations it takes a function in the complex plane to
  412. converge:
  413. ::
  414. # determine whether the specific location diverges.
  415. # Solve for z = z^2 + c in the complex plane.
  416. def mandelconverger(real imag iters creal cimag)
  417. if iters > 255 | (real*real + imag*imag > 4) then
  418. iters
  419. else
  420. mandelconverger(real*real - imag*imag + creal,
  421. 2*real*imag + cimag,
  422. iters+1, creal, cimag);
  423. # return the number of iterations required for the iteration to escape
  424. def mandelconverge(real imag)
  425. mandelconverger(real, imag, 0, real, imag);
  426. This "z = z\ :sup:`2`\ + c" function is a beautiful little creature
  427. that is the basis for computation of the `Mandelbrot
  428. Set <http://en.wikipedia.org/wiki/Mandelbrot_set>`_. Our
  429. ``mandelconverge`` function returns the number of iterations that it
  430. takes for a complex orbit to escape, saturating to 255. This is not a
  431. very useful function by itself, but if you plot its value over a
  432. two-dimensional plane, you can see the Mandelbrot set. Given that we are
  433. limited to using putchard here, our amazing graphical output is limited,
  434. but we can whip together something using the density plotter above:
  435. ::
  436. # compute and plot the mandelbrot set with the specified 2 dimensional range
  437. # info.
  438. def mandelhelp(xmin xmax xstep ymin ymax ystep)
  439. for y = ymin, y < ymax, ystep in (
  440. (for x = xmin, x < xmax, xstep in
  441. printdensity(mandelconverge(x,y)))
  442. : putchard(10)
  443. )
  444. # mandel - This is a convenient helper function for plotting the mandelbrot set
  445. # from the specified position with the specified Magnification.
  446. def mandel(realstart imagstart realmag imagmag)
  447. mandelhelp(realstart, realstart+realmag*78, realmag,
  448. imagstart, imagstart+imagmag*40, imagmag);
  449. Given this, we can try plotting out the mandelbrot set! Lets try it out:
  450. ::
  451. ready> mandel(-2.3, -1.3, 0.05, 0.07);
  452. *******************************+++++++++++*************************************
  453. *************************+++++++++++++++++++++++*******************************
  454. **********************+++++++++++++++++++++++++++++****************************
  455. *******************+++++++++++++++++++++.. ...++++++++*************************
  456. *****************++++++++++++++++++++++.... ...+++++++++***********************
  457. ***************+++++++++++++++++++++++..... ...+++++++++*********************
  458. **************+++++++++++++++++++++++.... ....+++++++++********************
  459. *************++++++++++++++++++++++...... .....++++++++*******************
  460. ************+++++++++++++++++++++....... .......+++++++******************
  461. ***********+++++++++++++++++++.... ... .+++++++*****************
  462. **********+++++++++++++++++....... .+++++++****************
  463. *********++++++++++++++........... ...+++++++***************
  464. ********++++++++++++............ ...++++++++**************
  465. ********++++++++++... .......... .++++++++**************
  466. *******+++++++++..... .+++++++++*************
  467. *******++++++++...... ..+++++++++*************
  468. *******++++++....... ..+++++++++*************
  469. *******+++++...... ..+++++++++*************
  470. *******.... .... ...+++++++++*************
  471. *******.... . ...+++++++++*************
  472. *******+++++...... ...+++++++++*************
  473. *******++++++....... ..+++++++++*************
  474. *******++++++++...... .+++++++++*************
  475. *******+++++++++..... ..+++++++++*************
  476. ********++++++++++... .......... .++++++++**************
  477. ********++++++++++++............ ...++++++++**************
  478. *********++++++++++++++.......... ...+++++++***************
  479. **********++++++++++++++++........ .+++++++****************
  480. **********++++++++++++++++++++.... ... ..+++++++****************
  481. ***********++++++++++++++++++++++....... .......++++++++*****************
  482. ************+++++++++++++++++++++++...... ......++++++++******************
  483. **************+++++++++++++++++++++++.... ....++++++++********************
  484. ***************+++++++++++++++++++++++..... ...+++++++++*********************
  485. *****************++++++++++++++++++++++.... ...++++++++***********************
  486. *******************+++++++++++++++++++++......++++++++*************************
  487. *********************++++++++++++++++++++++.++++++++***************************
  488. *************************+++++++++++++++++++++++*******************************
  489. ******************************+++++++++++++************************************
  490. *******************************************************************************
  491. *******************************************************************************
  492. *******************************************************************************
  493. Evaluated to 0.000000
  494. ready> mandel(-2, -1, 0.02, 0.04);
  495. **************************+++++++++++++++++++++++++++++++++++++++++++++++++++++
  496. ***********************++++++++++++++++++++++++++++++++++++++++++++++++++++++++
  497. *********************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++.
  498. *******************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++...
  499. *****************+++++++++++++++++++++++++++++++++++++++++++++++++++++++++.....
  500. ***************++++++++++++++++++++++++++++++++++++++++++++++++++++++++........
  501. **************++++++++++++++++++++++++++++++++++++++++++++++++++++++...........
  502. ************+++++++++++++++++++++++++++++++++++++++++++++++++++++..............
  503. ***********++++++++++++++++++++++++++++++++++++++++++++++++++........ .
  504. **********++++++++++++++++++++++++++++++++++++++++++++++.............
  505. ********+++++++++++++++++++++++++++++++++++++++++++..................
  506. *******+++++++++++++++++++++++++++++++++++++++.......................
  507. ******+++++++++++++++++++++++++++++++++++...........................
  508. *****++++++++++++++++++++++++++++++++............................
  509. *****++++++++++++++++++++++++++++...............................
  510. ****++++++++++++++++++++++++++...... .........................
  511. ***++++++++++++++++++++++++......... ...... ...........
  512. ***++++++++++++++++++++++............
  513. **+++++++++++++++++++++..............
  514. **+++++++++++++++++++................
  515. *++++++++++++++++++.................
  516. *++++++++++++++++............ ...
  517. *++++++++++++++..............
  518. *+++....++++................
  519. *.......... ...........
  520. *
  521. *.......... ...........
  522. *+++....++++................
  523. *++++++++++++++..............
  524. *++++++++++++++++............ ...
  525. *++++++++++++++++++.................
  526. **+++++++++++++++++++................
  527. **+++++++++++++++++++++..............
  528. ***++++++++++++++++++++++............
  529. ***++++++++++++++++++++++++......... ...... ...........
  530. ****++++++++++++++++++++++++++...... .........................
  531. *****++++++++++++++++++++++++++++...............................
  532. *****++++++++++++++++++++++++++++++++............................
  533. ******+++++++++++++++++++++++++++++++++++...........................
  534. *******+++++++++++++++++++++++++++++++++++++++.......................
  535. ********+++++++++++++++++++++++++++++++++++++++++++..................
  536. Evaluated to 0.000000
  537. ready> mandel(-0.9, -1.4, 0.02, 0.03);
  538. *******************************************************************************
  539. *******************************************************************************
  540. *******************************************************************************
  541. **********+++++++++++++++++++++************************************************
  542. *+++++++++++++++++++++++++++++++++++++++***************************************
  543. +++++++++++++++++++++++++++++++++++++++++++++**********************************
  544. ++++++++++++++++++++++++++++++++++++++++++++++++++*****************************
  545. ++++++++++++++++++++++++++++++++++++++++++++++++++++++*************************
  546. +++++++++++++++++++++++++++++++++++++++++++++++++++++++++**********************
  547. +++++++++++++++++++++++++++++++++.........++++++++++++++++++*******************
  548. +++++++++++++++++++++++++++++++.... ......+++++++++++++++++++****************
  549. +++++++++++++++++++++++++++++....... ........+++++++++++++++++++**************
  550. ++++++++++++++++++++++++++++........ ........++++++++++++++++++++************
  551. +++++++++++++++++++++++++++......... .. ...+++++++++++++++++++++**********
  552. ++++++++++++++++++++++++++........... ....++++++++++++++++++++++********
  553. ++++++++++++++++++++++++............. .......++++++++++++++++++++++******
  554. +++++++++++++++++++++++............. ........+++++++++++++++++++++++****
  555. ++++++++++++++++++++++........... ..........++++++++++++++++++++++***
  556. ++++++++++++++++++++........... .........++++++++++++++++++++++*
  557. ++++++++++++++++++............ ...........++++++++++++++++++++
  558. ++++++++++++++++............... .............++++++++++++++++++
  559. ++++++++++++++................. ...............++++++++++++++++
  560. ++++++++++++.................. .................++++++++++++++
  561. +++++++++.................. .................+++++++++++++
  562. ++++++........ . ......... ..++++++++++++
  563. ++............ ...... ....++++++++++
  564. .............. ...++++++++++
  565. .............. ....+++++++++
  566. .............. .....++++++++
  567. ............. ......++++++++
  568. ........... .......++++++++
  569. ......... ........+++++++
  570. ......... ........+++++++
  571. ......... ....+++++++
  572. ........ ...+++++++
  573. ....... ...+++++++
  574. ....+++++++
  575. .....+++++++
  576. ....+++++++
  577. ....+++++++
  578. ....+++++++
  579. Evaluated to 0.000000
  580. ready> ^D
  581. At this point, you may be starting to realize that Kaleidoscope is a
  582. real and powerful language. It may not be self-similar :), but it can be
  583. used to plot things that are!
  584. With this, we conclude the "adding user-defined operators" chapter of
  585. the tutorial. We have successfully augmented our language, adding the
  586. ability to extend the language in the library, and we have shown how
  587. this can be used to build a simple but interesting end-user application
  588. in Kaleidoscope. At this point, Kaleidoscope can build a variety of
  589. applications that are functional and can call functions with
  590. side-effects, but it can't actually define and mutate a variable itself.
  591. Strikingly, variable mutation is an important feature of some languages,
  592. and it is not at all obvious how to `add support for mutable
  593. variables <OCamlLangImpl7.html>`_ without having to add an "SSA
  594. construction" phase to your front-end. In the next chapter, we will
  595. describe how you can add variable mutation without building SSA in your
  596. front-end.
  597. Full Code Listing
  598. =================
  599. Here is the complete code listing for our running example, enhanced with
  600. the if/then/else and for expressions.. To build this example, use:
  601. .. code-block:: bash
  602. # Compile
  603. ocamlbuild toy.byte
  604. # Run
  605. ./toy.byte
  606. Here is the code:
  607. \_tags:
  608. ::
  609. <{lexer,parser}.ml>: use_camlp4, pp(camlp4of)
  610. <*.{byte,native}>: g++, use_llvm, use_llvm_analysis
  611. <*.{byte,native}>: use_llvm_executionengine, use_llvm_target
  612. <*.{byte,native}>: use_llvm_scalar_opts, use_bindings
  613. myocamlbuild.ml:
  614. .. code-block:: ocaml
  615. open Ocamlbuild_plugin;;
  616. ocaml_lib ~extern:true "llvm";;
  617. ocaml_lib ~extern:true "llvm_analysis";;
  618. ocaml_lib ~extern:true "llvm_executionengine";;
  619. ocaml_lib ~extern:true "llvm_target";;
  620. ocaml_lib ~extern:true "llvm_scalar_opts";;
  621. flag ["link"; "ocaml"; "g++"] (S[A"-cc"; A"g++"; A"-cclib"; A"-rdynamic"]);;
  622. dep ["link"; "ocaml"; "use_bindings"] ["bindings.o"];;
  623. token.ml:
  624. .. code-block:: ocaml
  625. (*===----------------------------------------------------------------------===
  626. * Lexer Tokens
  627. *===----------------------------------------------------------------------===*)
  628. (* The lexer returns these 'Kwd' if it is an unknown character, otherwise one of
  629. * these others for known things. *)
  630. type token =
  631. (* commands *)
  632. | Def | Extern
  633. (* primary *)
  634. | Ident of string | Number of float
  635. (* unknown *)
  636. | Kwd of char
  637. (* control *)
  638. | If | Then | Else
  639. | For | In
  640. (* operators *)
  641. | Binary | Unary
  642. lexer.ml:
  643. .. code-block:: ocaml
  644. (*===----------------------------------------------------------------------===
  645. * Lexer
  646. *===----------------------------------------------------------------------===*)
  647. let rec lex = parser
  648. (* Skip any whitespace. *)
  649. | [< ' (' ' | '\n' | '\r' | '\t'); stream >] -> lex stream
  650. (* identifier: [a-zA-Z][a-zA-Z0-9] *)
  651. | [< ' ('A' .. 'Z' | 'a' .. 'z' as c); stream >] ->
  652. let buffer = Buffer.create 1 in
  653. Buffer.add_char buffer c;
  654. lex_ident buffer stream
  655. (* number: [0-9.]+ *)
  656. | [< ' ('0' .. '9' as c); stream >] ->
  657. let buffer = Buffer.create 1 in
  658. Buffer.add_char buffer c;
  659. lex_number buffer stream
  660. (* Comment until end of line. *)
  661. | [< ' ('#'); stream >] ->
  662. lex_comment stream
  663. (* Otherwise, just return the character as its ascii value. *)
  664. | [< 'c; stream >] ->
  665. [< 'Token.Kwd c; lex stream >]
  666. (* end of stream. *)
  667. | [< >] -> [< >]
  668. and lex_number buffer = parser
  669. | [< ' ('0' .. '9' | '.' as c); stream >] ->
  670. Buffer.add_char buffer c;
  671. lex_number buffer stream
  672. | [< stream=lex >] ->
  673. [< 'Token.Number (float_of_string (Buffer.contents buffer)); stream >]
  674. and lex_ident buffer = parser
  675. | [< ' ('A' .. 'Z' | 'a' .. 'z' | '0' .. '9' as c); stream >] ->
  676. Buffer.add_char buffer c;
  677. lex_ident buffer stream
  678. | [< stream=lex >] ->
  679. match Buffer.contents buffer with
  680. | "def" -> [< 'Token.Def; stream >]
  681. | "extern" -> [< 'Token.Extern; stream >]
  682. | "if" -> [< 'Token.If; stream >]
  683. | "then" -> [< 'Token.Then; stream >]
  684. | "else" -> [< 'Token.Else; stream >]
  685. | "for" -> [< 'Token.For; stream >]
  686. | "in" -> [< 'Token.In; stream >]
  687. | "binary" -> [< 'Token.Binary; stream >]
  688. | "unary" -> [< 'Token.Unary; stream >]
  689. | id -> [< 'Token.Ident id; stream >]
  690. and lex_comment = parser
  691. | [< ' ('\n'); stream=lex >] -> stream
  692. | [< 'c; e=lex_comment >] -> e
  693. | [< >] -> [< >]
  694. ast.ml:
  695. .. code-block:: ocaml
  696. (*===----------------------------------------------------------------------===
  697. * Abstract Syntax Tree (aka Parse Tree)
  698. *===----------------------------------------------------------------------===*)
  699. (* expr - Base type for all expression nodes. *)
  700. type expr =
  701. (* variant for numeric literals like "1.0". *)
  702. | Number of float
  703. (* variant for referencing a variable, like "a". *)
  704. | Variable of string
  705. (* variant for a unary operator. *)
  706. | Unary of char * expr
  707. (* variant for a binary operator. *)
  708. | Binary of char * expr * expr
  709. (* variant for function calls. *)
  710. | Call of string * expr array
  711. (* variant for if/then/else. *)
  712. | If of expr * expr * expr
  713. (* variant for for/in. *)
  714. | For of string * expr * expr * expr option * expr
  715. (* proto - This type represents the "prototype" for a function, which captures
  716. * its name, and its argument names (thus implicitly the number of arguments the
  717. * function takes). *)
  718. type proto =
  719. | Prototype of string * string array
  720. | BinOpPrototype of string * string array * int
  721. (* func - This type represents a function definition itself. *)
  722. type func = Function of proto * expr
  723. parser.ml:
  724. .. code-block:: ocaml
  725. (*===---------------------------------------------------------------------===
  726. * Parser
  727. *===---------------------------------------------------------------------===*)
  728. (* binop_precedence - This holds the precedence for each binary operator that is
  729. * defined *)
  730. let binop_precedence:(char, int) Hashtbl.t = Hashtbl.create 10
  731. (* precedence - Get the precedence of the pending binary operator token. *)
  732. let precedence c = try Hashtbl.find binop_precedence c with Not_found -> -1
  733. (* primary
  734. * ::= identifier
  735. * ::= numberexpr
  736. * ::= parenexpr
  737. * ::= ifexpr
  738. * ::= forexpr *)
  739. let rec parse_primary = parser
  740. (* numberexpr ::= number *)
  741. | [< 'Token.Number n >] -> Ast.Number n
  742. (* parenexpr ::= '(' expression ')' *)
  743. | [< 'Token.Kwd '('; e=parse_expr; 'Token.Kwd ')' ?? "expected ')'" >] -> e
  744. (* identifierexpr
  745. * ::= identifier
  746. * ::= identifier '(' argumentexpr ')' *)
  747. | [< 'Token.Ident id; stream >] ->
  748. let rec parse_args accumulator = parser
  749. | [< e=parse_expr; stream >] ->
  750. begin parser
  751. | [< 'Token.Kwd ','; e=parse_args (e :: accumulator) >] -> e
  752. | [< >] -> e :: accumulator
  753. end stream
  754. | [< >] -> accumulator
  755. in
  756. let rec parse_ident id = parser
  757. (* Call. *)
  758. | [< 'Token.Kwd '(';
  759. args=parse_args [];
  760. 'Token.Kwd ')' ?? "expected ')'">] ->
  761. Ast.Call (id, Array.of_list (List.rev args))
  762. (* Simple variable ref. *)
  763. | [< >] -> Ast.Variable id
  764. in
  765. parse_ident id stream
  766. (* ifexpr ::= 'if' expr 'then' expr 'else' expr *)
  767. | [< 'Token.If; c=parse_expr;
  768. 'Token.Then ?? "expected 'then'"; t=parse_expr;
  769. 'Token.Else ?? "expected 'else'"; e=parse_expr >] ->
  770. Ast.If (c, t, e)
  771. (* forexpr
  772. ::= 'for' identifier '=' expr ',' expr (',' expr)? 'in' expression *)
  773. | [< 'Token.For;
  774. 'Token.Ident id ?? "expected identifier after for";
  775. 'Token.Kwd '=' ?? "expected '=' after for";
  776. stream >] ->
  777. begin parser
  778. | [<
  779. start=parse_expr;
  780. 'Token.Kwd ',' ?? "expected ',' after for";
  781. end_=parse_expr;
  782. stream >] ->
  783. let step =
  784. begin parser
  785. | [< 'Token.Kwd ','; step=parse_expr >] -> Some step
  786. | [< >] -> None
  787. end stream
  788. in
  789. begin parser
  790. | [< 'Token.In; body=parse_expr >] ->
  791. Ast.For (id, start, end_, step, body)
  792. | [< >] ->
  793. raise (Stream.Error "expected 'in' after for")
  794. end stream
  795. | [< >] ->
  796. raise (Stream.Error "expected '=' after for")
  797. end stream
  798. | [< >] -> raise (Stream.Error "unknown token when expecting an expression.")
  799. (* unary
  800. * ::= primary
  801. * ::= '!' unary *)
  802. and parse_unary = parser
  803. (* If this is a unary operator, read it. *)
  804. | [< 'Token.Kwd op when op != '(' && op != ')'; operand=parse_expr >] ->
  805. Ast.Unary (op, operand)
  806. (* If the current token is not an operator, it must be a primary expr. *)
  807. | [< stream >] -> parse_primary stream
  808. (* binoprhs
  809. * ::= ('+' primary)* *)
  810. and parse_bin_rhs expr_prec lhs stream =
  811. match Stream.peek stream with
  812. (* If this is a binop, find its precedence. *)
  813. | Some (Token.Kwd c) when Hashtbl.mem binop_precedence c ->
  814. let token_prec = precedence c in
  815. (* If this is a binop that binds at least as tightly as the current binop,
  816. * consume it, otherwise we are done. *)
  817. if token_prec < expr_prec then lhs else begin
  818. (* Eat the binop. *)
  819. Stream.junk stream;
  820. (* Parse the unary expression after the binary operator. *)
  821. let rhs = parse_unary stream in
  822. (* Okay, we know this is a binop. *)
  823. let rhs =
  824. match Stream.peek stream with
  825. | Some (Token.Kwd c2) ->
  826. (* If BinOp binds less tightly with rhs than the operator after
  827. * rhs, let the pending operator take rhs as its lhs. *)
  828. let next_prec = precedence c2 in
  829. if token_prec < next_prec
  830. then parse_bin_rhs (token_prec + 1) rhs stream
  831. else rhs
  832. | _ -> rhs
  833. in
  834. (* Merge lhs/rhs. *)
  835. let lhs = Ast.Binary (c, lhs, rhs) in
  836. parse_bin_rhs expr_prec lhs stream
  837. end
  838. | _ -> lhs
  839. (* expression
  840. * ::= primary binoprhs *)
  841. and parse_expr = parser
  842. | [< lhs=parse_unary; stream >] -> parse_bin_rhs 0 lhs stream
  843. (* prototype
  844. * ::= id '(' id* ')'
  845. * ::= binary LETTER number? (id, id)
  846. * ::= unary LETTER number? (id) *)
  847. let parse_prototype =
  848. let rec parse_args accumulator = parser
  849. | [< 'Token.Ident id; e=parse_args (id::accumulator) >] -> e
  850. | [< >] -> accumulator
  851. in
  852. let parse_operator = parser
  853. | [< 'Token.Unary >] -> "unary", 1
  854. | [< 'Token.Binary >] -> "binary", 2
  855. in
  856. let parse_binary_precedence = parser
  857. | [< 'Token.Number n >] -> int_of_float n
  858. | [< >] -> 30
  859. in
  860. parser
  861. | [< 'Token.Ident id;
  862. 'Token.Kwd '(' ?? "expected '(' in prototype";
  863. args=parse_args [];
  864. 'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
  865. (* success. *)
  866. Ast.Prototype (id, Array.of_list (List.rev args))
  867. | [< (prefix, kind)=parse_operator;
  868. 'Token.Kwd op ?? "expected an operator";
  869. (* Read the precedence if present. *)
  870. binary_precedence=parse_binary_precedence;
  871. 'Token.Kwd '(' ?? "expected '(' in prototype";
  872. args=parse_args [];
  873. 'Token.Kwd ')' ?? "expected ')' in prototype" >] ->
  874. let name = prefix ^ (String.make 1 op) in
  875. let args = Array.of_list (List.rev args) in
  876. (* Verify right number of arguments for operator. *)
  877. if Array.length args != kind
  878. then raise (Stream.Error "invalid number of operands for operator")
  879. else
  880. if kind == 1 then
  881. Ast.Prototype (name, args)
  882. else
  883. Ast.BinOpPrototype (name, args, binary_precedence)
  884. | [< >] ->
  885. raise (Stream.Error "expected function name in prototype")
  886. (* definition ::= 'def' prototype expression *)
  887. let parse_definition = parser
  888. | [< 'Token.Def; p=parse_prototype; e=parse_expr >] ->
  889. Ast.Function (p, e)
  890. (* toplevelexpr ::= expression *)
  891. let parse_toplevel = parser
  892. | [< e=parse_expr >] ->
  893. (* Make an anonymous proto. *)
  894. Ast.Function (Ast.Prototype ("", [||]), e)
  895. (* external ::= 'extern' prototype *)
  896. let parse_extern = parser
  897. | [< 'Token.Extern; e=parse_prototype >] -> e
  898. codegen.ml:
  899. .. code-block:: ocaml
  900. (*===----------------------------------------------------------------------===
  901. * Code Generation
  902. *===----------------------------------------------------------------------===*)
  903. open Llvm
  904. exception Error of string
  905. let context = global_context ()
  906. let the_module = create_module context "my cool jit"
  907. let builder = builder context
  908. let named_values:(string, llvalue) Hashtbl.t = Hashtbl.create 10
  909. let double_type = double_type context
  910. let rec codegen_expr = function
  911. | Ast.Number n -> const_float double_type n
  912. | Ast.Variable name ->
  913. (try Hashtbl.find named_values name with
  914. | Not_found -> raise (Error "unknown variable name"))
  915. | Ast.Unary (op, operand) ->
  916. let operand = codegen_expr operand in
  917. let callee = "unary" ^ (String.make 1 op) in
  918. let callee =
  919. match lookup_function callee the_module with
  920. | Some callee -> callee
  921. | None -> raise (Error "unknown unary operator")
  922. in
  923. build_call callee [|operand|] "unop" builder
  924. | Ast.Binary (op, lhs, rhs) ->
  925. let lhs_val = codegen_expr lhs in
  926. let rhs_val = codegen_expr rhs in
  927. begin
  928. match op with
  929. | '+' -> build_add lhs_val rhs_val "addtmp" builder
  930. | '-' -> build_sub lhs_val rhs_val "subtmp" builder
  931. | '*' -> build_mul lhs_val rhs_val "multmp" builder
  932. | '<' ->
  933. (* Convert bool 0/1 to double 0.0 or 1.0 *)
  934. let i = build_fcmp Fcmp.Ult lhs_val rhs_val "cmptmp" builder in
  935. build_uitofp i double_type "booltmp" builder
  936. | _ ->
  937. (* If it wasn't a builtin binary operator, it must be a user defined
  938. * one. Emit a call to it. *)
  939. let callee = "binary" ^ (String.make 1 op) in
  940. let callee =
  941. match lookup_function callee the_module with
  942. | Some callee -> callee
  943. | None -> raise (Error "binary operator not found!")
  944. in
  945. build_call callee [|lhs_val; rhs_val|] "binop" builder
  946. end
  947. | Ast.Call (callee, args) ->
  948. (* Look up the name in the module table. *)
  949. let callee =
  950. match lookup_function callee the_module with
  951. | Some callee -> callee
  952. | None -> raise (Error "unknown function referenced")
  953. in
  954. let params = params callee in
  955. (* If argument mismatch error. *)
  956. if Array.length params == Array.length args then () else
  957. raise (Error "incorrect # arguments passed");
  958. let args = Array.map codegen_expr args in
  959. build_call callee args "calltmp" builder
  960. | Ast.If (cond, then_, else_) ->
  961. let cond = codegen_expr cond in
  962. (* Convert condition to a bool by comparing equal to 0.0 *)
  963. let zero = const_float double_type 0.0 in
  964. let cond_val = build_fcmp Fcmp.One cond zero "ifcond" builder in
  965. (* Grab the first block so that we might later add the conditional branch
  966. * to it at the end of the function. *)
  967. let start_bb = insertion_block builder in
  968. let the_function = block_parent start_bb in
  969. let then_bb = append_block context "then" the_function in
  970. (* Emit 'then' value. *)
  971. position_at_end then_bb builder;
  972. let then_val = codegen_expr then_ in
  973. (* Codegen of 'then' can change the current block, update then_bb for the
  974. * phi. We create a new name because one is used for the phi node, and the
  975. * other is used for the conditional branch. *)
  976. let new_then_bb = insertion_block builder in
  977. (* Emit 'else' value. *)
  978. let else_bb = append_block context "else" the_function in
  979. position_at_end else_bb builder;
  980. let else_val = codegen_expr else_ in
  981. (* Codegen of 'else' can change the current block, update else_bb for the
  982. * phi. *)
  983. let new_else_bb = insertion_block builder in
  984. (* Emit merge block. *)
  985. let merge_bb = append_block context "ifcont" the_function in
  986. position_at_end merge_bb builder;
  987. let incoming = [(then_val, new_then_bb); (else_val, new_else_bb)] in
  988. let phi = build_phi incoming "iftmp" builder in
  989. (* Return to the start block to add the conditional branch. *)
  990. position_at_end start_bb builder;
  991. ignore (build_cond_br cond_val then_bb else_bb builder);
  992. (* Set a unconditional branch at the end of the 'then' block and the
  993. * 'else' block to the 'merge' block. *)
  994. position_at_end new_then_bb builder; ignore (build_br merge_bb builder);
  995. position_at_end new_else_bb builder; ignore (build_br merge_bb builder);
  996. (* Finally, set the builder to the end of the merge block. *)
  997. position_at_end merge_bb builder;
  998. phi
  999. | Ast.For (var_name, start, end_, step, body) ->
  1000. (* Emit the start code first, without 'variable' in scope. *)
  1001. let start_val = codegen_expr start in
  1002. (* Make the new basic block for the loop header, inserting after current
  1003. * block. *)
  1004. let preheader_bb = insertion_block builder in
  1005. let the_function = block_parent preheader_bb in
  1006. let loop_bb = append_block context "loop" the_function in
  1007. (* Insert an explicit fall through from the current block to the
  1008. * loop_bb. *)
  1009. ignore (build_br loop_bb builder);
  1010. (* Start insertion in loop_bb. *)
  1011. position_at_end loop_bb builder;
  1012. (* Start the PHI node with an entry for start. *)
  1013. let variable = build_phi [(start_val, preheader_bb)] var_name builder in
  1014. (* Within the loop, the variable is defined equal to the PHI node. If it
  1015. * shadows an existing variable, we have to restore it, so save it
  1016. * now. *)
  1017. let old_val =
  1018. try Some (Hashtbl.find named_values var_name) with Not_found -> None
  1019. in
  1020. Hashtbl.add named_values var_name variable;
  1021. (* Emit the body of the loop. This, like any other expr, can change the
  1022. * current BB. Note that we ignore the value computed by the body, but
  1023. * don't allow an error *)
  1024. ignore (codegen_expr body);
  1025. (* Emit the step value. *)
  1026. let step_val =
  1027. match step with
  1028. | Some step -> codegen_expr step
  1029. (* If not specified, use 1.0. *)
  1030. | None -> const_float double_type 1.0
  1031. in
  1032. let next_var = build_add variable step_val "nextvar" builder in
  1033. (* Compute the end condition. *)
  1034. let end_cond = codegen_expr end_ in
  1035. (* Convert condition to a bool by comparing equal to 0.0. *)
  1036. let zero = const_float double_type 0.0 in
  1037. let end_cond = build_fcmp Fcmp.One end_cond zero "loopcond" builder in
  1038. (* Create the "after loop" block and insert it. *)
  1039. let loop_end_bb = insertion_block builder in
  1040. let after_bb = append_block context "afterloop" the_function in
  1041. (* Insert the conditional branch into the end of loop_end_bb. *)
  1042. ignore (build_cond_br end_cond loop_bb after_bb builder);
  1043. (* Any new code will be inserted in after_bb. *)
  1044. position_at_end after_bb builder;
  1045. (* Add a new entry to the PHI node for the backedge. *)
  1046. add_incoming (next_var, loop_end_bb) variable;
  1047. (* Restore the unshadowed variable. *)
  1048. begin match old_val with
  1049. | Some old_val -> Hashtbl.add named_values var_name old_val
  1050. | None -> ()
  1051. end;
  1052. (* for expr always returns 0.0. *)
  1053. const_null double_type
  1054. let codegen_proto = function
  1055. | Ast.Prototype (name, args) | Ast.BinOpPrototype (name, args, _) ->
  1056. (* Make the function type: double(double,double) etc. *)
  1057. let doubles = Array.make (Array.length args) double_type in
  1058. let ft = function_type double_type doubles in
  1059. let f =
  1060. match lookup_function name the_module with
  1061. | None -> declare_function name ft the_module
  1062. (* If 'f' conflicted, there was already something named 'name'. If it
  1063. * has a body, don't allow redefinition or reextern. *)
  1064. | Some f ->
  1065. (* If 'f' already has a body, reject this. *)
  1066. if block_begin f <> At_end f then
  1067. raise (Error "redefinition of function");
  1068. (* If 'f' took a different number of arguments, reject. *)
  1069. if element_type (type_of f) <> ft then
  1070. raise (Error "redefinition of function with different # args");
  1071. f
  1072. in
  1073. (* Set names for all arguments. *)
  1074. Array.iteri (fun i a ->
  1075. let n = args.(i) in
  1076. set_value_name n a;
  1077. Hashtbl.add named_values n a;
  1078. ) (params f);
  1079. f
  1080. let codegen_func the_fpm = function
  1081. | Ast.Function (proto, body) ->
  1082. Hashtbl.clear named_values;
  1083. let the_function = codegen_proto proto in
  1084. (* If this is an operator, install it. *)
  1085. begin match proto with
  1086. | Ast.BinOpPrototype (name, args, prec) ->
  1087. let op = name.[String.length name - 1] in
  1088. Hashtbl.add Parser.binop_precedence op prec;
  1089. | _ -> ()
  1090. end;
  1091. (* Create a new basic block to start insertion into. *)
  1092. let bb = append_block context "entry" the_function in
  1093. position_at_end bb builder;
  1094. try
  1095. let ret_val = codegen_expr body in
  1096. (* Finish off the function. *)
  1097. let _ = build_ret ret_val builder in
  1098. (* Validate the generated code, checking for consistency. *)
  1099. Llvm_analysis.assert_valid_function the_function;
  1100. (* Optimize the function. *)
  1101. let _ = PassManager.run_function the_function the_fpm in
  1102. the_function
  1103. with e ->
  1104. delete_function the_function;
  1105. raise e
  1106. toplevel.ml:
  1107. .. code-block:: ocaml
  1108. (*===----------------------------------------------------------------------===
  1109. * Top-Level parsing and JIT Driver
  1110. *===----------------------------------------------------------------------===*)
  1111. open Llvm
  1112. open Llvm_executionengine
  1113. (* top ::= definition | external | expression | ';' *)
  1114. let rec main_loop the_fpm the_execution_engine stream =
  1115. match Stream.peek stream with
  1116. | None -> ()
  1117. (* ignore top-level semicolons. *)
  1118. | Some (Token.Kwd ';') ->
  1119. Stream.junk stream;
  1120. main_loop the_fpm the_execution_engine stream
  1121. | Some token ->
  1122. begin
  1123. try match token with
  1124. | Token.Def ->
  1125. let e = Parser.parse_definition stream in
  1126. print_endline "parsed a function definition.";
  1127. dump_value (Codegen.codegen_func the_fpm e);
  1128. | Token.Extern ->
  1129. let e = Parser.parse_extern stream in
  1130. print_endline "parsed an extern.";
  1131. dump_value (Codegen.codegen_proto e);
  1132. | _ ->
  1133. (* Evaluate a top-level expression into an anonymous function. *)
  1134. let e = Parser.parse_toplevel stream in
  1135. print_endline "parsed a top-level expr";
  1136. let the_function = Codegen.codegen_func the_fpm e in
  1137. dump_value the_function;
  1138. (* JIT the function, returning a function pointer. *)
  1139. let result = ExecutionEngine.run_function the_function [||]
  1140. the_execution_engine in
  1141. print_string "Evaluated to ";
  1142. print_float (GenericValue.as_float Codegen.double_type result);
  1143. print_newline ();
  1144. with Stream.Error s | Codegen.Error s ->
  1145. (* Skip token for error recovery. *)
  1146. Stream.junk stream;
  1147. print_endline s;
  1148. end;
  1149. print_string "ready> "; flush stdout;
  1150. main_loop the_fpm the_execution_engine stream
  1151. toy.ml:
  1152. .. code-block:: ocaml
  1153. (*===----------------------------------------------------------------------===
  1154. * Main driver code.
  1155. *===----------------------------------------------------------------------===*)
  1156. open Llvm
  1157. open Llvm_executionengine
  1158. open Llvm_target
  1159. open Llvm_scalar_opts
  1160. let main () =
  1161. ignore (initialize_native_target ());
  1162. (* Install standard binary operators.
  1163. * 1 is the lowest precedence. *)
  1164. Hashtbl.add Parser.binop_precedence '<' 10;
  1165. Hashtbl.add Parser.binop_precedence '+' 20;
  1166. Hashtbl.add Parser.binop_precedence '-' 20;
  1167. Hashtbl.add Parser.binop_precedence '*' 40; (* highest. *)
  1168. (* Prime the first token. *)
  1169. print_string "ready> "; flush stdout;
  1170. let stream = Lexer.lex (Stream.of_channel stdin) in
  1171. (* Create the JIT. *)
  1172. let the_execution_engine = ExecutionEngine.create Codegen.the_module in
  1173. let the_fpm = PassManager.create_function Codegen.the_module in
  1174. (* Set up the optimizer pipeline. Start with registering info about how the
  1175. * target lays out data structures. *)
  1176. DataLayout.add (ExecutionEngine.target_data the_execution_engine) the_fpm;
  1177. (* Do simple "peephole" optimizations and bit-twiddling optzn. *)
  1178. add_instruction_combination the_fpm;
  1179. (* reassociate expressions. *)
  1180. add_reassociation the_fpm;
  1181. (* Eliminate Common SubExpressions. *)
  1182. add_gvn the_fpm;
  1183. (* Simplify the control flow graph (deleting unreachable blocks, etc). *)
  1184. add_cfg_simplification the_fpm;
  1185. ignore (PassManager.initialize the_fpm);
  1186. (* Run the main "interpreter loop" now. *)
  1187. Toplevel.main_loop the_fpm the_execution_engine stream;
  1188. (* Print out all the generated code. *)
  1189. dump_module Codegen.the_module
  1190. ;;
  1191. main ()
  1192. bindings.c
  1193. .. code-block:: c
  1194. #include <stdio.h>
  1195. /* putchard - putchar that takes a double and returns 0. */
  1196. extern double putchard(double X) {
  1197. putchar((char)X);
  1198. return 0;
  1199. }
  1200. /* printd - printf that takes a double prints it as "%f\n", returning 0. */
  1201. extern double printd(double X) {
  1202. printf("%f\n", X);
  1203. return 0;
  1204. }
  1205. `Next: Extending the language: mutable variables / SSA
  1206. construction <OCamlLangImpl7.html>`_