NeonEmitter.cpp 78 KB


  1. //===- NeonEmitter.cpp - Generate arm_neon.h for use with clang -*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This tablegen backend is responsible for emitting arm_neon.h, which includes
  10. // a declaration and definition of each function specified by the ARM NEON
  11. // compiler interface. See ARM document DUI0348B.
  12. //
  13. // Each NEON instruction is implemented in terms of 1 or more functions which
  14. // are suffixed with the element type of the input vectors. Functions may be
  15. // implemented in terms of generic vector operations such as +, *, -, etc. or
  16. // by calling a __builtin_-prefixed function which will be handled by clang's
  17. // CodeGen library.
  18. //
  19. // Additional validation code can be generated by this file when runHeader() is
  20. // called, rather than the normal run() entry point.
  21. //
  22. // See also the documentation in include/clang/Basic/arm_neon.td.
  23. //
  24. //===----------------------------------------------------------------------===//
  25. #include "TableGenBackends.h"
  26. #include "llvm/ADT/ArrayRef.h"
  27. #include "llvm/ADT/DenseMap.h"
  28. #include "llvm/ADT/None.h"
  29. #include "llvm/ADT/SmallVector.h"
  30. #include "llvm/ADT/STLExtras.h"
  31. #include "llvm/ADT/StringExtras.h"
  32. #include "llvm/ADT/StringRef.h"
  33. #include "llvm/Support/Casting.h"
  34. #include "llvm/Support/ErrorHandling.h"
  35. #include "llvm/Support/raw_ostream.h"
  36. #include "llvm/TableGen/Error.h"
  37. #include "llvm/TableGen/Record.h"
  38. #include "llvm/TableGen/SetTheory.h"
  39. #include <algorithm>
  40. #include <cassert>
  41. #include <cctype>
  42. #include <cstddef>
  43. #include <cstdint>
  44. #include <deque>
  45. #include <map>
  46. #include <set>
  47. #include <sstream>
  48. #include <string>
  49. #include <utility>
  50. #include <vector>
  51. using namespace llvm;
  52. namespace {
  53. // While globals are generally bad, this one allows us to perform assertions
  54. // liberally and somehow still trace them back to the def they indirectly
  55. // came from.
  56. static Record *CurrentRecord = nullptr;
  57. static void assert_with_loc(bool Assertion, const std::string &Str) {
  58. if (!Assertion) {
  59. if (CurrentRecord)
  60. PrintFatalError(CurrentRecord->getLoc(), Str);
  61. else
  62. PrintFatalError(Str);
  63. }
  64. }
  /// The instruction class of an intrinsic.
  enum ClassKind {
    ClassNone,
    /// Generic integer instruction, e.g., "i8" suffix.
    ClassI,
    /// Signed/unsigned/poly, e.g., "s8", "u8" or "p8" suffix.
    ClassS,
    /// Width-specific instruction, e.g., "8" suffix.
    ClassW,
    /// Bitcast arguments with enum argument to specify type.
    ClassB,
    /// Logical instructions which are op instructions but for which we need
    /// to not emit any suffix in our tests.
    ClassL,
    /// Instructions which we do not test since they are not TRUE
    /// instructions.
    ClassNoTest
  };
  /// NeonTypeFlags - Flags that identify the types of overloaded Neon
  /// builtins. These must be kept in sync with the flags in
  /// include/clang/Basic/TargetBuiltins.h.
  namespace NeonTypeFlags {

  /// Masks and flag bits combined with an EltType value (see
  /// Type::getNeonEnum).
  enum {
    EltTypeMask = 0xf,
    UnsignedFlag = 0x10,
    QuadFlag = 0x20
  };

  /// Element type codes; the values are consecutive starting at zero.
  enum EltType {
    Int8 = 0,
    Int16,
    Int32,
    Int64,
    Poly8,
    Poly16,
    Poly64,
    Poly128,
    Float16,
    Float32,
    Float64
  };

  } // end namespace NeonTypeFlags
  96. class NeonEmitter;
  97. //===----------------------------------------------------------------------===//
  98. // TypeSpec
  99. //===----------------------------------------------------------------------===//
  100. /// A TypeSpec is just a simple wrapper around a string, but gets its own type
  101. /// for strong typing purposes.
  102. ///
  103. /// A TypeSpec can be used to create a type.
  104. class TypeSpec : public std::string {
  105. public:
  106. static std::vector<TypeSpec> fromTypeSpecs(StringRef Str) {
  107. std::vector<TypeSpec> Ret;
  108. TypeSpec Acc;
  109. for (char I : Str.str()) {
  110. if (islower(I)) {
  111. Acc.push_back(I);
  112. Ret.push_back(TypeSpec(Acc));
  113. Acc.clear();
  114. } else {
  115. Acc.push_back(I);
  116. }
  117. }
  118. return Ret;
  119. }
  120. };
  121. //===----------------------------------------------------------------------===//
  122. // Type
  123. //===----------------------------------------------------------------------===//
  124. /// A Type. Not much more to say here.
  125. class Type {
  126. private:
  127. TypeSpec TS;
  128. bool Float, Signed, Immediate, Void, Poly, Constant, Pointer;
  129. // ScalarForMangling and NoManglingQ are really not suited to live here as
  130. // they are not related to the type. But they live in the TypeSpec (not the
  131. // prototype), so this is really the only place to store them.
  132. bool ScalarForMangling, NoManglingQ;
  133. unsigned Bitwidth, ElementBitwidth, NumVectors;
  134. public:
  135. Type()
  136. : Float(false), Signed(false), Immediate(false), Void(true), Poly(false),
  137. Constant(false), Pointer(false), ScalarForMangling(false),
  138. NoManglingQ(false), Bitwidth(0), ElementBitwidth(0), NumVectors(0) {}
  139. Type(TypeSpec TS, char CharMod)
  140. : TS(std::move(TS)), Float(false), Signed(false), Immediate(false),
  141. Void(false), Poly(false), Constant(false), Pointer(false),
  142. ScalarForMangling(false), NoManglingQ(false), Bitwidth(0),
  143. ElementBitwidth(0), NumVectors(0) {
  144. applyModifier(CharMod);
  145. }
  146. /// Returns a type representing "void".
  147. static Type getVoid() { return Type(); }
  148. bool operator==(const Type &Other) const { return str() == Other.str(); }
  149. bool operator!=(const Type &Other) const { return !operator==(Other); }
  150. //
  151. // Query functions
  152. //
  153. bool isScalarForMangling() const { return ScalarForMangling; }
  154. bool noManglingQ() const { return NoManglingQ; }
  155. bool isPointer() const { return Pointer; }
  156. bool isFloating() const { return Float; }
  157. bool isInteger() const { return !Float && !Poly; }
  158. bool isSigned() const { return Signed; }
  159. bool isImmediate() const { return Immediate; }
  160. bool isScalar() const { return NumVectors == 0; }
  161. bool isVector() const { return NumVectors > 0; }
  162. bool isFloat() const { return Float && ElementBitwidth == 32; }
  163. bool isDouble() const { return Float && ElementBitwidth == 64; }
  164. bool isHalf() const { return Float && ElementBitwidth == 16; }
  165. bool isPoly() const { return Poly; }
  166. bool isChar() const { return ElementBitwidth == 8; }
  167. bool isShort() const { return !Float && ElementBitwidth == 16; }
  168. bool isInt() const { return !Float && ElementBitwidth == 32; }
  169. bool isLong() const { return !Float && ElementBitwidth == 64; }
  170. bool isVoid() const { return Void; }
  171. unsigned getNumElements() const { return Bitwidth / ElementBitwidth; }
  172. unsigned getSizeInBits() const { return Bitwidth; }
  173. unsigned getElementSizeInBits() const { return ElementBitwidth; }
  174. unsigned getNumVectors() const { return NumVectors; }
  175. //
  176. // Mutator functions
  177. //
  178. void makeUnsigned() { Signed = false; }
  179. void makeSigned() { Signed = true; }
  180. void makeInteger(unsigned ElemWidth, bool Sign) {
  181. Float = false;
  182. Poly = false;
  183. Signed = Sign;
  184. Immediate = false;
  185. ElementBitwidth = ElemWidth;
  186. }
  187. void makeImmediate(unsigned ElemWidth) {
  188. Float = false;
  189. Poly = false;
  190. Signed = true;
  191. Immediate = true;
  192. ElementBitwidth = ElemWidth;
  193. }
  194. void makeScalar() {
  195. Bitwidth = ElementBitwidth;
  196. NumVectors = 0;
  197. }
  198. void makeOneVector() {
  199. assert(isVector());
  200. NumVectors = 1;
  201. }
  202. void doubleLanes() {
  203. assert_with_loc(Bitwidth != 128, "Can't get bigger than 128!");
  204. Bitwidth = 128;
  205. }
  206. void halveLanes() {
  207. assert_with_loc(Bitwidth != 64, "Can't get smaller than 64!");
  208. Bitwidth = 64;
  209. }
  210. /// Return the C string representation of a type, which is the typename
  211. /// defined in stdint.h or arm_neon.h.
  212. std::string str() const;
  213. /// Return the string representation of a type, which is an encoded
  214. /// string for passing to the BUILTIN() macro in Builtins.def.
  215. std::string builtin_str() const;
  216. /// Return the value in NeonTypeFlags for this type.
  217. unsigned getNeonEnum() const;
  218. /// Parse a type from a stdint.h or arm_neon.h typedef name,
  219. /// for example uint32x2_t or int64_t.
  220. static Type fromTypedefName(StringRef Name);
  221. private:
  222. /// Creates the type based on the typespec string in TS.
  223. /// Sets "Quad" to true if the "Q" or "H" modifiers were
  224. /// seen. This is needed by applyModifier as some modifiers
  225. /// only take effect if the type size was changed by "Q" or "H".
  226. void applyTypespec(bool &Quad);
  227. /// Applies a prototype modifier to the type.
  228. void applyModifier(char Mod);
  229. };
  230. //===----------------------------------------------------------------------===//
  231. // Variable
  232. //===----------------------------------------------------------------------===//
  233. /// A variable is a simple class that just has a type and a name.
  234. class Variable {
  235. Type T;
  236. std::string N;
  237. public:
  238. Variable() : T(Type::getVoid()), N("") {}
  239. Variable(Type T, std::string N) : T(std::move(T)), N(std::move(N)) {}
  240. Type getType() const { return T; }
  241. std::string getName() const { return "__" + N; }
  242. };
  243. //===----------------------------------------------------------------------===//
  244. // Intrinsic
  245. //===----------------------------------------------------------------------===//
  246. /// The main grunt class. This represents an instantiation of an intrinsic with
  247. /// a particular typespec and prototype.
  248. class Intrinsic {
  249. friend class DagEmitter;
  250. /// The Record this intrinsic was created from.
  251. Record *R;
  252. /// The unmangled name and prototype.
  253. std::string Name, Proto;
  254. /// The input and output typespecs. InTS == OutTS except when
  255. /// CartesianProductOfTypes is 1 - this is the case for vreinterpret.
  256. TypeSpec OutTS, InTS;
  257. /// The base class kind. Most intrinsics use ClassS, which has full type
  258. /// info for integers (s32/u32). Some use ClassI, which doesn't care about
  259. /// signedness (i32), while some (ClassB) have no type at all, only a width
  260. /// (32).
  261. ClassKind CK;
  262. /// The list of DAGs for the body. May be empty, in which case we should
  263. /// emit a builtin call.
  264. ListInit *Body;
  265. /// The architectural #ifdef guard.
  266. std::string Guard;
  267. /// Set if the Unavailable bit is 1. This means we don't generate a body,
  268. /// just an "unavailable" attribute on a declaration.
  269. bool IsUnavailable;
  270. /// Is this intrinsic safe for big-endian? or does it need its arguments
  271. /// reversing?
  272. bool BigEndianSafe;
  273. /// The types of return value [0] and parameters [1..].
  274. std::vector<Type> Types;
  275. /// The local variables defined.
  276. std::map<std::string, Variable> Variables;
  277. /// NeededEarly - set if any other intrinsic depends on this intrinsic.
  278. bool NeededEarly;
  279. /// UseMacro - set if we should implement using a macro or unset for a
  280. /// function.
  281. bool UseMacro;
  282. /// The set of intrinsics that this intrinsic uses/requires.
  283. std::set<Intrinsic *> Dependencies;
  284. /// The "base type", which is Type('d', OutTS). InBaseType is only
  285. /// different if CartesianProductOfTypes = 1 (for vreinterpret).
  286. Type BaseType, InBaseType;
  287. /// The return variable.
  288. Variable RetVar;
  289. /// A postfix to apply to every variable. Defaults to "".
  290. std::string VariablePostfix;
  291. NeonEmitter &Emitter;
  292. std::stringstream OS;
  293. bool isBigEndianSafe() const {
  294. if (BigEndianSafe)
  295. return true;
  296. for (const auto &T : Types){
  297. if (T.isVector() && T.getNumElements() > 1)
  298. return false;
  299. }
  300. return true;
  301. }
  302. public:
  303. Intrinsic(Record *R, StringRef Name, StringRef Proto, TypeSpec OutTS,
  304. TypeSpec InTS, ClassKind CK, ListInit *Body, NeonEmitter &Emitter,
  305. StringRef Guard, bool IsUnavailable, bool BigEndianSafe)
  306. : R(R), Name(Name.str()), Proto(Proto.str()), OutTS(OutTS), InTS(InTS),
  307. CK(CK), Body(Body), Guard(Guard.str()), IsUnavailable(IsUnavailable),
  308. BigEndianSafe(BigEndianSafe), NeededEarly(false), UseMacro(false),
  309. BaseType(OutTS, 'd'), InBaseType(InTS, 'd'), Emitter(Emitter) {
  310. // If this builtin takes an immediate argument, we need to #define it rather
  311. // than use a standard declaration, so that SemaChecking can range check
  312. // the immediate passed by the user.
  313. if (Proto.find('i') != std::string::npos)
  314. UseMacro = true;
  315. // Pointer arguments need to use macros to avoid hiding aligned attributes
  316. // from the pointer type.
  317. if (Proto.find('p') != std::string::npos ||
  318. Proto.find('c') != std::string::npos)
  319. UseMacro = true;
  320. // It is not permitted to pass or return an __fp16 by value, so intrinsics
  321. // taking a scalar float16_t must be implemented as macros.
  322. if (OutTS.find('h') != std::string::npos &&
  323. Proto.find('s') != std::string::npos)
  324. UseMacro = true;
  325. // Modify the TypeSpec per-argument to get a concrete Type, and create
  326. // known variables for each.
  327. // Types[0] is the return value.
  328. Types.emplace_back(OutTS, Proto[0]);
  329. for (unsigned I = 1; I < Proto.size(); ++I)
  330. Types.emplace_back(InTS, Proto[I]);
  331. }
  332. /// Get the Record that this intrinsic is based off.
  333. Record *getRecord() const { return R; }
  334. /// Get the set of Intrinsics that this intrinsic calls.
  335. /// this is the set of immediate dependencies, NOT the
  336. /// transitive closure.
  337. const std::set<Intrinsic *> &getDependencies() const { return Dependencies; }
  338. /// Get the architectural guard string (#ifdef).
  339. std::string getGuard() const { return Guard; }
  340. /// Get the non-mangled name.
  341. std::string getName() const { return Name; }
  342. /// Return true if the intrinsic takes an immediate operand.
  343. bool hasImmediate() const {
  344. return Proto.find('i') != std::string::npos;
  345. }
  346. /// Return the parameter index of the immediate operand.
  347. unsigned getImmediateIdx() const {
  348. assert(hasImmediate());
  349. unsigned Idx = Proto.find('i');
  350. assert(Idx > 0 && "Can't return an immediate!");
  351. return Idx - 1;
  352. }
  353. /// Return true if the intrinsic takes an splat operand.
  354. bool hasSplat() const { return Proto.find('a') != std::string::npos; }
  355. /// Return the parameter index of the splat operand.
  356. unsigned getSplatIdx() const {
  357. assert(hasSplat());
  358. unsigned Idx = Proto.find('a');
  359. assert(Idx > 0 && "Can't return a splat!");
  360. return Idx - 1;
  361. }
  362. unsigned getNumParams() const { return Proto.size() - 1; }
  363. Type getReturnType() const { return Types[0]; }
  364. Type getParamType(unsigned I) const { return Types[I + 1]; }
  365. Type getBaseType() const { return BaseType; }
  366. /// Return the raw prototype string.
  367. std::string getProto() const { return Proto; }
  368. /// Return true if the prototype has a scalar argument.
  369. /// This does not return true for the "splat" code ('a').
  370. bool protoHasScalar() const;
  371. /// Return the index that parameter PIndex will sit at
  372. /// in a generated function call. This is often just PIndex,
  373. /// but may not be as things such as multiple-vector operands
  374. /// and sret parameters need to be taken into accont.
  375. unsigned getGeneratedParamIdx(unsigned PIndex) {
  376. unsigned Idx = 0;
  377. if (getReturnType().getNumVectors() > 1)
  378. // Multiple vectors are passed as sret.
  379. ++Idx;
  380. for (unsigned I = 0; I < PIndex; ++I)
  381. Idx += std::max(1U, getParamType(I).getNumVectors());
  382. return Idx;
  383. }
  384. bool hasBody() const { return Body && !Body->getValues().empty(); }
  385. void setNeededEarly() { NeededEarly = true; }
  386. bool operator<(const Intrinsic &Other) const {
  387. // Sort lexicographically on a two-tuple (Guard, Name)
  388. if (Guard != Other.Guard)
  389. return Guard < Other.Guard;
  390. return Name < Other.Name;
  391. }
  392. ClassKind getClassKind(bool UseClassBIfScalar = false) {
  393. if (UseClassBIfScalar && !protoHasScalar())
  394. return ClassB;
  395. return CK;
  396. }
  397. /// Return the name, mangled with type information.
  398. /// If ForceClassS is true, use ClassS (u32/s32) instead
  399. /// of the intrinsic's own type class.
  400. std::string getMangledName(bool ForceClassS = false) const;
  401. /// Return the type code for a builtin function call.
  402. std::string getInstTypeCode(Type T, ClassKind CK) const;
  403. /// Return the type string for a BUILTIN() macro in Builtins.def.
  404. std::string getBuiltinTypeStr();
  405. /// Generate the intrinsic, returning code.
  406. std::string generate();
  407. /// Perform type checking and populate the dependency graph, but
  408. /// don't generate code yet.
  409. void indexBody();
  410. private:
  411. std::string mangleName(std::string Name, ClassKind CK) const;
  412. void initVariables();
  413. std::string replaceParamsIn(std::string S);
  414. void emitBodyAsBuiltinCall();
  415. void generateImpl(bool ReverseArguments,
  416. StringRef NamePrefix, StringRef CallPrefix);
  417. void emitReturn();
  418. void emitBody(StringRef CallPrefix);
  419. void emitShadowedArgs();
  420. void emitArgumentReversal();
  421. void emitReturnReversal();
  422. void emitReverseVariable(Variable &Dest, Variable &Src);
  423. void emitNewLine();
  424. void emitClosingBrace();
  425. void emitOpeningBrace();
  426. void emitPrototype(StringRef NamePrefix);
  427. class DagEmitter {
  428. Intrinsic &Intr;
  429. StringRef CallPrefix;
  430. public:
  431. DagEmitter(Intrinsic &Intr, StringRef CallPrefix) :
  432. Intr(Intr), CallPrefix(CallPrefix) {
  433. }
  434. std::pair<Type, std::string> emitDagArg(Init *Arg, std::string ArgName);
  435. std::pair<Type, std::string> emitDagSaveTemp(DagInit *DI);
  436. std::pair<Type, std::string> emitDagSplat(DagInit *DI);
  437. std::pair<Type, std::string> emitDagDup(DagInit *DI);
  438. std::pair<Type, std::string> emitDagDupTyped(DagInit *DI);
  439. std::pair<Type, std::string> emitDagShuffle(DagInit *DI);
  440. std::pair<Type, std::string> emitDagCast(DagInit *DI, bool IsBitCast);
  441. std::pair<Type, std::string> emitDagCall(DagInit *DI);
  442. std::pair<Type, std::string> emitDagNameReplace(DagInit *DI);
  443. std::pair<Type, std::string> emitDagLiteral(DagInit *DI);
  444. std::pair<Type, std::string> emitDagOp(DagInit *DI);
  445. std::pair<Type, std::string> emitDag(DagInit *DI);
  446. };
  447. };
  448. //===----------------------------------------------------------------------===//
  449. // NeonEmitter
  450. //===----------------------------------------------------------------------===//
  451. class NeonEmitter {
  452. RecordKeeper &Records;
  453. DenseMap<Record *, ClassKind> ClassMap;
  454. std::map<std::string, std::deque<Intrinsic>> IntrinsicMap;
  455. unsigned UniqueNumber;
  456. void createIntrinsic(Record *R, SmallVectorImpl<Intrinsic *> &Out);
  457. void genBuiltinsDef(raw_ostream &OS, SmallVectorImpl<Intrinsic *> &Defs);
  458. void genOverloadTypeCheckCode(raw_ostream &OS,
  459. SmallVectorImpl<Intrinsic *> &Defs);
  460. void genIntrinsicRangeCheckCode(raw_ostream &OS,
  461. SmallVectorImpl<Intrinsic *> &Defs);
  462. public:
  463. /// Called by Intrinsic - this attempts to get an intrinsic that takes
  464. /// the given types as arguments.
  465. Intrinsic &getIntrinsic(StringRef Name, ArrayRef<Type> Types);
  466. /// Called by Intrinsic - returns a globally-unique number.
  467. unsigned getUniqueNumber() { return UniqueNumber++; }
  468. NeonEmitter(RecordKeeper &R) : Records(R), UniqueNumber(0) {
  469. Record *SI = R.getClass("SInst");
  470. Record *II = R.getClass("IInst");
  471. Record *WI = R.getClass("WInst");
  472. Record *SOpI = R.getClass("SOpInst");
  473. Record *IOpI = R.getClass("IOpInst");
  474. Record *WOpI = R.getClass("WOpInst");
  475. Record *LOpI = R.getClass("LOpInst");
  476. Record *NoTestOpI = R.getClass("NoTestOpInst");
  477. ClassMap[SI] = ClassS;
  478. ClassMap[II] = ClassI;
  479. ClassMap[WI] = ClassW;
  480. ClassMap[SOpI] = ClassS;
  481. ClassMap[IOpI] = ClassI;
  482. ClassMap[WOpI] = ClassW;
  483. ClassMap[LOpI] = ClassL;
  484. ClassMap[NoTestOpI] = ClassNoTest;
  485. }
  486. // run - Emit arm_neon.h.inc
  487. void run(raw_ostream &o);
  488. // runFP16 - Emit arm_fp16.h.inc
  489. void runFP16(raw_ostream &o);
  490. // runHeader - Emit all the __builtin prototypes used in arm_neon.h
  491. // and arm_fp16.h
  492. void runHeader(raw_ostream &o);
  493. // runTests - Emit tests for all the Neon intrinsics.
  494. void runTests(raw_ostream &o);
  495. };
  496. } // end anonymous namespace
  497. //===----------------------------------------------------------------------===//
  498. // Type implementation
  499. //===----------------------------------------------------------------------===//
  500. std::string Type::str() const {
  501. if (Void)
  502. return "void";
  503. std::string S;
  504. if (!Signed && isInteger())
  505. S += "u";
  506. if (Poly)
  507. S += "poly";
  508. else if (Float)
  509. S += "float";
  510. else
  511. S += "int";
  512. S += utostr(ElementBitwidth);
  513. if (isVector())
  514. S += "x" + utostr(getNumElements());
  515. if (NumVectors > 1)
  516. S += "x" + utostr(NumVectors);
  517. S += "_t";
  518. if (Constant)
  519. S += " const";
  520. if (Pointer)
  521. S += " *";
  522. return S;
  523. }
  524. std::string Type::builtin_str() const {
  525. std::string S;
  526. if (isVoid())
  527. return "v";
  528. if (Pointer)
  529. // All pointers are void pointers.
  530. S += "v";
  531. else if (isInteger())
  532. switch (ElementBitwidth) {
  533. case 8: S += "c"; break;
  534. case 16: S += "s"; break;
  535. case 32: S += "i"; break;
  536. case 64: S += "Wi"; break;
  537. case 128: S += "LLLi"; break;
  538. default: llvm_unreachable("Unhandled case!");
  539. }
  540. else
  541. switch (ElementBitwidth) {
  542. case 16: S += "h"; break;
  543. case 32: S += "f"; break;
  544. case 64: S += "d"; break;
  545. default: llvm_unreachable("Unhandled case!");
  546. }
  547. if (isChar() && !Pointer && Signed)
  548. // Make chars explicitly signed.
  549. S = "S" + S;
  550. else if (isInteger() && !Pointer && !Signed)
  551. S = "U" + S;
  552. // Constant indices are "int", but have the "constant expression" modifier.
  553. if (isImmediate()) {
  554. assert(isInteger() && isSigned());
  555. S = "I" + S;
  556. }
  557. if (isScalar()) {
  558. if (Constant) S += "C";
  559. if (Pointer) S += "*";
  560. return S;
  561. }
  562. std::string Ret;
  563. for (unsigned I = 0; I < NumVectors; ++I)
  564. Ret += "V" + utostr(getNumElements()) + S;
  565. return Ret;
  566. }
  567. unsigned Type::getNeonEnum() const {
  568. unsigned Addend;
  569. switch (ElementBitwidth) {
  570. case 8: Addend = 0; break;
  571. case 16: Addend = 1; break;
  572. case 32: Addend = 2; break;
  573. case 64: Addend = 3; break;
  574. case 128: Addend = 4; break;
  575. default: llvm_unreachable("Unhandled element bitwidth!");
  576. }
  577. unsigned Base = (unsigned)NeonTypeFlags::Int8 + Addend;
  578. if (Poly) {
  579. // Adjustment needed because Poly32 doesn't exist.
  580. if (Addend >= 2)
  581. --Addend;
  582. Base = (unsigned)NeonTypeFlags::Poly8 + Addend;
  583. }
  584. if (Float) {
  585. assert(Addend != 0 && "Float8 doesn't exist!");
  586. Base = (unsigned)NeonTypeFlags::Float16 + (Addend - 1);
  587. }
  588. if (Bitwidth == 128)
  589. Base |= (unsigned)NeonTypeFlags::QuadFlag;
  590. if (isInteger() && !Signed)
  591. Base |= (unsigned)NeonTypeFlags::UnsignedFlag;
  592. return Base;
  593. }
  594. Type Type::fromTypedefName(StringRef Name) {
  595. Type T;
  596. T.Void = false;
  597. T.Float = false;
  598. T.Poly = false;
  599. if (Name.front() == 'u') {
  600. T.Signed = false;
  601. Name = Name.drop_front();
  602. } else {
  603. T.Signed = true;
  604. }
  605. if (Name.startswith("float")) {
  606. T.Float = true;
  607. Name = Name.drop_front(5);
  608. } else if (Name.startswith("poly")) {
  609. T.Poly = true;
  610. Name = Name.drop_front(4);
  611. } else {
  612. assert(Name.startswith("int"));
  613. Name = Name.drop_front(3);
  614. }
  615. unsigned I = 0;
  616. for (I = 0; I < Name.size(); ++I) {
  617. if (!isdigit(Name[I]))
  618. break;
  619. }
  620. Name.substr(0, I).getAsInteger(10, T.ElementBitwidth);
  621. Name = Name.drop_front(I);
  622. T.Bitwidth = T.ElementBitwidth;
  623. T.NumVectors = 1;
  624. if (Name.front() == 'x') {
  625. Name = Name.drop_front();
  626. unsigned I = 0;
  627. for (I = 0; I < Name.size(); ++I) {
  628. if (!isdigit(Name[I]))
  629. break;
  630. }
  631. unsigned NumLanes;
  632. Name.substr(0, I).getAsInteger(10, NumLanes);
  633. Name = Name.drop_front(I);
  634. T.Bitwidth = T.ElementBitwidth * NumLanes;
  635. } else {
  636. // Was scalar.
  637. T.NumVectors = 0;
  638. }
  639. if (Name.front() == 'x') {
  640. Name = Name.drop_front();
  641. unsigned I = 0;
  642. for (I = 0; I < Name.size(); ++I) {
  643. if (!isdigit(Name[I]))
  644. break;
  645. }
  646. Name.substr(0, I).getAsInteger(10, T.NumVectors);
  647. Name = Name.drop_front(I);
  648. }
  649. assert(Name.startswith("_t") && "Malformed typedef!");
  650. return T;
  651. }
  652. void Type::applyTypespec(bool &Quad) {
  653. std::string S = TS;
  654. ScalarForMangling = false;
  655. Void = false;
  656. Poly = Float = false;
  657. ElementBitwidth = ~0U;
  658. Signed = true;
  659. NumVectors = 1;
  660. for (char I : S) {
  661. switch (I) {
  662. case 'S':
  663. ScalarForMangling = true;
  664. break;
  665. case 'H':
  666. NoManglingQ = true;
  667. Quad = true;
  668. break;
  669. case 'Q':
  670. Quad = true;
  671. break;
  672. case 'P':
  673. Poly = true;
  674. break;
  675. case 'U':
  676. Signed = false;
  677. break;
  678. case 'c':
  679. ElementBitwidth = 8;
  680. break;
  681. case 'h':
  682. Float = true;
  683. LLVM_FALLTHROUGH;
  684. case 's':
  685. ElementBitwidth = 16;
  686. break;
  687. case 'f':
  688. Float = true;
  689. LLVM_FALLTHROUGH;
  690. case 'i':
  691. ElementBitwidth = 32;
  692. break;
  693. case 'd':
  694. Float = true;
  695. LLVM_FALLTHROUGH;
  696. case 'l':
  697. ElementBitwidth = 64;
  698. break;
  699. case 'k':
  700. ElementBitwidth = 128;
  701. // Poly doesn't have a 128x1 type.
  702. if (Poly)
  703. NumVectors = 0;
  704. break;
  705. default:
  706. llvm_unreachable("Unhandled type code!");
  707. }
  708. }
  709. assert(ElementBitwidth != ~0U && "Bad element bitwidth!");
  710. Bitwidth = Quad ? 128 : 64;
  711. }
  712. void Type::applyModifier(char Mod) {
  713. bool AppliedQuad = false;
  714. applyTypespec(AppliedQuad);
  715. switch (Mod) {
  716. case 'v':
  717. Void = true;
  718. break;
  719. case 't':
  720. if (Poly) {
  721. Poly = false;
  722. Signed = false;
  723. }
  724. break;
  725. case 'b':
  726. Signed = false;
  727. Float = false;
  728. Poly = false;
  729. NumVectors = 0;
  730. Bitwidth = ElementBitwidth;
  731. break;
  732. case '$':
  733. Signed = true;
  734. Float = false;
  735. Poly = false;
  736. NumVectors = 0;
  737. Bitwidth = ElementBitwidth;
  738. break;
  739. case 'u':
  740. Signed = false;
  741. Poly = false;
  742. Float = false;
  743. break;
  744. case 'x':
  745. Signed = true;
  746. assert(!Poly && "'u' can't be used with poly types!");
  747. Float = false;
  748. break;
  749. case 'o':
  750. Bitwidth = ElementBitwidth = 64;
  751. NumVectors = 0;
  752. Float = true;
  753. break;
  754. case 'y':
  755. Bitwidth = ElementBitwidth = 32;
  756. NumVectors = 0;
  757. Float = true;
  758. break;
  759. case 'Y':
  760. Bitwidth = ElementBitwidth = 16;
  761. NumVectors = 0;
  762. Float = true;
  763. break;
  764. case 'I':
  765. Bitwidth = ElementBitwidth = 32;
  766. NumVectors = 0;
  767. Float = false;
  768. Signed = true;
  769. break;
  770. case 'L':
  771. Bitwidth = ElementBitwidth = 64;
  772. NumVectors = 0;
  773. Float = false;
  774. Signed = true;
  775. break;
  776. case 'U':
  777. Bitwidth = ElementBitwidth = 32;
  778. NumVectors = 0;
  779. Float = false;
  780. Signed = false;
  781. break;
  782. case 'O':
  783. Bitwidth = ElementBitwidth = 64;
  784. NumVectors = 0;
  785. Float = false;
  786. Signed = false;
  787. break;
  788. case 'f':
  789. Float = true;
  790. ElementBitwidth = 32;
  791. break;
  792. case 'F':
  793. Float = true;
  794. ElementBitwidth = 64;
  795. break;
  796. case 'H':
  797. Float = true;
  798. ElementBitwidth = 16;
  799. break;
  800. case '0':
  801. Float = true;
  802. if (AppliedQuad)
  803. Bitwidth /= 2;
  804. ElementBitwidth = 16;
  805. break;
  806. case '1':
  807. Float = true;
  808. if (!AppliedQuad)
  809. Bitwidth *= 2;
  810. ElementBitwidth = 16;
  811. break;
  812. case 'g':
  813. if (AppliedQuad)
  814. Bitwidth /= 2;
  815. break;
  816. case 'j':
  817. if (!AppliedQuad)
  818. Bitwidth *= 2;
  819. break;
  820. case 'w':
  821. ElementBitwidth *= 2;
  822. Bitwidth *= 2;
  823. break;
  824. case 'n':
  825. ElementBitwidth *= 2;
  826. break;
  827. case 'i':
  828. Float = false;
  829. Poly = false;
  830. ElementBitwidth = Bitwidth = 32;
  831. NumVectors = 0;
  832. Signed = true;
  833. Immediate = true;
  834. break;
  835. case 'l':
  836. Float = false;
  837. Poly = false;
  838. ElementBitwidth = Bitwidth = 64;
  839. NumVectors = 0;
  840. Signed = false;
  841. Immediate = true;
  842. break;
  843. case 'z':
  844. ElementBitwidth /= 2;
  845. Bitwidth = ElementBitwidth;
  846. NumVectors = 0;
  847. break;
  848. case 'r':
  849. ElementBitwidth *= 2;
  850. Bitwidth = ElementBitwidth;
  851. NumVectors = 0;
  852. break;
  853. case 's':
  854. case 'a':
  855. Bitwidth = ElementBitwidth;
  856. NumVectors = 0;
  857. break;
  858. case 'k':
  859. Bitwidth *= 2;
  860. break;
  861. case 'c':
  862. Constant = true;
  863. LLVM_FALLTHROUGH;
  864. case 'p':
  865. Pointer = true;
  866. Bitwidth = ElementBitwidth;
  867. NumVectors = 0;
  868. break;
  869. case 'h':
  870. ElementBitwidth /= 2;
  871. break;
  872. case 'q':
  873. ElementBitwidth /= 2;
  874. Bitwidth *= 2;
  875. break;
  876. case 'e':
  877. ElementBitwidth /= 2;
  878. Signed = false;
  879. break;
  880. case 'm':
  881. ElementBitwidth /= 2;
  882. Bitwidth /= 2;
  883. break;
  884. case 'd':
  885. break;
  886. case '2':
  887. NumVectors = 2;
  888. break;
  889. case '3':
  890. NumVectors = 3;
  891. break;
  892. case '4':
  893. NumVectors = 4;
  894. break;
  895. case 'B':
  896. NumVectors = 2;
  897. if (!AppliedQuad)
  898. Bitwidth *= 2;
  899. break;
  900. case 'C':
  901. NumVectors = 3;
  902. if (!AppliedQuad)
  903. Bitwidth *= 2;
  904. break;
  905. case 'D':
  906. NumVectors = 4;
  907. if (!AppliedQuad)
  908. Bitwidth *= 2;
  909. break;
  910. case '7':
  911. if (AppliedQuad)
  912. Bitwidth /= 2;
  913. ElementBitwidth = 8;
  914. break;
  915. case '8':
  916. ElementBitwidth = 8;
  917. break;
  918. case '9':
  919. if (!AppliedQuad)
  920. Bitwidth *= 2;
  921. ElementBitwidth = 8;
  922. break;
  923. default:
  924. llvm_unreachable("Unhandled character!");
  925. }
  926. }
  927. //===----------------------------------------------------------------------===//
  928. // Intrinsic implementation
  929. //===----------------------------------------------------------------------===//
  930. std::string Intrinsic::getInstTypeCode(Type T, ClassKind CK) const {
  931. char typeCode = '\0';
  932. bool printNumber = true;
  933. if (CK == ClassB)
  934. return "";
  935. if (T.isPoly())
  936. typeCode = 'p';
  937. else if (T.isInteger())
  938. typeCode = T.isSigned() ? 's' : 'u';
  939. else
  940. typeCode = 'f';
  941. if (CK == ClassI) {
  942. switch (typeCode) {
  943. default:
  944. break;
  945. case 's':
  946. case 'u':
  947. case 'p':
  948. typeCode = 'i';
  949. break;
  950. }
  951. }
  952. if (CK == ClassB) {
  953. typeCode = '\0';
  954. }
  955. std::string S;
  956. if (typeCode != '\0')
  957. S.push_back(typeCode);
  958. if (printNumber)
  959. S += utostr(T.getElementSizeInBits());
  960. return S;
  961. }
  /// Returns true when \p Mod is one of the prototype modifier characters
  /// 'F', 'f', 'H', 'Y' or 'I'.
  static bool isFloatingPointProtoModifier(char Mod) {
    switch (Mod) {
    case 'F':
    case 'f':
    case 'H':
    case 'Y':
    case 'I':
      return true;
    default:
      return false;
    }
  }
  965. std::string Intrinsic::getBuiltinTypeStr() {
  966. ClassKind LocalCK = getClassKind(true);
  967. std::string S;
  968. Type RetT = getReturnType();
  969. if ((LocalCK == ClassI || LocalCK == ClassW) && RetT.isScalar() &&
  970. !RetT.isFloating())
  971. RetT.makeInteger(RetT.getElementSizeInBits(), false);
  972. // Since the return value must be one type, return a vector type of the
  973. // appropriate width which we will bitcast. An exception is made for
  974. // returning structs of 2, 3, or 4 vectors which are returned in a sret-like
  975. // fashion, storing them to a pointer arg.
  976. if (RetT.getNumVectors() > 1) {
  977. S += "vv*"; // void result with void* first argument
  978. } else {
  979. if (RetT.isPoly())
  980. RetT.makeInteger(RetT.getElementSizeInBits(), false);
  981. if (!RetT.isScalar() && !RetT.isSigned())
  982. RetT.makeSigned();
  983. bool ForcedVectorFloatingType = isFloatingPointProtoModifier(Proto[0]);
  984. if (LocalCK == ClassB && !RetT.isScalar() && !ForcedVectorFloatingType)
  985. // Cast to vector of 8-bit elements.
  986. RetT.makeInteger(8, true);
  987. S += RetT.builtin_str();
  988. }
  989. for (unsigned I = 0; I < getNumParams(); ++I) {
  990. Type T = getParamType(I);
  991. if (T.isPoly())
  992. T.makeInteger(T.getElementSizeInBits(), false);
  993. bool ForcedFloatingType = isFloatingPointProtoModifier(Proto[I + 1]);
  994. if (LocalCK == ClassB && !T.isScalar() && !ForcedFloatingType)
  995. T.makeInteger(8, true);
  996. // Halves always get converted to 8-bit elements.
  997. if (T.isHalf() && T.isVector() && !T.isScalarForMangling())
  998. T.makeInteger(8, true);
  999. if (LocalCK == ClassI)
  1000. T.makeSigned();
  1001. if (hasImmediate() && getImmediateIdx() == I)
  1002. T.makeImmediate(32);
  1003. S += T.builtin_str();
  1004. }
  1005. // Extra constant integer to hold type class enum for this function, e.g. s8
  1006. if (LocalCK == ClassB)
  1007. S += "i";
  1008. return S;
  1009. }
  1010. std::string Intrinsic::getMangledName(bool ForceClassS) const {
  1011. // Check if the prototype has a scalar operand with the type of the vector
  1012. // elements. If not, bitcasting the args will take care of arg checking.
  1013. // The actual signedness etc. will be taken care of with special enums.
  1014. ClassKind LocalCK = CK;
  1015. if (!protoHasScalar())
  1016. LocalCK = ClassB;
  1017. return mangleName(Name, ForceClassS ? ClassS : LocalCK);
  1018. }
  1019. std::string Intrinsic::mangleName(std::string Name, ClassKind LocalCK) const {
  1020. std::string typeCode = getInstTypeCode(BaseType, LocalCK);
  1021. std::string S = Name;
  1022. if (Name == "vcvt_f16_f32" || Name == "vcvt_f32_f16" ||
  1023. Name == "vcvt_f32_f64" || Name == "vcvt_f64_f32")
  1024. return Name;
  1025. if (!typeCode.empty()) {
  1026. // If the name ends with _xN (N = 2,3,4), insert the typeCode before _xN.
  1027. if (Name.size() >= 3 && isdigit(Name.back()) &&
  1028. Name[Name.length() - 2] == 'x' && Name[Name.length() - 3] == '_')
  1029. S.insert(S.length() - 3, "_" + typeCode);
  1030. else
  1031. S += "_" + typeCode;
  1032. }
  1033. if (BaseType != InBaseType) {
  1034. // A reinterpret - out the input base type at the end.
  1035. S += "_" + getInstTypeCode(InBaseType, LocalCK);
  1036. }
  1037. if (LocalCK == ClassB)
  1038. S += "_v";
  1039. // Insert a 'q' before the first '_' character so that it ends up before
  1040. // _lane or _n on vector-scalar operations.
  1041. if (BaseType.getSizeInBits() == 128 && !BaseType.noManglingQ()) {
  1042. size_t Pos = S.find('_');
  1043. S.insert(Pos, "q");
  1044. }
  1045. char Suffix = '\0';
  1046. if (BaseType.isScalarForMangling()) {
  1047. switch (BaseType.getElementSizeInBits()) {
  1048. case 8: Suffix = 'b'; break;
  1049. case 16: Suffix = 'h'; break;
  1050. case 32: Suffix = 's'; break;
  1051. case 64: Suffix = 'd'; break;
  1052. default: llvm_unreachable("Bad suffix!");
  1053. }
  1054. }
  1055. if (Suffix != '\0') {
  1056. size_t Pos = S.find('_');
  1057. S.insert(Pos, &Suffix, 1);
  1058. }
  1059. return S;
  1060. }
  1061. std::string Intrinsic::replaceParamsIn(std::string S) {
  1062. while (S.find('$') != std::string::npos) {
  1063. size_t Pos = S.find('$');
  1064. size_t End = Pos + 1;
  1065. while (isalpha(S[End]))
  1066. ++End;
  1067. std::string VarName = S.substr(Pos + 1, End - Pos - 1);
  1068. assert_with_loc(Variables.find(VarName) != Variables.end(),
  1069. "Variable not defined!");
  1070. S.replace(Pos, End - Pos, Variables.find(VarName)->second.getName());
  1071. }
  1072. return S;
  1073. }
  1074. void Intrinsic::initVariables() {
  1075. Variables.clear();
  1076. // Modify the TypeSpec per-argument to get a concrete Type, and create
  1077. // known variables for each.
  1078. for (unsigned I = 1; I < Proto.size(); ++I) {
  1079. char NameC = '0' + (I - 1);
  1080. std::string Name = "p";
  1081. Name.push_back(NameC);
  1082. Variables[Name] = Variable(Types[I], Name + VariablePostfix);
  1083. }
  1084. RetVar = Variable(Types[0], "ret" + VariablePostfix);
  1085. }
  1086. void Intrinsic::emitPrototype(StringRef NamePrefix) {
  1087. if (UseMacro)
  1088. OS << "#define ";
  1089. else
  1090. OS << "__ai " << Types[0].str() << " ";
  1091. OS << NamePrefix.str() << mangleName(Name, ClassS) << "(";
  1092. for (unsigned I = 0; I < getNumParams(); ++I) {
  1093. if (I != 0)
  1094. OS << ", ";
  1095. char NameC = '0' + I;
  1096. std::string Name = "p";
  1097. Name.push_back(NameC);
  1098. assert(Variables.find(Name) != Variables.end());
  1099. Variable &V = Variables[Name];
  1100. if (!UseMacro)
  1101. OS << V.getType().str() << " ";
  1102. OS << V.getName();
  1103. }
  1104. OS << ")";
  1105. }
  1106. void Intrinsic::emitOpeningBrace() {
  1107. if (UseMacro)
  1108. OS << " __extension__ ({";
  1109. else
  1110. OS << " {";
  1111. emitNewLine();
  1112. }
  1113. void Intrinsic::emitClosingBrace() {
  1114. if (UseMacro)
  1115. OS << "})";
  1116. else
  1117. OS << "}";
  1118. }
  1119. void Intrinsic::emitNewLine() {
  1120. if (UseMacro)
  1121. OS << " \\\n";
  1122. else
  1123. OS << "\n";
  1124. }
  1125. void Intrinsic::emitReverseVariable(Variable &Dest, Variable &Src) {
  1126. if (Dest.getType().getNumVectors() > 1) {
  1127. emitNewLine();
  1128. for (unsigned K = 0; K < Dest.getType().getNumVectors(); ++K) {
  1129. OS << " " << Dest.getName() << ".val[" << K << "] = "
  1130. << "__builtin_shufflevector("
  1131. << Src.getName() << ".val[" << K << "], "
  1132. << Src.getName() << ".val[" << K << "]";
  1133. for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J)
  1134. OS << ", " << J;
  1135. OS << ");";
  1136. emitNewLine();
  1137. }
  1138. } else {
  1139. OS << " " << Dest.getName()
  1140. << " = __builtin_shufflevector(" << Src.getName() << ", " << Src.getName();
  1141. for (int J = Dest.getType().getNumElements() - 1; J >= 0; --J)
  1142. OS << ", " << J;
  1143. OS << ");";
  1144. emitNewLine();
  1145. }
  1146. }
  1147. void Intrinsic::emitArgumentReversal() {
  1148. if (isBigEndianSafe())
  1149. return;
  1150. // Reverse all vector arguments.
  1151. for (unsigned I = 0; I < getNumParams(); ++I) {
  1152. std::string Name = "p" + utostr(I);
  1153. std::string NewName = "rev" + utostr(I);
  1154. Variable &V = Variables[Name];
  1155. Variable NewV(V.getType(), NewName + VariablePostfix);
  1156. if (!NewV.getType().isVector() || NewV.getType().getNumElements() == 1)
  1157. continue;
  1158. OS << " " << NewV.getType().str() << " " << NewV.getName() << ";";
  1159. emitReverseVariable(NewV, V);
  1160. V = NewV;
  1161. }
  1162. }
  1163. void Intrinsic::emitReturnReversal() {
  1164. if (isBigEndianSafe())
  1165. return;
  1166. if (!getReturnType().isVector() || getReturnType().isVoid() ||
  1167. getReturnType().getNumElements() == 1)
  1168. return;
  1169. emitReverseVariable(RetVar, RetVar);
  1170. }
  1171. void Intrinsic::emitShadowedArgs() {
  1172. // Macro arguments are not type-checked like inline function arguments,
  1173. // so assign them to local temporaries to get the right type checking.
  1174. if (!UseMacro)
  1175. return;
  1176. for (unsigned I = 0; I < getNumParams(); ++I) {
  1177. // Do not create a temporary for an immediate argument.
  1178. // That would defeat the whole point of using a macro!
  1179. if (hasImmediate() && Proto[I+1] == 'i')
  1180. continue;
  1181. // Do not create a temporary for pointer arguments. The input
  1182. // pointer may have an alignment hint.
  1183. if (getParamType(I).isPointer())
  1184. continue;
  1185. std::string Name = "p" + utostr(I);
  1186. assert(Variables.find(Name) != Variables.end());
  1187. Variable &V = Variables[Name];
  1188. std::string NewName = "s" + utostr(I);
  1189. Variable V2(V.getType(), NewName + VariablePostfix);
  1190. OS << " " << V2.getType().str() << " " << V2.getName() << " = "
  1191. << V.getName() << ";";
  1192. emitNewLine();
  1193. V = V2;
  1194. }
  1195. }
  1196. // We don't check 'a' in this function, because for builtin function the
  1197. // argument matching to 'a' uses a vector type splatted from a scalar type.
  1198. bool Intrinsic::protoHasScalar() const {
  1199. return (Proto.find('s') != std::string::npos ||
  1200. Proto.find('z') != std::string::npos ||
  1201. Proto.find('r') != std::string::npos ||
  1202. Proto.find('b') != std::string::npos ||
  1203. Proto.find('$') != std::string::npos ||
  1204. Proto.find('y') != std::string::npos ||
  1205. Proto.find('o') != std::string::npos);
  1206. }
  1207. void Intrinsic::emitBodyAsBuiltinCall() {
  1208. std::string S;
  1209. // If this builtin returns a struct 2, 3, or 4 vectors, pass it as an implicit
  1210. // sret-like argument.
  1211. bool SRet = getReturnType().getNumVectors() >= 2;
  1212. StringRef N = Name;
  1213. if (hasSplat()) {
  1214. // Call the non-splat builtin: chop off the "_n" suffix from the name.
  1215. assert(N.endswith("_n"));
  1216. N = N.drop_back(2);
  1217. }
  1218. ClassKind LocalCK = CK;
  1219. if (!protoHasScalar())
  1220. LocalCK = ClassB;
  1221. if (!getReturnType().isVoid() && !SRet)
  1222. S += "(" + RetVar.getType().str() + ") ";
  1223. S += "__builtin_neon_" + mangleName(N, LocalCK) + "(";
  1224. if (SRet)
  1225. S += "&" + RetVar.getName() + ", ";
  1226. for (unsigned I = 0; I < getNumParams(); ++I) {
  1227. Variable &V = Variables["p" + utostr(I)];
  1228. Type T = V.getType();
  1229. // Handle multiple-vector values specially, emitting each subvector as an
  1230. // argument to the builtin.
  1231. if (T.getNumVectors() > 1) {
  1232. // Check if an explicit cast is needed.
  1233. std::string Cast;
  1234. if (LocalCK == ClassB) {
  1235. Type T2 = T;
  1236. T2.makeOneVector();
  1237. T2.makeInteger(8, /*Signed=*/true);
  1238. Cast = "(" + T2.str() + ")";
  1239. }
  1240. for (unsigned J = 0; J < T.getNumVectors(); ++J)
  1241. S += Cast + V.getName() + ".val[" + utostr(J) + "], ";
  1242. continue;
  1243. }
  1244. std::string Arg;
  1245. Type CastToType = T;
  1246. if (hasSplat() && I == getSplatIdx()) {
  1247. Arg = "(" + BaseType.str() + ") {";
  1248. for (unsigned J = 0; J < BaseType.getNumElements(); ++J) {
  1249. if (J != 0)
  1250. Arg += ", ";
  1251. Arg += V.getName();
  1252. }
  1253. Arg += "}";
  1254. CastToType = BaseType;
  1255. } else {
  1256. Arg = V.getName();
  1257. }
  1258. // Check if an explicit cast is needed.
  1259. if (CastToType.isVector() &&
  1260. (LocalCK == ClassB || (T.isHalf() && !T.isScalarForMangling()))) {
  1261. CastToType.makeInteger(8, true);
  1262. Arg = "(" + CastToType.str() + ")" + Arg;
  1263. } else if (CastToType.isVector() && LocalCK == ClassI) {
  1264. CastToType.makeSigned();
  1265. Arg = "(" + CastToType.str() + ")" + Arg;
  1266. }
  1267. S += Arg + ", ";
  1268. }
  1269. // Extra constant integer to hold type class enum for this function, e.g. s8
  1270. if (getClassKind(true) == ClassB) {
  1271. Type ThisTy = getReturnType();
  1272. if (Proto[0] == 'v' || isFloatingPointProtoModifier(Proto[0]))
  1273. ThisTy = getParamType(0);
  1274. if (ThisTy.isPointer())
  1275. ThisTy = getParamType(1);
  1276. S += utostr(ThisTy.getNeonEnum());
  1277. } else {
  1278. // Remove extraneous ", ".
  1279. S.pop_back();
  1280. S.pop_back();
  1281. }
  1282. S += ");";
  1283. std::string RetExpr;
  1284. if (!SRet && !RetVar.getType().isVoid())
  1285. RetExpr = RetVar.getName() + " = ";
  1286. OS << " " << RetExpr << S;
  1287. emitNewLine();
  1288. }
  1289. void Intrinsic::emitBody(StringRef CallPrefix) {
  1290. std::vector<std::string> Lines;
  1291. assert(RetVar.getType() == Types[0]);
  1292. // Create a return variable, if we're not void.
  1293. if (!RetVar.getType().isVoid()) {
  1294. OS << " " << RetVar.getType().str() << " " << RetVar.getName() << ";";
  1295. emitNewLine();
  1296. }
  1297. if (!Body || Body->getValues().empty()) {
  1298. // Nothing specific to output - must output a builtin.
  1299. emitBodyAsBuiltinCall();
  1300. return;
  1301. }
  1302. // We have a list of "things to output". The last should be returned.
  1303. for (auto *I : Body->getValues()) {
  1304. if (StringInit *SI = dyn_cast<StringInit>(I)) {
  1305. Lines.push_back(replaceParamsIn(SI->getAsString()));
  1306. } else if (DagInit *DI = dyn_cast<DagInit>(I)) {
  1307. DagEmitter DE(*this, CallPrefix);
  1308. Lines.push_back(DE.emitDag(DI).second + ";");
  1309. }
  1310. }
  1311. assert(!Lines.empty() && "Empty def?");
  1312. if (!RetVar.getType().isVoid())
  1313. Lines.back().insert(0, RetVar.getName() + " = ");
  1314. for (auto &L : Lines) {
  1315. OS << " " << L;
  1316. emitNewLine();
  1317. }
  1318. }
  1319. void Intrinsic::emitReturn() {
  1320. if (RetVar.getType().isVoid())
  1321. return;
  1322. if (UseMacro)
  1323. OS << " " << RetVar.getName() << ";";
  1324. else
  1325. OS << " return " << RetVar.getName() << ";";
  1326. emitNewLine();
  1327. }
  1328. std::pair<Type, std::string> Intrinsic::DagEmitter::emitDag(DagInit *DI) {
  1329. // At this point we should only be seeing a def.
  1330. DefInit *DefI = cast<DefInit>(DI->getOperator());
  1331. std::string Op = DefI->getAsString();
  1332. if (Op == "cast" || Op == "bitcast")
  1333. return emitDagCast(DI, Op == "bitcast");
  1334. if (Op == "shuffle")
  1335. return emitDagShuffle(DI);
  1336. if (Op == "dup")
  1337. return emitDagDup(DI);
  1338. if (Op == "dup_typed")
  1339. return emitDagDupTyped(DI);
  1340. if (Op == "splat")
  1341. return emitDagSplat(DI);
  1342. if (Op == "save_temp")
  1343. return emitDagSaveTemp(DI);
  1344. if (Op == "op")
  1345. return emitDagOp(DI);
  1346. if (Op == "call")
  1347. return emitDagCall(DI);
  1348. if (Op == "name_replace")
  1349. return emitDagNameReplace(DI);
  1350. if (Op == "literal")
  1351. return emitDagLiteral(DI);
  1352. assert_with_loc(false, "Unknown operation!");
  1353. return std::make_pair(Type::getVoid(), "");
  1354. }
  1355. std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagOp(DagInit *DI) {
  1356. std::string Op = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
  1357. if (DI->getNumArgs() == 2) {
  1358. // Unary op.
  1359. std::pair<Type, std::string> R =
  1360. emitDagArg(DI->getArg(1), DI->getArgNameStr(1));
  1361. return std::make_pair(R.first, Op + R.second);
  1362. } else {
  1363. assert(DI->getNumArgs() == 3 && "Can only handle unary and binary ops!");
  1364. std::pair<Type, std::string> R1 =
  1365. emitDagArg(DI->getArg(1), DI->getArgNameStr(1));
  1366. std::pair<Type, std::string> R2 =
  1367. emitDagArg(DI->getArg(2), DI->getArgNameStr(2));
  1368. assert_with_loc(R1.first == R2.first, "Argument type mismatch!");
  1369. return std::make_pair(R1.first, R1.second + " " + Op + " " + R2.second);
  1370. }
  1371. }
  1372. std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCall(DagInit *DI) {
  1373. std::vector<Type> Types;
  1374. std::vector<std::string> Values;
  1375. for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
  1376. std::pair<Type, std::string> R =
  1377. emitDagArg(DI->getArg(I + 1), DI->getArgNameStr(I + 1));
  1378. Types.push_back(R.first);
  1379. Values.push_back(R.second);
  1380. }
  1381. // Look up the called intrinsic.
  1382. std::string N;
  1383. if (StringInit *SI = dyn_cast<StringInit>(DI->getArg(0)))
  1384. N = SI->getAsUnquotedString();
  1385. else
  1386. N = emitDagArg(DI->getArg(0), "").second;
  1387. Intrinsic &Callee = Intr.Emitter.getIntrinsic(N, Types);
  1388. // Make sure the callee is known as an early def.
  1389. Callee.setNeededEarly();
  1390. Intr.Dependencies.insert(&Callee);
  1391. // Now create the call itself.
  1392. std::string S = "";
  1393. if (!Callee.isBigEndianSafe())
  1394. S += CallPrefix.str();
  1395. S += Callee.getMangledName(true) + "(";
  1396. for (unsigned I = 0; I < DI->getNumArgs() - 1; ++I) {
  1397. if (I != 0)
  1398. S += ", ";
  1399. S += Values[I];
  1400. }
  1401. S += ")";
  1402. return std::make_pair(Callee.getReturnType(), S);
  1403. }
  1404. std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagCast(DagInit *DI,
  1405. bool IsBitCast){
  1406. // (cast MOD* VAL) -> cast VAL to type given by MOD.
  1407. std::pair<Type, std::string> R = emitDagArg(
  1408. DI->getArg(DI->getNumArgs() - 1),
  1409. DI->getArgNameStr(DI->getNumArgs() - 1));
  1410. Type castToType = R.first;
  1411. for (unsigned ArgIdx = 0; ArgIdx < DI->getNumArgs() - 1; ++ArgIdx) {
  1412. // MOD can take several forms:
  1413. // 1. $X - take the type of parameter / variable X.
  1414. // 2. The value "R" - take the type of the return type.
  1415. // 3. a type string
  1416. // 4. The value "U" or "S" to switch the signedness.
  1417. // 5. The value "H" or "D" to half or double the bitwidth.
  1418. // 6. The value "8" to convert to 8-bit (signed) integer lanes.
  1419. if (!DI->getArgNameStr(ArgIdx).empty()) {
  1420. assert_with_loc(Intr.Variables.find(DI->getArgNameStr(ArgIdx)) !=
  1421. Intr.Variables.end(),
  1422. "Variable not found");
  1423. castToType = Intr.Variables[DI->getArgNameStr(ArgIdx)].getType();
  1424. } else {
  1425. StringInit *SI = dyn_cast<StringInit>(DI->getArg(ArgIdx));
  1426. assert_with_loc(SI, "Expected string type or $Name for cast type");
  1427. if (SI->getAsUnquotedString() == "R") {
  1428. castToType = Intr.getReturnType();
  1429. } else if (SI->getAsUnquotedString() == "U") {
  1430. castToType.makeUnsigned();
  1431. } else if (SI->getAsUnquotedString() == "S") {
  1432. castToType.makeSigned();
  1433. } else if (SI->getAsUnquotedString() == "H") {
  1434. castToType.halveLanes();
  1435. } else if (SI->getAsUnquotedString() == "D") {
  1436. castToType.doubleLanes();
  1437. } else if (SI->getAsUnquotedString() == "8") {
  1438. castToType.makeInteger(8, true);
  1439. } else {
  1440. castToType = Type::fromTypedefName(SI->getAsUnquotedString());
  1441. assert_with_loc(!castToType.isVoid(), "Unknown typedef");
  1442. }
  1443. }
  1444. }
  1445. std::string S;
  1446. if (IsBitCast) {
  1447. // Emit a reinterpret cast. The second operand must be an lvalue, so create
  1448. // a temporary.
  1449. std::string N = "reint";
  1450. unsigned I = 0;
  1451. while (Intr.Variables.find(N) != Intr.Variables.end())
  1452. N = "reint" + utostr(++I);
  1453. Intr.Variables[N] = Variable(R.first, N + Intr.VariablePostfix);
  1454. Intr.OS << R.first.str() << " " << Intr.Variables[N].getName() << " = "
  1455. << R.second << ";";
  1456. Intr.emitNewLine();
  1457. S = "*(" + castToType.str() + " *) &" + Intr.Variables[N].getName() + "";
  1458. } else {
  1459. // Emit a normal (static) cast.
  1460. S = "(" + castToType.str() + ")(" + R.second + ")";
  1461. }
  1462. return std::make_pair(castToType, S);
  1463. }
  1464. std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagShuffle(DagInit *DI){
  1465. // See the documentation in arm_neon.td for a description of these operators.
  1466. class LowHalf : public SetTheory::Operator {
  1467. public:
  1468. void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
  1469. ArrayRef<SMLoc> Loc) override {
  1470. SetTheory::RecSet Elts2;
  1471. ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc);
  1472. Elts.insert(Elts2.begin(), Elts2.begin() + (Elts2.size() / 2));
  1473. }
  1474. };
  1475. class HighHalf : public SetTheory::Operator {
  1476. public:
  1477. void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
  1478. ArrayRef<SMLoc> Loc) override {
  1479. SetTheory::RecSet Elts2;
  1480. ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts2, Loc);
  1481. Elts.insert(Elts2.begin() + (Elts2.size() / 2), Elts2.end());
  1482. }
  1483. };
  1484. class Rev : public SetTheory::Operator {
  1485. unsigned ElementSize;
  1486. public:
  1487. Rev(unsigned ElementSize) : ElementSize(ElementSize) {}
  1488. void apply(SetTheory &ST, DagInit *Expr, SetTheory::RecSet &Elts,
  1489. ArrayRef<SMLoc> Loc) override {
  1490. SetTheory::RecSet Elts2;
  1491. ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Elts2, Loc);
  1492. int64_t VectorSize = cast<IntInit>(Expr->getArg(0))->getValue();
  1493. VectorSize /= ElementSize;
  1494. std::vector<Record *> Revved;
  1495. for (unsigned VI = 0; VI < Elts2.size(); VI += VectorSize) {
  1496. for (int LI = VectorSize - 1; LI >= 0; --LI) {
  1497. Revved.push_back(Elts2[VI + LI]);
  1498. }
  1499. }
  1500. Elts.insert(Revved.begin(), Revved.end());
  1501. }
  1502. };
  1503. class MaskExpander : public SetTheory::Expander {
  1504. unsigned N;
  1505. public:
  1506. MaskExpander(unsigned N) : N(N) {}
  1507. void expand(SetTheory &ST, Record *R, SetTheory::RecSet &Elts) override {
  1508. unsigned Addend = 0;
  1509. if (R->getName() == "mask0")
  1510. Addend = 0;
  1511. else if (R->getName() == "mask1")
  1512. Addend = N;
  1513. else
  1514. return;
  1515. for (unsigned I = 0; I < N; ++I)
  1516. Elts.insert(R->getRecords().getDef("sv" + utostr(I + Addend)));
  1517. }
  1518. };
  1519. // (shuffle arg1, arg2, sequence)
  1520. std::pair<Type, std::string> Arg1 =
  1521. emitDagArg(DI->getArg(0), DI->getArgNameStr(0));
  1522. std::pair<Type, std::string> Arg2 =
  1523. emitDagArg(DI->getArg(1), DI->getArgNameStr(1));
  1524. assert_with_loc(Arg1.first == Arg2.first,
  1525. "Different types in arguments to shuffle!");
  1526. SetTheory ST;
  1527. SetTheory::RecSet Elts;
  1528. ST.addOperator("lowhalf", std::make_unique<LowHalf>());
  1529. ST.addOperator("highhalf", std::make_unique<HighHalf>());
  1530. ST.addOperator("rev",
  1531. std::make_unique<Rev>(Arg1.first.getElementSizeInBits()));
  1532. ST.addExpander("MaskExpand",
  1533. std::make_unique<MaskExpander>(Arg1.first.getNumElements()));
  1534. ST.evaluate(DI->getArg(2), Elts, None);
  1535. std::string S = "__builtin_shufflevector(" + Arg1.second + ", " + Arg2.second;
  1536. for (auto &E : Elts) {
  1537. StringRef Name = E->getName();
  1538. assert_with_loc(Name.startswith("sv"),
  1539. "Incorrect element kind in shuffle mask!");
  1540. S += ", " + Name.drop_front(2).str();
  1541. }
  1542. S += ")";
  1543. // Recalculate the return type - the shuffle may have halved or doubled it.
  1544. Type T(Arg1.first);
  1545. if (Elts.size() > T.getNumElements()) {
  1546. assert_with_loc(
  1547. Elts.size() == T.getNumElements() * 2,
  1548. "Can only double or half the number of elements in a shuffle!");
  1549. T.doubleLanes();
  1550. } else if (Elts.size() < T.getNumElements()) {
  1551. assert_with_loc(
  1552. Elts.size() == T.getNumElements() / 2,
  1553. "Can only double or half the number of elements in a shuffle!");
  1554. T.halveLanes();
  1555. }
  1556. return std::make_pair(T, S);
  1557. }
  1558. std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDup(DagInit *DI) {
  1559. assert_with_loc(DI->getNumArgs() == 1, "dup() expects one argument");
  1560. std::pair<Type, std::string> A = emitDagArg(DI->getArg(0),
  1561. DI->getArgNameStr(0));
  1562. assert_with_loc(A.first.isScalar(), "dup() expects a scalar argument");
  1563. Type T = Intr.getBaseType();
  1564. assert_with_loc(T.isVector(), "dup() used but default type is scalar!");
  1565. std::string S = "(" + T.str() + ") {";
  1566. for (unsigned I = 0; I < T.getNumElements(); ++I) {
  1567. if (I != 0)
  1568. S += ", ";
  1569. S += A.second;
  1570. }
  1571. S += "}";
  1572. return std::make_pair(T, S);
  1573. }
  1574. std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagDupTyped(DagInit *DI) {
  1575. assert_with_loc(DI->getNumArgs() == 2, "dup_typed() expects two arguments");
  1576. std::pair<Type, std::string> A = emitDagArg(DI->getArg(0),
  1577. DI->getArgNameStr(0));
  1578. std::pair<Type, std::string> B = emitDagArg(DI->getArg(1),
  1579. DI->getArgNameStr(1));
  1580. assert_with_loc(B.first.isScalar(),
  1581. "dup_typed() requires a scalar as the second argument");
  1582. Type T = A.first;
  1583. assert_with_loc(T.isVector(), "dup_typed() used but target type is scalar!");
  1584. std::string S = "(" + T.str() + ") {";
  1585. for (unsigned I = 0; I < T.getNumElements(); ++I) {
  1586. if (I != 0)
  1587. S += ", ";
  1588. S += B.second;
  1589. }
  1590. S += "}";
  1591. return std::make_pair(T, S);
  1592. }
  1593. std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSplat(DagInit *DI) {
  1594. assert_with_loc(DI->getNumArgs() == 2, "splat() expects two arguments");
  1595. std::pair<Type, std::string> A = emitDagArg(DI->getArg(0),
  1596. DI->getArgNameStr(0));
  1597. std::pair<Type, std::string> B = emitDagArg(DI->getArg(1),
  1598. DI->getArgNameStr(1));
  1599. assert_with_loc(B.first.isScalar(),
  1600. "splat() requires a scalar int as the second argument");
  1601. std::string S = "__builtin_shufflevector(" + A.second + ", " + A.second;
  1602. for (unsigned I = 0; I < Intr.getBaseType().getNumElements(); ++I) {
  1603. S += ", " + B.second;
  1604. }
  1605. S += ")";
  1606. return std::make_pair(Intr.getBaseType(), S);
  1607. }
  1608. std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagSaveTemp(DagInit *DI) {
  1609. assert_with_loc(DI->getNumArgs() == 2, "save_temp() expects two arguments");
  1610. std::pair<Type, std::string> A = emitDagArg(DI->getArg(1),
  1611. DI->getArgNameStr(1));
  1612. assert_with_loc(!A.first.isVoid(),
  1613. "Argument to save_temp() must have non-void type!");
  1614. std::string N = DI->getArgNameStr(0);
  1615. assert_with_loc(!N.empty(),
  1616. "save_temp() expects a name as the first argument");
  1617. assert_with_loc(Intr.Variables.find(N) == Intr.Variables.end(),
  1618. "Variable already defined!");
  1619. Intr.Variables[N] = Variable(A.first, N + Intr.VariablePostfix);
  1620. std::string S =
  1621. A.first.str() + " " + Intr.Variables[N].getName() + " = " + A.second;
  1622. return std::make_pair(Type::getVoid(), S);
  1623. }
  1624. std::pair<Type, std::string>
  1625. Intrinsic::DagEmitter::emitDagNameReplace(DagInit *DI) {
  1626. std::string S = Intr.Name;
  1627. assert_with_loc(DI->getNumArgs() == 2, "name_replace requires 2 arguments!");
  1628. std::string ToReplace = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
  1629. std::string ReplaceWith = cast<StringInit>(DI->getArg(1))->getAsUnquotedString();
  1630. size_t Idx = S.find(ToReplace);
  1631. assert_with_loc(Idx != std::string::npos, "name should contain '" + ToReplace + "'!");
  1632. S.replace(Idx, ToReplace.size(), ReplaceWith);
  1633. return std::make_pair(Type::getVoid(), S);
  1634. }
  1635. std::pair<Type, std::string> Intrinsic::DagEmitter::emitDagLiteral(DagInit *DI){
  1636. std::string Ty = cast<StringInit>(DI->getArg(0))->getAsUnquotedString();
  1637. std::string Value = cast<StringInit>(DI->getArg(1))->getAsUnquotedString();
  1638. return std::make_pair(Type::fromTypedefName(Ty), Value);
  1639. }
  1640. std::pair<Type, std::string>
  1641. Intrinsic::DagEmitter::emitDagArg(Init *Arg, std::string ArgName) {
  1642. if (!ArgName.empty()) {
  1643. assert_with_loc(!Arg->isComplete(),
  1644. "Arguments must either be DAGs or names, not both!");
  1645. assert_with_loc(Intr.Variables.find(ArgName) != Intr.Variables.end(),
  1646. "Variable not defined!");
  1647. Variable &V = Intr.Variables[ArgName];
  1648. return std::make_pair(V.getType(), V.getName());
  1649. }
  1650. assert(Arg && "Neither ArgName nor Arg?!");
  1651. DagInit *DI = dyn_cast<DagInit>(Arg);
  1652. assert_with_loc(DI, "Arguments must either be DAGs or names!");
  1653. return emitDag(DI);
  1654. }
  1655. std::string Intrinsic::generate() {
  1656. // Avoid duplicated code for big and little endian
  1657. if (isBigEndianSafe()) {
  1658. generateImpl(false, "", "");
  1659. return OS.str();
  1660. }
  1661. // Little endian intrinsics are simple and don't require any argument
  1662. // swapping.
  1663. OS << "#ifdef __LITTLE_ENDIAN__\n";
  1664. generateImpl(false, "", "");
  1665. OS << "#else\n";
  1666. // Big endian intrinsics are more complex. The user intended these
  1667. // intrinsics to operate on a vector "as-if" loaded by (V)LDR,
  1668. // but we load as-if (V)LD1. So we should swap all arguments and
  1669. // swap the return value too.
  1670. //
  1671. // If we call sub-intrinsics, we should call a version that does
  1672. // not re-swap the arguments!
  1673. generateImpl(true, "", "__noswap_");
  1674. // If we're needed early, create a non-swapping variant for
  1675. // big-endian.
  1676. if (NeededEarly) {
  1677. generateImpl(false, "__noswap_", "__noswap_");
  1678. }
  1679. OS << "#endif\n\n";
  1680. return OS.str();
  1681. }
  1682. void Intrinsic::generateImpl(bool ReverseArguments,
  1683. StringRef NamePrefix, StringRef CallPrefix) {
  1684. CurrentRecord = R;
  1685. // If we call a macro, our local variables may be corrupted due to
  1686. // lack of proper lexical scoping. So, add a globally unique postfix
  1687. // to every variable.
  1688. //
  1689. // indexBody() should have set up the Dependencies set by now.
  1690. for (auto *I : Dependencies)
  1691. if (I->UseMacro) {
  1692. VariablePostfix = "_" + utostr(Emitter.getUniqueNumber());
  1693. break;
  1694. }
  1695. initVariables();
  1696. emitPrototype(NamePrefix);
  1697. if (IsUnavailable) {
  1698. OS << " __attribute__((unavailable));";
  1699. } else {
  1700. emitOpeningBrace();
  1701. emitShadowedArgs();
  1702. if (ReverseArguments)
  1703. emitArgumentReversal();
  1704. emitBody(CallPrefix);
  1705. if (ReverseArguments)
  1706. emitReturnReversal();
  1707. emitReturn();
  1708. emitClosingBrace();
  1709. }
  1710. OS << "\n";
  1711. CurrentRecord = nullptr;
  1712. }
  1713. void Intrinsic::indexBody() {
  1714. CurrentRecord = R;
  1715. initVariables();
  1716. emitBody("");
  1717. OS.str("");
  1718. CurrentRecord = nullptr;
  1719. }
  1720. //===----------------------------------------------------------------------===//
  1721. // NeonEmitter implementation
  1722. //===----------------------------------------------------------------------===//
  1723. Intrinsic &NeonEmitter::getIntrinsic(StringRef Name, ArrayRef<Type> Types) {
  1724. // First, look up the name in the intrinsic map.
  1725. assert_with_loc(IntrinsicMap.find(Name.str()) != IntrinsicMap.end(),
  1726. ("Intrinsic '" + Name + "' not found!").str());
  1727. auto &V = IntrinsicMap.find(Name.str())->second;
  1728. std::vector<Intrinsic *> GoodVec;
  1729. // Create a string to print if we end up failing.
  1730. std::string ErrMsg = "looking up intrinsic '" + Name.str() + "(";
  1731. for (unsigned I = 0; I < Types.size(); ++I) {
  1732. if (I != 0)
  1733. ErrMsg += ", ";
  1734. ErrMsg += Types[I].str();
  1735. }
  1736. ErrMsg += ")'\n";
  1737. ErrMsg += "Available overloads:\n";
  1738. // Now, look through each intrinsic implementation and see if the types are
  1739. // compatible.
  1740. for (auto &I : V) {
  1741. ErrMsg += " - " + I.getReturnType().str() + " " + I.getMangledName();
  1742. ErrMsg += "(";
  1743. for (unsigned A = 0; A < I.getNumParams(); ++A) {
  1744. if (A != 0)
  1745. ErrMsg += ", ";
  1746. ErrMsg += I.getParamType(A).str();
  1747. }
  1748. ErrMsg += ")\n";
  1749. if (I.getNumParams() != Types.size())
  1750. continue;
  1751. bool Good = true;
  1752. for (unsigned Arg = 0; Arg < Types.size(); ++Arg) {
  1753. if (I.getParamType(Arg) != Types[Arg]) {
  1754. Good = false;
  1755. break;
  1756. }
  1757. }
  1758. if (Good)
  1759. GoodVec.push_back(&I);
  1760. }
  1761. assert_with_loc(!GoodVec.empty(),
  1762. "No compatible intrinsic found - " + ErrMsg);
  1763. assert_with_loc(GoodVec.size() == 1, "Multiple overloads found - " + ErrMsg);
  1764. return *GoodVec.front();
  1765. }
  1766. void NeonEmitter::createIntrinsic(Record *R,
  1767. SmallVectorImpl<Intrinsic *> &Out) {
  1768. std::string Name = R->getValueAsString("Name");
  1769. std::string Proto = R->getValueAsString("Prototype");
  1770. std::string Types = R->getValueAsString("Types");
  1771. Record *OperationRec = R->getValueAsDef("Operation");
  1772. bool CartesianProductOfTypes = R->getValueAsBit("CartesianProductOfTypes");
  1773. bool BigEndianSafe = R->getValueAsBit("BigEndianSafe");
  1774. std::string Guard = R->getValueAsString("ArchGuard");
  1775. bool IsUnavailable = OperationRec->getValueAsBit("Unavailable");
  1776. // Set the global current record. This allows assert_with_loc to produce
  1777. // decent location information even when highly nested.
  1778. CurrentRecord = R;
  1779. ListInit *Body = OperationRec->getValueAsListInit("Ops");
  1780. std::vector<TypeSpec> TypeSpecs = TypeSpec::fromTypeSpecs(Types);
  1781. ClassKind CK = ClassNone;
  1782. if (R->getSuperClasses().size() >= 2)
  1783. CK = ClassMap[R->getSuperClasses()[1].first];
  1784. std::vector<std::pair<TypeSpec, TypeSpec>> NewTypeSpecs;
  1785. for (auto TS : TypeSpecs) {
  1786. if (CartesianProductOfTypes) {
  1787. Type DefaultT(TS, 'd');
  1788. for (auto SrcTS : TypeSpecs) {
  1789. Type DefaultSrcT(SrcTS, 'd');
  1790. if (TS == SrcTS ||
  1791. DefaultSrcT.getSizeInBits() != DefaultT.getSizeInBits())
  1792. continue;
  1793. NewTypeSpecs.push_back(std::make_pair(TS, SrcTS));
  1794. }
  1795. } else {
  1796. NewTypeSpecs.push_back(std::make_pair(TS, TS));
  1797. }
  1798. }
  1799. llvm::sort(NewTypeSpecs);
  1800. NewTypeSpecs.erase(std::unique(NewTypeSpecs.begin(), NewTypeSpecs.end()),
  1801. NewTypeSpecs.end());
  1802. auto &Entry = IntrinsicMap[Name];
  1803. for (auto &I : NewTypeSpecs) {
  1804. Entry.emplace_back(R, Name, Proto, I.first, I.second, CK, Body, *this,
  1805. Guard, IsUnavailable, BigEndianSafe);
  1806. Out.push_back(&Entry.back());
  1807. }
  1808. CurrentRecord = nullptr;
  1809. }
  1810. /// genBuiltinsDef: Generate the BuiltinsARM.def and BuiltinsAArch64.def
  1811. /// declaration of builtins, checking for unique builtin declarations.
  1812. void NeonEmitter::genBuiltinsDef(raw_ostream &OS,
  1813. SmallVectorImpl<Intrinsic *> &Defs) {
  1814. OS << "#ifdef GET_NEON_BUILTINS\n";
  1815. // We only want to emit a builtin once, and we want to emit them in
  1816. // alphabetical order, so use a std::set.
  1817. std::set<std::string> Builtins;
  1818. for (auto *Def : Defs) {
  1819. if (Def->hasBody())
  1820. continue;
  1821. // Functions with 'a' (the splat code) in the type prototype should not get
  1822. // their own builtin as they use the non-splat variant.
  1823. if (Def->hasSplat())
  1824. continue;
  1825. std::string S = "BUILTIN(__builtin_neon_" + Def->getMangledName() + ", \"";
  1826. S += Def->getBuiltinTypeStr();
  1827. S += "\", \"n\")";
  1828. Builtins.insert(S);
  1829. }
  1830. for (auto &S : Builtins)
  1831. OS << S << "\n";
  1832. OS << "#endif\n\n";
  1833. }
  1834. /// Generate the ARM and AArch64 overloaded type checking code for
  1835. /// SemaChecking.cpp, checking for unique builtin declarations.
  1836. void NeonEmitter::genOverloadTypeCheckCode(raw_ostream &OS,
  1837. SmallVectorImpl<Intrinsic *> &Defs) {
  1838. OS << "#ifdef GET_NEON_OVERLOAD_CHECK\n";
  1839. // We record each overload check line before emitting because subsequent Inst
  1840. // definitions may extend the number of permitted types (i.e. augment the
  1841. // Mask). Use std::map to avoid sorting the table by hash number.
  1842. struct OverloadInfo {
  1843. uint64_t Mask;
  1844. int PtrArgNum;
  1845. bool HasConstPtr;
  1846. OverloadInfo() : Mask(0ULL), PtrArgNum(0), HasConstPtr(false) {}
  1847. };
  1848. std::map<std::string, OverloadInfo> OverloadMap;
  1849. for (auto *Def : Defs) {
  1850. // If the def has a body (that is, it has Operation DAGs), it won't call
  1851. // __builtin_neon_* so we don't need to generate a definition for it.
  1852. if (Def->hasBody())
  1853. continue;
  1854. // Functions with 'a' (the splat code) in the type prototype should not get
  1855. // their own builtin as they use the non-splat variant.
  1856. if (Def->hasSplat())
  1857. continue;
  1858. // Functions which have a scalar argument cannot be overloaded, no need to
  1859. // check them if we are emitting the type checking code.
  1860. if (Def->protoHasScalar())
  1861. continue;
  1862. uint64_t Mask = 0ULL;
  1863. Type Ty = Def->getReturnType();
  1864. if (Def->getProto()[0] == 'v' ||
  1865. isFloatingPointProtoModifier(Def->getProto()[0]))
  1866. Ty = Def->getParamType(0);
  1867. if (Ty.isPointer())
  1868. Ty = Def->getParamType(1);
  1869. Mask |= 1ULL << Ty.getNeonEnum();
  1870. // Check if the function has a pointer or const pointer argument.
  1871. std::string Proto = Def->getProto();
  1872. int PtrArgNum = -1;
  1873. bool HasConstPtr = false;
  1874. for (unsigned I = 0; I < Def->getNumParams(); ++I) {
  1875. char ArgType = Proto[I + 1];
  1876. if (ArgType == 'c') {
  1877. HasConstPtr = true;
  1878. PtrArgNum = I;
  1879. break;
  1880. }
  1881. if (ArgType == 'p') {
  1882. PtrArgNum = I;
  1883. break;
  1884. }
  1885. }
  1886. // For sret builtins, adjust the pointer argument index.
  1887. if (PtrArgNum >= 0 && Def->getReturnType().getNumVectors() > 1)
  1888. PtrArgNum += 1;
  1889. std::string Name = Def->getName();
  1890. // Omit type checking for the pointer arguments of vld1_lane, vld1_dup,
  1891. // and vst1_lane intrinsics. Using a pointer to the vector element
  1892. // type with one of those operations causes codegen to select an aligned
  1893. // load/store instruction. If you want an unaligned operation,
  1894. // the pointer argument needs to have less alignment than element type,
  1895. // so just accept any pointer type.
  1896. if (Name == "vld1_lane" || Name == "vld1_dup" || Name == "vst1_lane") {
  1897. PtrArgNum = -1;
  1898. HasConstPtr = false;
  1899. }
  1900. if (Mask) {
  1901. std::string Name = Def->getMangledName();
  1902. OverloadMap.insert(std::make_pair(Name, OverloadInfo()));
  1903. OverloadInfo &OI = OverloadMap[Name];
  1904. OI.Mask |= Mask;
  1905. OI.PtrArgNum |= PtrArgNum;
  1906. OI.HasConstPtr = HasConstPtr;
  1907. }
  1908. }
  1909. for (auto &I : OverloadMap) {
  1910. OverloadInfo &OI = I.second;
  1911. OS << "case NEON::BI__builtin_neon_" << I.first << ": ";
  1912. OS << "mask = 0x" << Twine::utohexstr(OI.Mask) << "ULL";
  1913. if (OI.PtrArgNum >= 0)
  1914. OS << "; PtrArgNum = " << OI.PtrArgNum;
  1915. if (OI.HasConstPtr)
  1916. OS << "; HasConstPtr = true";
  1917. OS << "; break;\n";
  1918. }
  1919. OS << "#endif\n\n";
  1920. }
  1921. void NeonEmitter::genIntrinsicRangeCheckCode(raw_ostream &OS,
  1922. SmallVectorImpl<Intrinsic *> &Defs) {
  1923. OS << "#ifdef GET_NEON_IMMEDIATE_CHECK\n";
  1924. std::set<std::string> Emitted;
  1925. for (auto *Def : Defs) {
  1926. if (Def->hasBody())
  1927. continue;
  1928. // Functions with 'a' (the splat code) in the type prototype should not get
  1929. // their own builtin as they use the non-splat variant.
  1930. if (Def->hasSplat())
  1931. continue;
  1932. // Functions which do not have an immediate do not need to have range
  1933. // checking code emitted.
  1934. if (!Def->hasImmediate())
  1935. continue;
  1936. if (Emitted.find(Def->getMangledName()) != Emitted.end())
  1937. continue;
  1938. std::string LowerBound, UpperBound;
  1939. Record *R = Def->getRecord();
  1940. if (R->getValueAsBit("isVCVT_N")) {
  1941. // VCVT between floating- and fixed-point values takes an immediate
  1942. // in the range [1, 32) for f32 or [1, 64) for f64 or [1, 16) for f16.
  1943. LowerBound = "1";
  1944. if (Def->getBaseType().getElementSizeInBits() == 16 ||
  1945. Def->getName().find('h') != std::string::npos)
  1946. // VCVTh operating on FP16 intrinsics in range [1, 16)
  1947. UpperBound = "15";
  1948. else if (Def->getBaseType().getElementSizeInBits() == 32)
  1949. UpperBound = "31";
  1950. else
  1951. UpperBound = "63";
  1952. } else if (R->getValueAsBit("isScalarShift")) {
  1953. // Right shifts have an 'r' in the name, left shifts do not. Convert
  1954. // instructions have the same bounds and right shifts.
  1955. if (Def->getName().find('r') != std::string::npos ||
  1956. Def->getName().find("cvt") != std::string::npos)
  1957. LowerBound = "1";
  1958. UpperBound = utostr(Def->getReturnType().getElementSizeInBits() - 1);
  1959. } else if (R->getValueAsBit("isShift")) {
  1960. // Builtins which are overloaded by type will need to have their upper
  1961. // bound computed at Sema time based on the type constant.
  1962. // Right shifts have an 'r' in the name, left shifts do not.
  1963. if (Def->getName().find('r') != std::string::npos)
  1964. LowerBound = "1";
  1965. UpperBound = "RFT(TV, true)";
  1966. } else if (Def->getClassKind(true) == ClassB) {
  1967. // ClassB intrinsics have a type (and hence lane number) that is only
  1968. // known at runtime.
  1969. if (R->getValueAsBit("isLaneQ"))
  1970. UpperBound = "RFT(TV, false, true)";
  1971. else
  1972. UpperBound = "RFT(TV, false, false)";
  1973. } else {
  1974. // The immediate generally refers to a lane in the preceding argument.
  1975. assert(Def->getImmediateIdx() > 0);
  1976. Type T = Def->getParamType(Def->getImmediateIdx() - 1);
  1977. UpperBound = utostr(T.getNumElements() - 1);
  1978. }
  1979. // Calculate the index of the immediate that should be range checked.
  1980. unsigned Idx = Def->getNumParams();
  1981. if (Def->hasImmediate())
  1982. Idx = Def->getGeneratedParamIdx(Def->getImmediateIdx());
  1983. OS << "case NEON::BI__builtin_neon_" << Def->getMangledName() << ": "
  1984. << "i = " << Idx << ";";
  1985. if (!LowerBound.empty())
  1986. OS << " l = " << LowerBound << ";";
  1987. if (!UpperBound.empty())
  1988. OS << " u = " << UpperBound << ";";
  1989. OS << " break;\n";
  1990. Emitted.insert(Def->getMangledName());
  1991. }
  1992. OS << "#endif\n\n";
  1993. }
  1994. /// runHeader - Emit a file with sections defining:
  1995. /// 1. the NEON section of BuiltinsARM.def and BuiltinsAArch64.def.
  1996. /// 2. the SemaChecking code for the type overload checking.
  1997. /// 3. the SemaChecking code for validation of intrinsic immediate arguments.
  1998. void NeonEmitter::runHeader(raw_ostream &OS) {
  1999. std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  2000. SmallVector<Intrinsic *, 128> Defs;
  2001. for (auto *R : RV)
  2002. createIntrinsic(R, Defs);
  2003. // Generate shared BuiltinsXXX.def
  2004. genBuiltinsDef(OS, Defs);
  2005. // Generate ARM overloaded type checking code for SemaChecking.cpp
  2006. genOverloadTypeCheckCode(OS, Defs);
  2007. // Generate ARM range checking code for shift/lane immediates.
  2008. genIntrinsicRangeCheckCode(OS, Defs);
  2009. }
  2010. /// run - Read the records in arm_neon.td and output arm_neon.h. arm_neon.h
  2011. /// is comprised of type definitions and function declarations.
  2012. void NeonEmitter::run(raw_ostream &OS) {
  2013. OS << "/*===---- arm_neon.h - ARM Neon intrinsics "
  2014. "------------------------------"
  2015. "---===\n"
  2016. " *\n"
  2017. " * Permission is hereby granted, free of charge, to any person "
  2018. "obtaining "
  2019. "a copy\n"
  2020. " * of this software and associated documentation files (the "
  2021. "\"Software\"),"
  2022. " to deal\n"
  2023. " * in the Software without restriction, including without limitation "
  2024. "the "
  2025. "rights\n"
  2026. " * to use, copy, modify, merge, publish, distribute, sublicense, "
  2027. "and/or sell\n"
  2028. " * copies of the Software, and to permit persons to whom the Software "
  2029. "is\n"
  2030. " * furnished to do so, subject to the following conditions:\n"
  2031. " *\n"
  2032. " * The above copyright notice and this permission notice shall be "
  2033. "included in\n"
  2034. " * all copies or substantial portions of the Software.\n"
  2035. " *\n"
  2036. " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
  2037. "EXPRESS OR\n"
  2038. " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
  2039. "MERCHANTABILITY,\n"
  2040. " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
  2041. "SHALL THE\n"
  2042. " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
  2043. "OTHER\n"
  2044. " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
  2045. "ARISING FROM,\n"
  2046. " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
  2047. "DEALINGS IN\n"
  2048. " * THE SOFTWARE.\n"
  2049. " *\n"
  2050. " *===-----------------------------------------------------------------"
  2051. "---"
  2052. "---===\n"
  2053. " */\n\n";
  2054. OS << "#ifndef __ARM_NEON_H\n";
  2055. OS << "#define __ARM_NEON_H\n\n";
  2056. OS << "#if !defined(__ARM_NEON)\n";
  2057. OS << "#error \"NEON support not enabled\"\n";
  2058. OS << "#endif\n\n";
  2059. OS << "#include <stdint.h>\n\n";
  2060. // Emit NEON-specific scalar typedefs.
  2061. OS << "typedef float float32_t;\n";
  2062. OS << "typedef __fp16 float16_t;\n";
  2063. OS << "#ifdef __aarch64__\n";
  2064. OS << "typedef double float64_t;\n";
  2065. OS << "#endif\n\n";
  2066. // For now, signedness of polynomial types depends on target
  2067. OS << "#ifdef __aarch64__\n";
  2068. OS << "typedef uint8_t poly8_t;\n";
  2069. OS << "typedef uint16_t poly16_t;\n";
  2070. OS << "typedef uint64_t poly64_t;\n";
  2071. OS << "typedef __uint128_t poly128_t;\n";
  2072. OS << "#else\n";
  2073. OS << "typedef int8_t poly8_t;\n";
  2074. OS << "typedef int16_t poly16_t;\n";
  2075. OS << "#endif\n";
  2076. // Emit Neon vector typedefs.
  2077. std::string TypedefTypes(
  2078. "cQcsQsiQilQlUcQUcUsQUsUiQUiUlQUlhQhfQfdQdPcQPcPsQPsPlQPl");
  2079. std::vector<TypeSpec> TDTypeVec = TypeSpec::fromTypeSpecs(TypedefTypes);
  2080. // Emit vector typedefs.
  2081. bool InIfdef = false;
  2082. for (auto &TS : TDTypeVec) {
  2083. bool IsA64 = false;
  2084. Type T(TS, 'd');
  2085. if (T.isDouble() || (T.isPoly() && T.isLong()))
  2086. IsA64 = true;
  2087. if (InIfdef && !IsA64) {
  2088. OS << "#endif\n";
  2089. InIfdef = false;
  2090. }
  2091. if (!InIfdef && IsA64) {
  2092. OS << "#ifdef __aarch64__\n";
  2093. InIfdef = true;
  2094. }
  2095. if (T.isPoly())
  2096. OS << "typedef __attribute__((neon_polyvector_type(";
  2097. else
  2098. OS << "typedef __attribute__((neon_vector_type(";
  2099. Type T2 = T;
  2100. T2.makeScalar();
  2101. OS << T.getNumElements() << "))) ";
  2102. OS << T2.str();
  2103. OS << " " << T.str() << ";\n";
  2104. }
  2105. if (InIfdef)
  2106. OS << "#endif\n";
  2107. OS << "\n";
  2108. // Emit struct typedefs.
  2109. InIfdef = false;
  2110. for (unsigned NumMembers = 2; NumMembers <= 4; ++NumMembers) {
  2111. for (auto &TS : TDTypeVec) {
  2112. bool IsA64 = false;
  2113. Type T(TS, 'd');
  2114. if (T.isDouble() || (T.isPoly() && T.isLong()))
  2115. IsA64 = true;
  2116. if (InIfdef && !IsA64) {
  2117. OS << "#endif\n";
  2118. InIfdef = false;
  2119. }
  2120. if (!InIfdef && IsA64) {
  2121. OS << "#ifdef __aarch64__\n";
  2122. InIfdef = true;
  2123. }
  2124. char M = '2' + (NumMembers - 2);
  2125. Type VT(TS, M);
  2126. OS << "typedef struct " << VT.str() << " {\n";
  2127. OS << " " << T.str() << " val";
  2128. OS << "[" << NumMembers << "]";
  2129. OS << ";\n} ";
  2130. OS << VT.str() << ";\n";
  2131. OS << "\n";
  2132. }
  2133. }
  2134. if (InIfdef)
  2135. OS << "#endif\n";
  2136. OS << "\n";
  2137. OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
  2138. "__nodebug__))\n\n";
  2139. SmallVector<Intrinsic *, 128> Defs;
  2140. std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  2141. for (auto *R : RV)
  2142. createIntrinsic(R, Defs);
  2143. for (auto *I : Defs)
  2144. I->indexBody();
  2145. llvm::stable_sort(Defs, llvm::deref<std::less<>>());
  2146. // Only emit a def when its requirements have been met.
  2147. // FIXME: This loop could be made faster, but it's fast enough for now.
  2148. bool MadeProgress = true;
  2149. std::string InGuard;
  2150. while (!Defs.empty() && MadeProgress) {
  2151. MadeProgress = false;
  2152. for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
  2153. I != Defs.end(); /*No step*/) {
  2154. bool DependenciesSatisfied = true;
  2155. for (auto *II : (*I)->getDependencies()) {
  2156. if (llvm::is_contained(Defs, II))
  2157. DependenciesSatisfied = false;
  2158. }
  2159. if (!DependenciesSatisfied) {
  2160. // Try the next one.
  2161. ++I;
  2162. continue;
  2163. }
  2164. // Emit #endif/#if pair if needed.
  2165. if ((*I)->getGuard() != InGuard) {
  2166. if (!InGuard.empty())
  2167. OS << "#endif\n";
  2168. InGuard = (*I)->getGuard();
  2169. if (!InGuard.empty())
  2170. OS << "#if " << InGuard << "\n";
  2171. }
  2172. // Actually generate the intrinsic code.
  2173. OS << (*I)->generate();
  2174. MadeProgress = true;
  2175. I = Defs.erase(I);
  2176. }
  2177. }
  2178. assert(Defs.empty() && "Some requirements were not satisfied!");
  2179. if (!InGuard.empty())
  2180. OS << "#endif\n";
  2181. OS << "\n";
  2182. OS << "#undef __ai\n\n";
  2183. OS << "#endif /* __ARM_NEON_H */\n";
  2184. }
  2185. /// run - Read the records in arm_fp16.td and output arm_fp16.h. arm_fp16.h
  2186. /// is comprised of type definitions and function declarations.
  2187. void NeonEmitter::runFP16(raw_ostream &OS) {
  2188. OS << "/*===---- arm_fp16.h - ARM FP16 intrinsics "
  2189. "------------------------------"
  2190. "---===\n"
  2191. " *\n"
  2192. " * Permission is hereby granted, free of charge, to any person "
  2193. "obtaining a copy\n"
  2194. " * of this software and associated documentation files (the "
  2195. "\"Software\"), to deal\n"
  2196. " * in the Software without restriction, including without limitation "
  2197. "the rights\n"
  2198. " * to use, copy, modify, merge, publish, distribute, sublicense, "
  2199. "and/or sell\n"
  2200. " * copies of the Software, and to permit persons to whom the Software "
  2201. "is\n"
  2202. " * furnished to do so, subject to the following conditions:\n"
  2203. " *\n"
  2204. " * The above copyright notice and this permission notice shall be "
  2205. "included in\n"
  2206. " * all copies or substantial portions of the Software.\n"
  2207. " *\n"
  2208. " * THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, "
  2209. "EXPRESS OR\n"
  2210. " * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF "
  2211. "MERCHANTABILITY,\n"
  2212. " * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT "
  2213. "SHALL THE\n"
  2214. " * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR "
  2215. "OTHER\n"
  2216. " * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, "
  2217. "ARISING FROM,\n"
  2218. " * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER "
  2219. "DEALINGS IN\n"
  2220. " * THE SOFTWARE.\n"
  2221. " *\n"
  2222. " *===-----------------------------------------------------------------"
  2223. "---"
  2224. "---===\n"
  2225. " */\n\n";
  2226. OS << "#ifndef __ARM_FP16_H\n";
  2227. OS << "#define __ARM_FP16_H\n\n";
  2228. OS << "#include <stdint.h>\n\n";
  2229. OS << "typedef __fp16 float16_t;\n";
  2230. OS << "#define __ai static __inline__ __attribute__((__always_inline__, "
  2231. "__nodebug__))\n\n";
  2232. SmallVector<Intrinsic *, 128> Defs;
  2233. std::vector<Record *> RV = Records.getAllDerivedDefinitions("Inst");
  2234. for (auto *R : RV)
  2235. createIntrinsic(R, Defs);
  2236. for (auto *I : Defs)
  2237. I->indexBody();
  2238. llvm::stable_sort(Defs, llvm::deref<std::less<>>());
  2239. // Only emit a def when its requirements have been met.
  2240. // FIXME: This loop could be made faster, but it's fast enough for now.
  2241. bool MadeProgress = true;
  2242. std::string InGuard;
  2243. while (!Defs.empty() && MadeProgress) {
  2244. MadeProgress = false;
  2245. for (SmallVector<Intrinsic *, 128>::iterator I = Defs.begin();
  2246. I != Defs.end(); /*No step*/) {
  2247. bool DependenciesSatisfied = true;
  2248. for (auto *II : (*I)->getDependencies()) {
  2249. if (llvm::is_contained(Defs, II))
  2250. DependenciesSatisfied = false;
  2251. }
  2252. if (!DependenciesSatisfied) {
  2253. // Try the next one.
  2254. ++I;
  2255. continue;
  2256. }
  2257. // Emit #endif/#if pair if needed.
  2258. if ((*I)->getGuard() != InGuard) {
  2259. if (!InGuard.empty())
  2260. OS << "#endif\n";
  2261. InGuard = (*I)->getGuard();
  2262. if (!InGuard.empty())
  2263. OS << "#if " << InGuard << "\n";
  2264. }
  2265. // Actually generate the intrinsic code.
  2266. OS << (*I)->generate();
  2267. MadeProgress = true;
  2268. I = Defs.erase(I);
  2269. }
  2270. }
  2271. assert(Defs.empty() && "Some requirements were not satisfied!");
  2272. if (!InGuard.empty())
  2273. OS << "#endif\n";
  2274. OS << "\n";
  2275. OS << "#undef __ai\n\n";
  2276. OS << "#endif /* __ARM_FP16_H */\n";
  2277. }
  2278. void clang::EmitNeon(RecordKeeper &Records, raw_ostream &OS) {
  2279. NeonEmitter(Records).run(OS);
  2280. }
  2281. void clang::EmitFP16(RecordKeeper &Records, raw_ostream &OS) {
  2282. NeonEmitter(Records).runFP16(OS);
  2283. }
  2284. void clang::EmitNeonSema(RecordKeeper &Records, raw_ostream &OS) {
  2285. NeonEmitter(Records).runHeader(OS);
  2286. }
  2287. void clang::EmitNeonTest(RecordKeeper &Records, raw_ostream &OS) {
  2288. llvm_unreachable("Neon test generation no longer implemented!");
  2289. }