2
0

milkymist-pfpu.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549
  1. /*
  2. * QEMU model of the Milkymist programmable FPU.
  3. *
  4. * Copyright (c) 2010 Michael Walle <michael@walle.cc>
  5. *
  6. * This library is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2 of the License, or (at your option) any later version.
  10. *
  11. * This library is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with this library; if not, see <http://www.gnu.org/licenses/>.
  18. *
  19. *
  20. * Specification available at:
  21. * http://milkymist.walle.cc/socdoc/pfpu.pdf
  22. *
  23. */
  24. #include "qemu/osdep.h"
  25. #include "hw/irq.h"
  26. #include "hw/sysbus.h"
  27. #include "migration/vmstate.h"
  28. #include "trace.h"
  29. #include "qemu/log.h"
  30. #include "qemu/module.h"
  31. #include "qemu/error-report.h"
  32. #include <math.h>
  33. /* #define TRACE_EXEC */
  34. #ifdef TRACE_EXEC
  35. # define D_EXEC(x) x
  36. #else
  37. # define D_EXEC(x)
  38. #endif
  39. enum {
  40. R_CTL = 0,
  41. R_MESHBASE,
  42. R_HMESHLAST,
  43. R_VMESHLAST,
  44. R_CODEPAGE,
  45. R_VERTICES,
  46. R_COLLISIONS,
  47. R_STRAYWRITES,
  48. R_LASTDMA,
  49. R_PC,
  50. R_DREGBASE,
  51. R_CODEBASE,
  52. R_MAX
  53. };
  54. enum {
  55. CTL_START_BUSY = (1<<0),
  56. };
  57. enum {
  58. OP_NOP = 0,
  59. OP_FADD,
  60. OP_FSUB,
  61. OP_FMUL,
  62. OP_FABS,
  63. OP_F2I,
  64. OP_I2F,
  65. OP_VECTOUT,
  66. OP_SIN,
  67. OP_COS,
  68. OP_ABOVE,
  69. OP_EQUAL,
  70. OP_COPY,
  71. OP_IF,
  72. OP_TSIGN,
  73. OP_QUAKE,
  74. };
  75. enum {
  76. GPR_X = 0,
  77. GPR_Y = 1,
  78. GPR_FLAGS = 2,
  79. };
  80. enum {
  81. LATENCY_FADD = 5,
  82. LATENCY_FSUB = 5,
  83. LATENCY_FMUL = 7,
  84. LATENCY_FABS = 2,
  85. LATENCY_F2I = 2,
  86. LATENCY_I2F = 3,
  87. LATENCY_VECTOUT = 0,
  88. LATENCY_SIN = 4,
  89. LATENCY_COS = 4,
  90. LATENCY_ABOVE = 2,
  91. LATENCY_EQUAL = 2,
  92. LATENCY_COPY = 2,
  93. LATENCY_IF = 2,
  94. LATENCY_TSIGN = 2,
  95. LATENCY_QUAKE = 2,
  96. MAX_LATENCY = 7
  97. };
  98. #define GPR_BEGIN 0x100
  99. #define GPR_END 0x17f
  100. #define MICROCODE_BEGIN 0x200
  101. #define MICROCODE_END 0x3ff
  102. #define MICROCODE_WORDS 2048
  103. #define REINTERPRET_CAST(type, val) (*((type *)&(val)))
  104. #ifdef TRACE_EXEC
  105. static const char *opcode_to_str[] = {
  106. "NOP", "FADD", "FSUB", "FMUL", "FABS", "F2I", "I2F", "VECTOUT",
  107. "SIN", "COS", "ABOVE", "EQUAL", "COPY", "IF", "TSIGN", "QUAKE",
  108. };
  109. #endif
  110. #define TYPE_MILKYMIST_PFPU "milkymist-pfpu"
  111. #define MILKYMIST_PFPU(obj) \
  112. OBJECT_CHECK(MilkymistPFPUState, (obj), TYPE_MILKYMIST_PFPU)
  113. struct MilkymistPFPUState {
  114. SysBusDevice parent_obj;
  115. MemoryRegion regs_region;
  116. Chardev *chr;
  117. qemu_irq irq;
  118. uint32_t regs[R_MAX];
  119. uint32_t gp_regs[128];
  120. uint32_t microcode[MICROCODE_WORDS];
  121. int output_queue_pos;
  122. uint32_t output_queue[MAX_LATENCY];
  123. };
  124. typedef struct MilkymistPFPUState MilkymistPFPUState;
  125. static inline uint32_t
  126. get_dma_address(uint32_t base, uint32_t x, uint32_t y)
  127. {
  128. return base + 8 * (128 * y + x);
  129. }
  130. static inline void
  131. output_queue_insert(MilkymistPFPUState *s, uint32_t val, int pos)
  132. {
  133. s->output_queue[(s->output_queue_pos + pos) % MAX_LATENCY] = val;
  134. }
  135. static inline uint32_t
  136. output_queue_remove(MilkymistPFPUState *s)
  137. {
  138. return s->output_queue[s->output_queue_pos];
  139. }
  140. static inline void
  141. output_queue_advance(MilkymistPFPUState *s)
  142. {
  143. s->output_queue[s->output_queue_pos] = 0;
  144. s->output_queue_pos = (s->output_queue_pos + 1) % MAX_LATENCY;
  145. }
  146. static int pfpu_decode_insn(MilkymistPFPUState *s)
  147. {
  148. uint32_t pc = s->regs[R_PC];
  149. uint32_t insn = s->microcode[pc];
  150. uint32_t reg_a = (insn >> 18) & 0x7f;
  151. uint32_t reg_b = (insn >> 11) & 0x7f;
  152. uint32_t op = (insn >> 7) & 0xf;
  153. uint32_t reg_d = insn & 0x7f;
  154. uint32_t r = 0;
  155. int latency = 0;
  156. switch (op) {
  157. case OP_NOP:
  158. break;
  159. case OP_FADD:
  160. {
  161. float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
  162. float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
  163. float t = a + b;
  164. r = REINTERPRET_CAST(uint32_t, t);
  165. latency = LATENCY_FADD;
  166. D_EXEC(qemu_log("ADD a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
  167. } break;
  168. case OP_FSUB:
  169. {
  170. float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
  171. float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
  172. float t = a - b;
  173. r = REINTERPRET_CAST(uint32_t, t);
  174. latency = LATENCY_FSUB;
  175. D_EXEC(qemu_log("SUB a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
  176. } break;
  177. case OP_FMUL:
  178. {
  179. float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
  180. float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
  181. float t = a * b;
  182. r = REINTERPRET_CAST(uint32_t, t);
  183. latency = LATENCY_FMUL;
  184. D_EXEC(qemu_log("MUL a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
  185. } break;
  186. case OP_FABS:
  187. {
  188. float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
  189. float t = fabsf(a);
  190. r = REINTERPRET_CAST(uint32_t, t);
  191. latency = LATENCY_FABS;
  192. D_EXEC(qemu_log("ABS a=%f t=%f, r=%08x\n", a, t, r));
  193. } break;
  194. case OP_F2I:
  195. {
  196. float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
  197. int32_t t = a;
  198. r = REINTERPRET_CAST(uint32_t, t);
  199. latency = LATENCY_F2I;
  200. D_EXEC(qemu_log("F2I a=%f t=%d, r=%08x\n", a, t, r));
  201. } break;
  202. case OP_I2F:
  203. {
  204. int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
  205. float t = a;
  206. r = REINTERPRET_CAST(uint32_t, t);
  207. latency = LATENCY_I2F;
  208. D_EXEC(qemu_log("I2F a=%08x t=%f, r=%08x\n", a, t, r));
  209. } break;
  210. case OP_VECTOUT:
  211. {
  212. uint32_t a = cpu_to_be32(s->gp_regs[reg_a]);
  213. uint32_t b = cpu_to_be32(s->gp_regs[reg_b]);
  214. hwaddr dma_ptr =
  215. get_dma_address(s->regs[R_MESHBASE],
  216. s->gp_regs[GPR_X], s->gp_regs[GPR_Y]);
  217. cpu_physical_memory_write(dma_ptr, &a, 4);
  218. cpu_physical_memory_write(dma_ptr + 4, &b, 4);
  219. s->regs[R_LASTDMA] = dma_ptr + 4;
  220. D_EXEC(qemu_log("VECTOUT a=%08x b=%08x dma=%08x\n", a, b, dma_ptr));
  221. trace_milkymist_pfpu_vectout(a, b, dma_ptr);
  222. } break;
  223. case OP_SIN:
  224. {
  225. int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
  226. float t = sinf(a * (1.0f / (M_PI * 4096.0f)));
  227. r = REINTERPRET_CAST(uint32_t, t);
  228. latency = LATENCY_SIN;
  229. D_EXEC(qemu_log("SIN a=%d t=%f, r=%08x\n", a, t, r));
  230. } break;
  231. case OP_COS:
  232. {
  233. int32_t a = REINTERPRET_CAST(int32_t, s->gp_regs[reg_a]);
  234. float t = cosf(a * (1.0f / (M_PI * 4096.0f)));
  235. r = REINTERPRET_CAST(uint32_t, t);
  236. latency = LATENCY_COS;
  237. D_EXEC(qemu_log("COS a=%d t=%f, r=%08x\n", a, t, r));
  238. } break;
  239. case OP_ABOVE:
  240. {
  241. float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
  242. float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
  243. float t = (a > b) ? 1.0f : 0.0f;
  244. r = REINTERPRET_CAST(uint32_t, t);
  245. latency = LATENCY_ABOVE;
  246. D_EXEC(qemu_log("ABOVE a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
  247. } break;
  248. case OP_EQUAL:
  249. {
  250. float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
  251. float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
  252. float t = (a == b) ? 1.0f : 0.0f;
  253. r = REINTERPRET_CAST(uint32_t, t);
  254. latency = LATENCY_EQUAL;
  255. D_EXEC(qemu_log("EQUAL a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
  256. } break;
  257. case OP_COPY:
  258. {
  259. r = s->gp_regs[reg_a];
  260. latency = LATENCY_COPY;
  261. D_EXEC(qemu_log("COPY"));
  262. } break;
  263. case OP_IF:
  264. {
  265. float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
  266. float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
  267. uint32_t f = s->gp_regs[GPR_FLAGS];
  268. float t = (f != 0) ? a : b;
  269. r = REINTERPRET_CAST(uint32_t, t);
  270. latency = LATENCY_IF;
  271. D_EXEC(qemu_log("IF f=%u a=%f b=%f t=%f, r=%08x\n", f, a, b, t, r));
  272. } break;
  273. case OP_TSIGN:
  274. {
  275. float a = REINTERPRET_CAST(float, s->gp_regs[reg_a]);
  276. float b = REINTERPRET_CAST(float, s->gp_regs[reg_b]);
  277. float t = (b < 0) ? -a : a;
  278. r = REINTERPRET_CAST(uint32_t, t);
  279. latency = LATENCY_TSIGN;
  280. D_EXEC(qemu_log("TSIGN a=%f b=%f t=%f, r=%08x\n", a, b, t, r));
  281. } break;
  282. case OP_QUAKE:
  283. {
  284. uint32_t a = s->gp_regs[reg_a];
  285. r = 0x5f3759df - (a >> 1);
  286. latency = LATENCY_QUAKE;
  287. D_EXEC(qemu_log("QUAKE a=%d r=%08x\n", a, r));
  288. } break;
  289. default:
  290. error_report("milkymist_pfpu: unknown opcode %d", op);
  291. break;
  292. }
  293. if (!reg_d) {
  294. D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d>\n",
  295. s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency,
  296. s->regs[R_PC] + latency));
  297. } else {
  298. D_EXEC(qemu_log("%04d %8s R%03d, R%03d <L=%d, E=%04d> -> R%03d\n",
  299. s->regs[R_PC], opcode_to_str[op], reg_a, reg_b, latency,
  300. s->regs[R_PC] + latency, reg_d));
  301. }
  302. if (op == OP_VECTOUT) {
  303. return 0;
  304. }
  305. /* store output for this cycle */
  306. if (reg_d) {
  307. uint32_t val = output_queue_remove(s);
  308. D_EXEC(qemu_log("R%03d <- 0x%08x\n", reg_d, val));
  309. s->gp_regs[reg_d] = val;
  310. }
  311. output_queue_advance(s);
  312. /* store op output */
  313. if (op != OP_NOP) {
  314. output_queue_insert(s, r, latency-1);
  315. }
  316. /* advance PC */
  317. s->regs[R_PC]++;
  318. return 1;
  319. };
  320. static void pfpu_start(MilkymistPFPUState *s)
  321. {
  322. int x, y;
  323. int i;
  324. for (y = 0; y <= s->regs[R_VMESHLAST]; y++) {
  325. for (x = 0; x <= s->regs[R_HMESHLAST]; x++) {
  326. D_EXEC(qemu_log("\nprocessing x=%d y=%d\n", x, y));
  327. /* set current position */
  328. s->gp_regs[GPR_X] = x;
  329. s->gp_regs[GPR_Y] = y;
  330. /* run microcode on this position */
  331. i = 0;
  332. while (pfpu_decode_insn(s)) {
  333. /* decode at most MICROCODE_WORDS instructions */
  334. if (++i >= MICROCODE_WORDS) {
  335. error_report("milkymist_pfpu: too many instructions "
  336. "executed in microcode. No VECTOUT?");
  337. break;
  338. }
  339. }
  340. /* reset pc for next run */
  341. s->regs[R_PC] = 0;
  342. }
  343. }
  344. s->regs[R_VERTICES] = x * y;
  345. trace_milkymist_pfpu_pulse_irq();
  346. qemu_irq_pulse(s->irq);
  347. }
  348. static inline int get_microcode_address(MilkymistPFPUState *s, uint32_t addr)
  349. {
  350. return (512 * s->regs[R_CODEPAGE]) + addr - MICROCODE_BEGIN;
  351. }
  352. static uint64_t pfpu_read(void *opaque, hwaddr addr,
  353. unsigned size)
  354. {
  355. MilkymistPFPUState *s = opaque;
  356. uint32_t r = 0;
  357. addr >>= 2;
  358. switch (addr) {
  359. case R_CTL:
  360. case R_MESHBASE:
  361. case R_HMESHLAST:
  362. case R_VMESHLAST:
  363. case R_CODEPAGE:
  364. case R_VERTICES:
  365. case R_COLLISIONS:
  366. case R_STRAYWRITES:
  367. case R_LASTDMA:
  368. case R_PC:
  369. case R_DREGBASE:
  370. case R_CODEBASE:
  371. r = s->regs[addr];
  372. break;
  373. case GPR_BEGIN ... GPR_END:
  374. r = s->gp_regs[addr - GPR_BEGIN];
  375. break;
  376. case MICROCODE_BEGIN ... MICROCODE_END:
  377. r = s->microcode[get_microcode_address(s, addr)];
  378. break;
  379. default:
  380. error_report("milkymist_pfpu: read access to unknown register 0x"
  381. TARGET_FMT_plx, addr << 2);
  382. break;
  383. }
  384. trace_milkymist_pfpu_memory_read(addr << 2, r);
  385. return r;
  386. }
  387. static void pfpu_write(void *opaque, hwaddr addr, uint64_t value,
  388. unsigned size)
  389. {
  390. MilkymistPFPUState *s = opaque;
  391. trace_milkymist_pfpu_memory_write(addr, value);
  392. addr >>= 2;
  393. switch (addr) {
  394. case R_CTL:
  395. if (value & CTL_START_BUSY) {
  396. pfpu_start(s);
  397. }
  398. break;
  399. case R_MESHBASE:
  400. case R_HMESHLAST:
  401. case R_VMESHLAST:
  402. case R_CODEPAGE:
  403. case R_VERTICES:
  404. case R_COLLISIONS:
  405. case R_STRAYWRITES:
  406. case R_LASTDMA:
  407. case R_PC:
  408. case R_DREGBASE:
  409. case R_CODEBASE:
  410. s->regs[addr] = value;
  411. break;
  412. case GPR_BEGIN ... GPR_END:
  413. s->gp_regs[addr - GPR_BEGIN] = value;
  414. break;
  415. case MICROCODE_BEGIN ... MICROCODE_END:
  416. s->microcode[get_microcode_address(s, addr)] = value;
  417. break;
  418. default:
  419. error_report("milkymist_pfpu: write access to unknown register 0x"
  420. TARGET_FMT_plx, addr << 2);
  421. break;
  422. }
  423. }
  424. static const MemoryRegionOps pfpu_mmio_ops = {
  425. .read = pfpu_read,
  426. .write = pfpu_write,
  427. .valid = {
  428. .min_access_size = 4,
  429. .max_access_size = 4,
  430. },
  431. .endianness = DEVICE_NATIVE_ENDIAN,
  432. };
  433. static void milkymist_pfpu_reset(DeviceState *d)
  434. {
  435. MilkymistPFPUState *s = MILKYMIST_PFPU(d);
  436. int i;
  437. for (i = 0; i < R_MAX; i++) {
  438. s->regs[i] = 0;
  439. }
  440. for (i = 0; i < 128; i++) {
  441. s->gp_regs[i] = 0;
  442. }
  443. for (i = 0; i < MICROCODE_WORDS; i++) {
  444. s->microcode[i] = 0;
  445. }
  446. s->output_queue_pos = 0;
  447. for (i = 0; i < MAX_LATENCY; i++) {
  448. s->output_queue[i] = 0;
  449. }
  450. }
  451. static void milkymist_pfpu_realize(DeviceState *dev, Error **errp)
  452. {
  453. MilkymistPFPUState *s = MILKYMIST_PFPU(dev);
  454. SysBusDevice *sbd = SYS_BUS_DEVICE(dev);
  455. sysbus_init_irq(sbd, &s->irq);
  456. memory_region_init_io(&s->regs_region, OBJECT(dev), &pfpu_mmio_ops, s,
  457. "milkymist-pfpu", MICROCODE_END * 4);
  458. sysbus_init_mmio(sbd, &s->regs_region);
  459. }
  460. static const VMStateDescription vmstate_milkymist_pfpu = {
  461. .name = "milkymist-pfpu",
  462. .version_id = 1,
  463. .minimum_version_id = 1,
  464. .fields = (VMStateField[]) {
  465. VMSTATE_UINT32_ARRAY(regs, MilkymistPFPUState, R_MAX),
  466. VMSTATE_UINT32_ARRAY(gp_regs, MilkymistPFPUState, 128),
  467. VMSTATE_UINT32_ARRAY(microcode, MilkymistPFPUState, MICROCODE_WORDS),
  468. VMSTATE_INT32(output_queue_pos, MilkymistPFPUState),
  469. VMSTATE_UINT32_ARRAY(output_queue, MilkymistPFPUState, MAX_LATENCY),
  470. VMSTATE_END_OF_LIST()
  471. }
  472. };
  473. static void milkymist_pfpu_class_init(ObjectClass *klass, void *data)
  474. {
  475. DeviceClass *dc = DEVICE_CLASS(klass);
  476. dc->realize = milkymist_pfpu_realize;
  477. dc->reset = milkymist_pfpu_reset;
  478. dc->vmsd = &vmstate_milkymist_pfpu;
  479. }
  480. static const TypeInfo milkymist_pfpu_info = {
  481. .name = TYPE_MILKYMIST_PFPU,
  482. .parent = TYPE_SYS_BUS_DEVICE,
  483. .instance_size = sizeof(MilkymistPFPUState),
  484. .class_init = milkymist_pfpu_class_init,
  485. };
  486. static void milkymist_pfpu_register_types(void)
  487. {
  488. type_register_static(&milkymist_pfpu_info);
  489. }
  490. type_init(milkymist_pfpu_register_types)