//
// MetalSource.swift
// gputest
//
// Created by xingcheng on 2025/5/18.
//
  7. let metalCode = """
  8. #include <metal_stdlib>
  9. using namespace metal;
  10. kernel void optimized_fp32(
  11. device float4 *buffer [[buffer(0)]],
  12. uint tid [[thread_position_in_grid]]
  13. ) {
  14. // 每个线程处理4个float元素
  15. float4 x = buffer[tid];
  16. // 展开循环增加计算强度
  17. for (int i = 0; i < 512; ++i) {
  18. // 每个循环8次浮点运算 x4 => 32 FLOPs/循环
  19. x = fma(x, float4(1.01), float4(0.97));
  20. x = fma(x, float4(0.99), float4(1.02));
  21. x = fma(x, float4(1.03), float4(0.98));
  22. x = fma(x, float4(0.96), float4(1.05));
  23. }
  24. buffer[tid] = x;
  25. }
  26. kernel void optimized_fp16(
  27. device half4 *buffer [[buffer(0)]],
  28. uint tid [[thread_position_in_grid]]
  29. ) {
  30. half4 x = buffer[tid];
  31. for (int i = 0; i < 512; ++i) {
  32. x = fma(x, half4(1.01), half4(0.97));
  33. x = fma(x, half4(0.99), half4(1.02));
  34. x = fma(x, half4(1.03), half4(0.98));
  35. x = fma(x, half4(0.96), half4(1.05));
  36. }
  37. buffer[tid] = x;
  38. }
  39. """