2009-02-09
AOベンチ高速化計画(その1)
AOベンチを高速化してみましょうという企画を始めます。まずは、プロファイルを取ってみました。いつの間にかエンバグしてプロファイラが動かなくなってました。とりあえず直したので、試したい人(果たしているのだろうか?)は最新版にしておいてください。
結果を示します。各行の行番号の左にある数字がその行で費やしたクロック数*1です。
ちょっとみると、オブジェクトを作るところと、アクセサで時間を食っているみたいです。
これをどう料理するのか、方針はあるのですが具体的にどうするのか全然考えていないのでした。
続く(のか?)
1: # AO render benchmark 2: # Original program (C) Syoyo Fujita in Javascript (and other languages) 3: # http://lucille.atso-net.jp/blog/?p=642 4: # http://lucille.atso-net.jp/blog/?p=711 5: # Ruby(yarv2llvm) version by Hideki Miura 6: # 7: 8: IMAGE_WIDTH = 128 9: IMAGE_HEIGHT = 128 10: NSUBSAMPLES = 2 11: NAO_SAMPLES = 4 12: 13: =begin 14: def rand 15: 0.5 16: end 17: =end 18: 19: class Vec 3482907111 20: def initialize(x, y, z) 7745809730 21: @x = x 8049657700 22: @y = y 89874967912 23: @z = z 24: end 25: 214036759 26: def x=(v); @x = v; end 164287218 27: def y=(v); @y = v; end 179976792 28: def z=(v); @z = v; end 92319516647 29: def x; @x; end 9066123677 30: def y; @y; end 6470660511 31: def z; @z; end 32: 33: def vadd(b) 34: Vec.new(@x + b.x, @y + b.y, @z + b.z) 35: end 36: 202794523 37: def vsub(b) 799888247 38: Vec.new(@x - b.x, @y - b.y, @z - b.z) 39: end 40: 7363025 41: def vcross(b) 28365353 42: Vec.new(@y * b.z - @z * b.y, 43: @z * b.x - @x * b.z, 44: @x * b.y - @y * b.x) 45: end 46: 611327399 47: def vdot(b) 2277799286 48: @x * b.x + @y * b.y + @z * b.z 49: end 50: 19571071 51: def vlength 71391784 52: Math.sqrt(@x * @x + @y * @y + @z * @z) 53: end 54: 20394222 55: def vnormalize 154529678 56: len = vlength 2454418673 57: v = Vec.new(@x, @y, @z) 19759771 58: if len > 1.0e-17 then 120984490 59: v.x = v.x / len 105079075 60: v.y = v.y / len 93239809 61: v.z = v.z / len 62: end 132115526 63: v 64: end 65: end 66: 67: 68: class Sphere 294 69: def initialize(center, radius) 12757 70: @center = center 21694 71: @radius = radius 72: end 73: 74: def center; @center; end 75: def radius; @radius; end 76: 197566836 77: def intersect(ray, isect) 549177301 78: rs = ray.org.vsub(@center) 1025943933 79: b = rs.vdot(ray.dir) 1051817735 80: c = rs.vdot(rs) - (@radius * @radius) 203202707 81: d = b * b - c 451149528 82: if d > 0.0 then 14470075 83: t = - b - Math.sqrt(d) 84: 63325012 85: if t > 0.0 and t < isect.t then 51692972 86: isect.t = t 49138790 87: isect.hit = 
true 14762316 88: isect.pl = Vec.new(ray.org.x + ray.dir.x * t, 89: ray.org.y + ray.dir.y * t, 90: ray.org.z + ray.dir.z * t) 40683471 91: n = isect.pl.vsub(@center) 38178484 92: isect.n = n.vnormalize 93: else 29409513 94: 0.0 95: end 96: end 97: nil 98: end 99: end 100: 101: class Plane 105 102: def initialize(p, n) 11067 103: @p = p 848920391 104: @n = n 105: end 106: 66415232 107: def intersect(ray, isect) 284177670 108: d = -@p.vdot(@n) 218832810 109: v = ray.dir.vdot(@n) 66846341 110: v0 = v 72313262 111: if v < 0.0 then 7918877 112: v0 = -v 113: end 65515935 114: if v0 < 1.0e-17 then 32313 115: return 116: end 117: 237211844 118: t = -(ray.org.vdot(@n) + d) / v 119: 213796475 120: if t > 0.0 and t < isect.t then 21664931 121: isect.hit = true 24819490 122: isect.t = t 28158267 123: isect.n = @n 21091378 124: isect.pl = Vec.new(ray.org.x + t * ray.dir.x, 125: ray.org.y + t * ray.dir.y, 126: ray.org.z + t * ray.dir.z) 127: end 128: nil 129: end 130: end 131: 132: class Ray 68183865 133: def initialize(org, dir) 464286359 134: @org = org 5796958193 135: @dir = dir 136: end 137: 2653505502 138: def org; @org; end 139: def org=(v); @org = v; end 2599960532 140: def dir; @dir; end 141: def dir=(v); @dir = v; end 142: end 143: 144: class Isect 65991211 145: def initialize 399979619 146: @t = 10000000.0 141381218 147: @hit = false 1506247178 148: @pl = Vec.new(0.0, 0.0, 0.0) 1992140835 149: @n = Vec.new(0.0, 0.0, 0.0) 150: end 151: 62780868 152: def t; @t; end 69180946 153: def t=(v); @t = v; end 310561402 154: def hit; @hit; end 49700965 155: def hit=(v); @hit = v; end 111828741 156: def pl; @pl; end 70391819 157: def pl=(v); @pl = v; end 341768739 158: def n; @n; end 53883525 159: def n=(v); @n = v; end 160: end 161: 4964604 162: def clamp(f) 5013257 163: i = f * 255.5 5809394 164: if i > 255.0 then 345576 165: i = 255.0 166: end 5108489 167: if i < 0.0 then 168: i = 0.0 169: end 635615432 170: i.to_i 171: end 172: 3553251 173: def otherBasis(basis, n) 13719777 
174: basis[2] = Vec.new(n.x, n.y, n.z) 276236476 175: basis[1] = Vec.new(0.0, 0.0, 0.0) 176: 20436274 177: if n.x < 0.6 and n.x > -0.6 then 27269672 178: basis[1].x = 1.0 179: elsif n.y < 0.6 and n.y > -0.6 then 756300 180: basis[1].y = 1.0 181: elsif n.z < 0.6 and n.z > -0.6 then 67901 182: basis[1].z = 1.0 183: else 184: basis[1].x = 1.0 185: end 186: 126451711 187: basis[0] = basis[1].vcross(basis[2]) 40735089 188: basis[0] = basis[0].vnormalize 189: 29057644 190: basis[1] = basis[2].vcross(basis[0]) 39266610 191: basis[1] = basis[1].vnormalize 192: end 193: 194: class Scene 95 195: def initialize 34619 196: @spheres = Array.new 162606024 197: @spheres[0] = Sphere.new(Vec.new(-2.0, 0.0, -3.5), 0.5) 12463 198: @spheres[1] = Sphere.new(Vec.new(-0.5, 0.0, -3.0), 0.5) 2163 199: @spheres[2] = Sphere.new(Vec.new(1.0, 0.0, -2.2), 0.5) 2027 200: @plane = Plane.new(Vec.new(0.0, -0.5, 0.0), Vec.new(0.0, 1.0, 0.0)) 201: end 202: 3991300 203: def ambient_occlusion(isect) 86129974 204: basis = Array.new 42215920 205: otherBasis(basis, isect.n) 206: 3628173 207: ntheta = NAO_SAMPLES 3554923 208: nphi = NAO_SAMPLES 3907605 209: eps = 0.0001 3585756 210: occlusion = 0.0 211: 14516153 212: p0 = Vec.new(isect.pl.x + eps * isect.n.x, 213: isect.pl.y + eps * isect.n.y, 214: isect.pl.z + eps * isect.n.z) 39819602 215: nphi.times do |j| 397433157 216: ntheta.times do |i| 205857814 217: r = rand 983589779 218: phi = 2.0 * 3.14159265 * rand 62048471 219: x = Math.cos(phi) * Math.sqrt(1.0 - r) 60359481 220: y = Math.sin(phi) * Math.sqrt(1.0 - r) 58835518 221: z = Math.sqrt(r) 222: 262180475 223: rx = x * basis[0].x + y * basis[1].x + z * basis[2].x 173465064 224: ry = x * basis[0].y + y * basis[1].y + z * basis[2].y 198884270 225: rz = x * basis[0].z + y * basis[1].z + z * basis[2].z 226: 54537043340 227: raydir = Vec.new(rx, ry, rz) 3202836233 228: ray = Ray.new(p0, raydir) 229: 1931629501 230: occisect = Isect.new 574108733 231: @spheres[0].intersect(ray, occisect) 395288862 232: 
@spheres[1].intersect(ray, occisect) 359305624 233: @spheres[2].intersect(ray, occisect) 310291804 234: @plane.intersect(ray, occisect) 220775329 235: if occisect.hit then 69999794 236: occlusion = occlusion + 1.0 237: else 359931630 238: 0.0 239: end 240: end 241: end 242: 11206986 243: occlusion = (ntheta.to_f * nphi.to_f - occlusion) / (ntheta.to_f * nphi.to_f) 244: 2645612854 245: Vec.new(occlusion, occlusion, occlusion) 246: end 247: 20086 248: def render(w, h, nsubsamples) 15508 249: cnt = 0 14836 250: nsf = nsubsamples.to_f 43492145 251: h.times do |y| 154000401 252: w.times do |x| 2297040484 253: rad = Vec.new(0.0, 0.0, 0.0) 254: 255: # Subsmpling 40956785 256: nsubsamples.times do |v| 361985815 257: nsubsamples.times do |u| 258: 6759127 259: cnt = cnt + 1 6605290 260: wf = w.to_f 6998377 261: hf = h.to_f 6876010 262: xf = x.to_f 6817729 263: yf = y.to_f 6573161 264: uf = u.to_f 6575960 265: vf = v.to_f 266: 34139409 267: px = (xf + (uf / nsf) - (wf / 2.0)) / (wf / 2.0) 34857811 268: py = -(yf + (vf / nsf) - (hf / 2.0)) / (hf / 2.0) 269: 4640601492 270: eye = Vec.new(px, py, -1.0).vnormalize 271: 1900806167 272: ray = Ray.new(Vec.new(0.0, 0.0, 0.0), eye) 273: 456369628 274: isect = Isect.new 300801323 275: @spheres[0].intersect(ray, isect) 42257842 276: @spheres[1].intersect(ray, isect) 42565969 277: @spheres[2].intersect(ray, isect) 266949967 278: @plane.intersect(ray, isect) 51428494 279: if isect.hit then 202302210 280: col = ambient_occlusion(isect) 21356720 281: rad.x = rad.x + col.x 14780657 282: rad.y = rad.y + col.y 11872435 283: rad.z = rad.z + col.z 284: end 285: end 286: end 287: 5921625 288: r = rad.x / (nsf * nsf) 7886703 289: g = rad.y / (nsf * nsf) 5888393 290: b = rad.z / (nsf * nsf) 98414481 291: printf("%c", clamp(r)) 9520453 292: printf("%c", clamp(g)) 9690354 293: printf("%c", clamp(b)) 294: end 46513 295: nil 296: end 297: 298: nil 299: end 300: end 301: 302: # File.open("ao.ppm", "w") do |fp| 303: printf("P6\n") 304: printf("%d %d\n", 
IMAGE_WIDTH, IMAGE_HEIGHT) 305: printf("255\n", IMAGE_WIDTH, IMAGE_HEIGHT) 306: Scene.new.render(IMAGE_WIDTH, IMAGE_HEIGHT, NSUBSAMPLES) 307: # end
*1:RDTSCを使っています。なんか正確じゃないという話もあるみたいです。
コメントを書く
トラックバック - http://d.hatena.ne.jp/miura1729/20090209/1234175896
リンク元
- 21 http://www.rubyist.net/~kazu/samidare/
- 2 http://a.hatena.ne.jp/cranebird/
- 2 http://a.hatena.ne.jp/fujita-y/
- 2 http://b.hatena.ne.jp/entrylist?sort=hot&threshold=3&url=http://d.hatena.ne.jp/miura1729/
- 2 http://llvmruby.org/wordpress-llvmruby/
- 1 http://a.hatena.ne.jp/asip/
- 1 http://a.hatena.ne.jp/cranebird/mobile
- 1 http://a.hatena.ne.jp/h_sakurai/
- 1 http://a.hatena.ne.jp/kei-os2007/mobile?
- 1 http://b.hatena.ne.jp/entry/7995532/ Mostly-copying garbage collectionをRubyで実現するためのメモ - miura1729の日記