2009-02-09
AOベンチ高速化計画(その1)
AOベンチを高速化してみましょうという企画を始めます。まずは、プロファイルを取ってみました。いつの間にかエンバグしてプロファイラが動かなくなっていました。とりあえず直したので、試したい人(果たしているのだろうか?)は最新版にしておいてください。
結果を以下に示します。各行の左端の数字がその行で費やしたクロック数*1で、その右の「数字:」がソースの行番号です。
ざっと見ると、オブジェクトを作るところ(227行目のVec.newなど)と、アクセサ(29行目のVec#xなど)で時間を食っているみたいです。
これをどう料理するのか、方針はあるのですが、具体的にどうするのかは全然考えていないのでした。
続く(のか?)
1: # AO render benchmark
2: # Original program (C) Syoyo Fujita in Javascript (and other languages)
3: # http://lucille.atso-net.jp/blog/?p=642
4: # http://lucille.atso-net.jp/blog/?p=711
5: # Ruby(yarv2llvm) version by Hideki Miura
6: #
7:
8: IMAGE_WIDTH = 128
9: IMAGE_HEIGHT = 128
10: NSUBSAMPLES = 2
11: NAO_SAMPLES = 4
12:
13: =begin
14: def rand
15: 0.5
16: end
17: =end
18:
19: class Vec
3482907111 20: def initialize(x, y, z)
7745809730 21: @x = x
8049657700 22: @y = y
89874967912 23: @z = z
24: end
25:
214036759 26: def x=(v); @x = v; end
164287218 27: def y=(v); @y = v; end
179976792 28: def z=(v); @z = v; end
92319516647 29: def x; @x; end
9066123677 30: def y; @y; end
6470660511 31: def z; @z; end
32:
33: def vadd(b)
34: Vec.new(@x + b.x, @y + b.y, @z + b.z)
35: end
36:
202794523 37: def vsub(b)
799888247 38: Vec.new(@x - b.x, @y - b.y, @z - b.z)
39: end
40:
7363025 41: def vcross(b)
28365353 42: Vec.new(@y * b.z - @z * b.y,
43: @z * b.x - @x * b.z,
44: @x * b.y - @y * b.x)
45: end
46:
611327399 47: def vdot(b)
2277799286 48: @x * b.x + @y * b.y + @z * b.z
49: end
50:
19571071 51: def vlength
71391784 52: Math.sqrt(@x * @x + @y * @y + @z * @z)
53: end
54:
20394222 55: def vnormalize
154529678 56: len = vlength
2454418673 57: v = Vec.new(@x, @y, @z)
19759771 58: if len > 1.0e-17 then
120984490 59: v.x = v.x / len
105079075 60: v.y = v.y / len
93239809 61: v.z = v.z / len
62: end
132115526 63: v
64: end
65: end
66:
67:
68: class Sphere
294 69: def initialize(center, radius)
12757 70: @center = center
21694 71: @radius = radius
72: end
73:
74: def center; @center; end
75: def radius; @radius; end
76:
197566836 77: def intersect(ray, isect)
549177301 78: rs = ray.org.vsub(@center)
1025943933 79: b = rs.vdot(ray.dir)
1051817735 80: c = rs.vdot(rs) - (@radius * @radius)
203202707 81: d = b * b - c
451149528 82: if d > 0.0 then
14470075 83: t = - b - Math.sqrt(d)
84:
63325012 85: if t > 0.0 and t < isect.t then
51692972 86: isect.t = t
49138790 87: isect.hit = true
14762316 88: isect.pl = Vec.new(ray.org.x + ray.dir.x * t,
89: ray.org.y + ray.dir.y * t,
90: ray.org.z + ray.dir.z * t)
40683471 91: n = isect.pl.vsub(@center)
38178484 92: isect.n = n.vnormalize
93: else
29409513 94: 0.0
95: end
96: end
97: nil
98: end
99: end
100:
101: class Plane
105 102: def initialize(p, n)
11067 103: @p = p
848920391 104: @n = n
105: end
106:
66415232 107: def intersect(ray, isect)
284177670 108: d = -@p.vdot(@n)
218832810 109: v = ray.dir.vdot(@n)
66846341 110: v0 = v
72313262 111: if v < 0.0 then
7918877 112: v0 = -v
113: end
65515935 114: if v0 < 1.0e-17 then
32313 115: return
116: end
117:
237211844 118: t = -(ray.org.vdot(@n) + d) / v
119:
213796475 120: if t > 0.0 and t < isect.t then
21664931 121: isect.hit = true
24819490 122: isect.t = t
28158267 123: isect.n = @n
21091378 124: isect.pl = Vec.new(ray.org.x + t * ray.dir.x,
125: ray.org.y + t * ray.dir.y,
126: ray.org.z + t * ray.dir.z)
127: end
128: nil
129: end
130: end
131:
132: class Ray
68183865 133: def initialize(org, dir)
464286359 134: @org = org
5796958193 135: @dir = dir
136: end
137:
2653505502 138: def org; @org; end
139: def org=(v); @org = v; end
2599960532 140: def dir; @dir; end
141: def dir=(v); @dir = v; end
142: end
143:
144: class Isect
65991211 145: def initialize
399979619 146: @t = 10000000.0
141381218 147: @hit = false
1506247178 148: @pl = Vec.new(0.0, 0.0, 0.0)
1992140835 149: @n = Vec.new(0.0, 0.0, 0.0)
150: end
151:
62780868 152: def t; @t; end
69180946 153: def t=(v); @t = v; end
310561402 154: def hit; @hit; end
49700965 155: def hit=(v); @hit = v; end
111828741 156: def pl; @pl; end
70391819 157: def pl=(v); @pl = v; end
341768739 158: def n; @n; end
53883525 159: def n=(v); @n = v; end
160: end
161:
4964604 162: def clamp(f)
5013257 163: i = f * 255.5
5809394 164: if i > 255.0 then
345576 165: i = 255.0
166: end
5108489 167: if i < 0.0 then
168: i = 0.0
169: end
635615432 170: i.to_i
171: end
172:
3553251 173: def otherBasis(basis, n)
13719777 174: basis[2] = Vec.new(n.x, n.y, n.z)
276236476 175: basis[1] = Vec.new(0.0, 0.0, 0.0)
176:
20436274 177: if n.x < 0.6 and n.x > -0.6 then
27269672 178: basis[1].x = 1.0
179: elsif n.y < 0.6 and n.y > -0.6 then
756300 180: basis[1].y = 1.0
181: elsif n.z < 0.6 and n.z > -0.6 then
67901 182: basis[1].z = 1.0
183: else
184: basis[1].x = 1.0
185: end
186:
126451711 187: basis[0] = basis[1].vcross(basis[2])
40735089 188: basis[0] = basis[0].vnormalize
189:
29057644 190: basis[1] = basis[2].vcross(basis[0])
39266610 191: basis[1] = basis[1].vnormalize
192: end
193:
194: class Scene
95 195: def initialize
34619 196: @spheres = Array.new
162606024 197: @spheres[0] = Sphere.new(Vec.new(-2.0, 0.0, -3.5), 0.5)
12463 198: @spheres[1] = Sphere.new(Vec.new(-0.5, 0.0, -3.0), 0.5)
2163 199: @spheres[2] = Sphere.new(Vec.new(1.0, 0.0, -2.2), 0.5)
2027 200: @plane = Plane.new(Vec.new(0.0, -0.5, 0.0), Vec.new(0.0, 1.0, 0.0))
201: end
202:
3991300 203: def ambient_occlusion(isect)
86129974 204: basis = Array.new
42215920 205: otherBasis(basis, isect.n)
206:
3628173 207: ntheta = NAO_SAMPLES
3554923 208: nphi = NAO_SAMPLES
3907605 209: eps = 0.0001
3585756 210: occlusion = 0.0
211:
14516153 212: p0 = Vec.new(isect.pl.x + eps * isect.n.x,
213: isect.pl.y + eps * isect.n.y,
214: isect.pl.z + eps * isect.n.z)
39819602 215: nphi.times do |j|
397433157 216: ntheta.times do |i|
205857814 217: r = rand
983589779 218: phi = 2.0 * 3.14159265 * rand
62048471 219: x = Math.cos(phi) * Math.sqrt(1.0 - r)
60359481 220: y = Math.sin(phi) * Math.sqrt(1.0 - r)
58835518 221: z = Math.sqrt(r)
222:
262180475 223: rx = x * basis[0].x + y * basis[1].x + z * basis[2].x
173465064 224: ry = x * basis[0].y + y * basis[1].y + z * basis[2].y
198884270 225: rz = x * basis[0].z + y * basis[1].z + z * basis[2].z
226:
54537043340 227: raydir = Vec.new(rx, ry, rz)
3202836233 228: ray = Ray.new(p0, raydir)
229:
1931629501 230: occisect = Isect.new
574108733 231: @spheres[0].intersect(ray, occisect)
395288862 232: @spheres[1].intersect(ray, occisect)
359305624 233: @spheres[2].intersect(ray, occisect)
310291804 234: @plane.intersect(ray, occisect)
220775329 235: if occisect.hit then
69999794 236: occlusion = occlusion + 1.0
237: else
359931630 238: 0.0
239: end
240: end
241: end
242:
11206986 243: occlusion = (ntheta.to_f * nphi.to_f - occlusion) / (ntheta.to_f * nphi.to_f)
244:
2645612854 245: Vec.new(occlusion, occlusion, occlusion)
246: end
247:
20086 248: def render(w, h, nsubsamples)
15508 249: cnt = 0
14836 250: nsf = nsubsamples.to_f
43492145 251: h.times do |y|
154000401 252: w.times do |x|
2297040484 253: rad = Vec.new(0.0, 0.0, 0.0)
254:
255: # Subsampling
40956785 256: nsubsamples.times do |v|
361985815 257: nsubsamples.times do |u|
258:
6759127 259: cnt = cnt + 1
6605290 260: wf = w.to_f
6998377 261: hf = h.to_f
6876010 262: xf = x.to_f
6817729 263: yf = y.to_f
6573161 264: uf = u.to_f
6575960 265: vf = v.to_f
266:
34139409 267: px = (xf + (uf / nsf) - (wf / 2.0)) / (wf / 2.0)
34857811 268: py = -(yf + (vf / nsf) - (hf / 2.0)) / (hf / 2.0)
269:
4640601492 270: eye = Vec.new(px, py, -1.0).vnormalize
271:
1900806167 272: ray = Ray.new(Vec.new(0.0, 0.0, 0.0), eye)
273:
456369628 274: isect = Isect.new
300801323 275: @spheres[0].intersect(ray, isect)
42257842 276: @spheres[1].intersect(ray, isect)
42565969 277: @spheres[2].intersect(ray, isect)
266949967 278: @plane.intersect(ray, isect)
51428494 279: if isect.hit then
202302210 280: col = ambient_occlusion(isect)
21356720 281: rad.x = rad.x + col.x
14780657 282: rad.y = rad.y + col.y
11872435 283: rad.z = rad.z + col.z
284: end
285: end
286: end
287:
5921625 288: r = rad.x / (nsf * nsf)
7886703 289: g = rad.y / (nsf * nsf)
5888393 290: b = rad.z / (nsf * nsf)
98414481 291: printf("%c", clamp(r))
9520453 292: printf("%c", clamp(g))
9690354 293: printf("%c", clamp(b))
294: end
46513 295: nil
296: end
297:
298: nil
299: end
300: end
301:
302: # File.open("ao.ppm", "w") do |fp|
303: printf("P6\n")
304: printf("%d %d\n", IMAGE_WIDTH, IMAGE_HEIGHT)
305: printf("255\n", IMAGE_WIDTH, IMAGE_HEIGHT)
306: Scene.new.render(IMAGE_WIDTH, IMAGE_HEIGHT, NSUBSAMPLES)
307: # end
*1:RDTSCを使っています。なんか正確じゃないという話もあるみたいです。
コメントを書く
トラックバック - http://d.hatena.ne.jp/miura1729/20090209/1234175896
リンク元
- 21 http://www.rubyist.net/~kazu/samidare/
- 2 http://a.hatena.ne.jp/cranebird/
- 2 http://a.hatena.ne.jp/fujita-y/
- 2 http://b.hatena.ne.jp/entrylist?sort=hot&threshold=3&url=http://d.hatena.ne.jp/miura1729/
- 2 http://llvmruby.org/wordpress-llvmruby/
- 1 http://a.hatena.ne.jp/asip/
- 1 http://a.hatena.ne.jp/cranebird/mobile
- 1 http://a.hatena.ne.jp/h_sakurai/
- 1 http://a.hatena.ne.jp/kei-os2007/mobile?
- 1 http://b.hatena.ne.jp/entry/7995532/ Mostly-copying garbage collectionをRubyで実現するためのメモ - miura1729の日記