Talk:Improving language shootout results
This gives about a 15% improvement on Mac OS X 10.5 with fpc 2.2.4 over the version currently in the shootout:
The most important tricks are converting the multiplications for Cx and Cy in the loops into additions, and manually merging the previously inlined function into the loop body, which saves some assignments there. A real boost would come from using the 128-bit XMM registers and instructions for Cx, Cy, Zr, Zi, Tr and Ti, as well as threads for the quad-core shootout, as some of the competitors do.
{ Mandelbrot set renderer.

  Usage: mandelbrot2 <size>

  Writes an n-by-n bitmap to standard output: the header lines 'P4' and
  'n n', followed by the pixel rows packed eight pixels per byte, most
  significant bit first.  A set bit marks a point that did not escape
  within 50 iterations. }
program mandelbrot2;

var
  n: longint;                       { image width and height in pixels }
  TextBuf: array[0..$FFF] of byte;  { 4 KiB buffer attached to standard output }
  OutFile: PText;
  ErrPos: word;                     { Val error position; 0 means success }

{ Renders the n-by-n image row by row and writes the packed pixel bytes to
  OutFile^.  Cx runs from -1.5 and Cy from -1.0, both advancing by 2/n per
  pixel.  Reads the globals n and OutFile; n must be positive. }
procedure run;
const
  Limit: double = 4.0;  { escape threshold, compared against Zr*Zr + Zi*Zi }
  two: double = 2.0;
var
  i, index1, index2, bits, bit: longint;
  Zr, Zi, Ti, Tr: double;
  Cx, Cy, Step: double;
begin
  Step := two / n;
  Cy := -1.0;
  for index1 := 1 to n do
  begin
    Cx := -1.5;
    { bits starts with all eight pixels set; bit is the mask of the
      current pixel, moving from the most significant bit downwards. }
    bits := 255;
    bit := 128;
    for index2 := 1 to n do
    begin
      { Iteration 1: starting from Z = 0 the first step yields Z = C, so
        the squares of C are used directly. }
      Ti := Cy * Cy;
      Tr := Cx * Cx;
      if (Tr + Ti >= Limit) then
        bits := bits xor bit
      else
      begin
        { Iteration 2: Z = C*C + C, written out in terms of Cx and Cy. }
        Zi := (Cx + Cx + 1.0) * Cy;
        Zr := Tr - Ti + Cx;
        Ti := Zi * Zi;
        Tr := Zr * Zr;
        if (Tr + Ti >= Limit) then
          bits := bits xor bit
        else
        begin
          { Iterations 3..50: generic Z = Z*Z + C step.  Zi must be
            updated before Zr because it needs the previous Zr. }
          for i := 3 to 50 do
          begin
            Zi := Zr * Zi + Zr * Zi + Cy;
            Zr := Tr - Ti + Cx;
            Ti := Zi * Zi;
            Tr := Zr * Zr;
            if (Tr + Ti >= Limit) then
            begin
              { The point escaped: clear its bit and stop iterating. }
              bits := bits xor bit;
              break;
            end;
          end;
        end;
      end;

      if bit > 1 then
        bit := bit shr 1
      else
      begin
        { Eight pixels collected: emit the byte and start a new one. }
        write(OutFile^, chr(bits));
        bits := 255;
        bit := 128;
      end;
      { Additive stepping instead of recomputing Cx by multiplication. }
      Cx := Cx + Step;
    end;

    { Row width not a multiple of 8: the unprocessed low bits are still
      set from the initial 255, so clear them before emitting the byte. }
    if bit < 128 then
      write(OutFile^, chr(bits xor ((bit shl 1) - 1)));
    Cy := Cy + Step;
  end;
end;

begin
  { Validate the size argument before producing any output.  A missing or
    non-numeric argument would otherwise leave n = 0 and make run divide
    by zero; a negative n would produce a header with negative dimensions. }
  Val(ParamStr(1), n, ErrPos);
  if (ErrPos <> 0) or (n <= 0) then
  begin
    writeln(StdErr, 'usage: mandelbrot2 <size>  (size must be a positive integer)');
    Halt(1);
  end;

  OutFile := @Output;
  SetTextBuf(OutFile^, TextBuf);
  writeln(OutFile^, 'P4');
  writeln(OutFile^, n, ' ', n);
  run;
end.