tbp wrote:
> I'll leave non objective criterions aside (how do you measure clarity?)
> and, as you insist on discarding my previous patches & measures, i've
> taken the time this morning to make this up:
I appreciate it, thank you.
On my 1.2GHz Athlon t-bird I get:
$ g++-3.4 -Wall -O3 -ffast-math -funroll-all-loops ray.cpp -o ray
$ time ./ray >image.ppm
real 0m55.256s
user 0m54.860s
sys 0m0.080s
$ g++-3.4 -Wall -O3 -ffast-math -funroll-all-loops miniray.cpp -o miniray
miniray.cpp:18: warning: division by zero in `1.0e+0 / 0.'
$ time ./miniray >image.ppm
real 0m28.587s
user 0m28.400s
sys 0m0.040s
So your optimised version is 1.9x faster.
My optimised version is basically the same speed as yours (although quite
different):
$ g++-3.4 -Wall -O3 -march=athlon-tbird -ffast-math -funroll-all-loops
ray4.cpp -o ray4
$ time ./ray4 >image.ppm
real 0m29.960s
user 0m27.150s
sys 0m0.040s
> Before you start nitpicking please consider that:
> a) this version complies with the shootout rules AFAIK.
Yes.
> b) it outputs the exact same picture.
Yes.
> c) i had to raise the epsilon a bit to avoid acnea, it has to do with
> the way i compute normals but i'm not going to fix that.
Your epsilon is actually smaller than my delta (which was sqrt epsilon).
> d) i removed the virtualness in the most straightforward way and it's
> certainly not aesthetically pleasing
I'll study this in more detail.
> e) i'm not going to spend ages on this, hence c) & d).
What better solution would you recommend for (d)?
My optimised version uses:
struct Tree {
vector<Tree> child;
Vec center;
double radius;
Tree(Vec c, double r) : child(), center(c), radius(r) {}
};
which I don't much like.
> f) it's algorithmically equivalent to your version (ie primary and
> shadow rays aren't special cased etc...)
My optimised version has a specialised intersect function for shadow rays
(but still fits in <100 LOC and is just a clear, IMHO).
>> In both cases, the types of objects and bounding volumes are
> extensible.
> That means nothing. Everything is extensible.
> And in this *particular case*, using dynamic binding is, again,
> superfluous.
> I've resolved that in a really crude way, but that's just one
> possibility.
Can you describe how a new kind of object/bound would be implemented?
>> Yes, exactly. It isn't really that surprising when you consider that
>> "ray_sphere" is called ~10x more than "intersect" and it contains
> several
>> branches, lots of floating point maths and function calls.
> I was sarcastic; "ray_sphere" calls that interection routine which
> cannot be inlined because it's virtual etc...
In my original implementation, "intersect" calls "ray_sphere" and not the
other way around. Do you mean "ray_sphere" could not have been inlined?
> Also, the fact is that this kind of sphere intersection can easily be
> made branchless with something like:
> if ((d < 0.) | (t2 < 0.)) return infinity;
> else return t1 > 0. ? t1 : t2;
> But you want to branch away from the square root as it is what's
> wasting most cycles.
I hadn't optimised "ray_sphere".
>> The C++ outperforms the OCaml on AMD64 (and IA64). I'd say that the
>> performance is comparable in all cases.
> Again, your benchmark as presented it totally unfair.
In what way is it unfair?
Here's the C++ I had:
#include <vector>
#include <iostream>
#include <limits>
#include <cmath>
using namespace std;
numeric_limits<double> dbl;
double delta = sqrt(dbl.epsilon()), infinity = dbl.infinity(), pi = M_PI;
struct Vec { // 3D vector
double x, y, z;
Vec(double x2, double y2, double z2) : x(x2), y(y2), z(z2) {}
};
inline Vec operator+(const Vec &a, const Vec &b)
{ return Vec(a.x + b.x, a.y + b.y, a.z + b.z); }
inline Vec operator-(const Vec &a, const Vec &b)
{ return Vec(a.x - b.x, a.y - b.y, a.z - b.z); }
inline Vec operator*(double a, const Vec &b)
{ return Vec(a * b.x, a * b.y, a * b.z); }
inline double dot(const Vec &a, const Vec &b) { return a.x*b.x + a.y*b.y +
a.z*b.z; }
inline Vec unitise(const Vec &a) { return (1 / sqrt(dot(a, a))) * a; }
struct Ray { Vec orig, dir; Ray(Vec o, Vec d) : orig(o), dir(d) {} };
struct Tree {
vector<Tree> child;
Vec center;
double radius;
Tree(Vec c, double r) : child(), center(c), radius(r) {}
};
inline double ray_sphere(const Ray &ray, const Tree &tree) {
Vec v = tree.center - ray.orig;
double b = dot(v, ray.dir), disc = b*b - dot(v, v) +
tree.radius*tree.radius;
if (disc < 0) return infinity;
double d = sqrt(disc), t2 = b + d;
if (t2 < 0) return infinity;
double t1 = b - d;
return (t1 > 0 ? t1 : t2);
}
void intersect(double &lambda, Vec &normal, const Ray &ray, const Tree
&tree) {
double l = ray_sphere(ray, tree);
if (l >= lambda) return;
if (tree.child.size() == 0) {
lambda = l;
normal = unitise(ray.orig + l * ray.dir - tree.center);
} else
for (vector<Tree>::const_iterator it=tree.child.begin();
it!=tree.child.end(); ++it)
intersect(lambda, normal, ray, *it);
}
bool intersect(const Ray &ray, const Tree &tree) {
if (ray_sphere(ray, tree) == infinity) return false;
if (tree.child.size() == 0) return true; else {
for (vector<Tree>::const_iterator it=tree.child.begin();
it != tree.child.end(); ++it)
if (intersect(ray, *it)) return true;
}
return false;
}
double ray_trace(const double weight, const Vec &light, const Ray &ray,
const Tree &tree) {
double lambda = infinity;
Vec normal(0, 0, 0);
intersect(lambda, normal, ray, tree);
if (lambda == infinity) return 0;
Vec o = ray.orig + lambda * ray.dir + delta * normal;
double g = -dot(normal, light);
if (g <= 0) return 0.;
return (intersect(Ray(o, Vec(0, 0, 0) - light), tree) ? 0 : g);
}
Tree create(int level, double r, double x, double y, double z) {
if (level == 1) return Tree(Vec(x, y, z), r);
Tree group = Tree(Vec(x, y, z), 3*r);
group.child.push_back(Tree(Vec(x, y, z), r));
double rn = 3*r/sqrt(12.);
for (int dz=-1; dz<=1; dz+=2)
for (int dx=-1; dx<=1; dx+=2)
group.child.push_back(create(level-1, r/2, x-dx*rn, y+rn, z-dz*rn));
return group;
}
int main(int argc, char *argv[]) {
int level = (argc==2 ? atoi(argv[1]) : 6), w = 512, h = 512, ss = 4;
Tree tree = create(level, 1, 0, -1, 0);
cout << "P2\n" << w << " " << h << "\n256\n";
for (int y=h-1; y>=0; --y) {
for (int x=0; x<w; ++x) {
double g=0;
for (int dx=0; dx<ss; ++dx)
for (int dy=0; dy<ss; ++dy) {
Vec d(x+double(dx)/ss-w/2, y+double(dy)/ss-h/2, max(w, h));
g += ray_trace(1, unitise(Vec(-1, -3, 2)), Ray(Vec(0, 0, -4),
unitise(d)), tree);
}
cout << min(255, int(256. * g / (ss*ss))) << " ";
}
cout << endl;
}
return 0;
}
--
Dr Jon D Harrop, Flying Frog Consultancy
http://www.ffconsultancy.com