// rast_edge_t and rast_tri_t are the precomputed structures used for triangles.
// The prepped triangle has:
//  - integer values for y0, y1, y2
//  - a bit that says if the spanning edge of the tri is on the left or right
//  - the three edges. Each edge has x(y0) and dx/dy.
//    (vertex 2 actually has x(y2))
// The prepped triangle is also presorted: vertex 0 is the lowest vertex
// (smallest y), vertex 1 is the highest, vertex 2 is in the middle.

// One triangle edge: current x position and the per-scanline x increment.
record rast_edge_t {
    float x, delta;
};

// A prepped triangle: packed y extents, id and orientation bit, plus the
// spanning edge (se) and the two short edges (e0, e1).
struct rast_tri_t {
    int y01;                 // y0 in low halfword, y1 in high halfword
    int se_on_left_id_y2;    // y2 in low halfword [15:0],
                             // id in [30:16], se_on_left in high bit [31]
    rast_edge_t se, e0, e1;
};

// cistream is "conditional input stream"
//   rast_tri_str is the input triangle stream
// screen_width_uc is the width of the screen
// ostream is "output stream"
//   pixel_rast_str is the output stream of pixels
// itocc is integer-to-condition-code
// select is ? :  (select(a,b,c) == a ? b : c)
// selectr is select records: same as select except b and c are records
//   (records are just structs)

// xyrast: walks each prepped triangle span by span and writes one
// pixel_rast_t record to pixel_rast_str per loop iteration.
//
// Each emitted record carries:
//   x, y            - the current pixel position
//   id_valid_first  - triangle id in bits [30:16] (copied from
//                     se_on_left_id_y2), "valid" as the value 2 (bit 1),
//                     and "first" as the value 1 (bit 0) on the iteration
//                     that loaded a new triangle.
//
// Records with valid == 0 are still written; they mark iterations where the
// input is exhausted or the current position lies outside the span/triangle.
kernel xyrast(cistream rast_tri_str, uc & screen_width_uc, ostream pixel_rast_str)
{
    rast_tri_t tri, new_tri;
    pixel_rast_t xyf;
    float x, y;
    float y0, y1, y2;
    int first, valid;
    cc ccdone, ccend;
    cc new_tri_p, new_span_p;
    cc se_on_left;
    float xstart, xend;

    // Start by requesting a triangle and beginning a fresh span.
    ccdone = itocc(0);
    new_tri_p = itocc(0xffffffff);
    new_span_p = itocc(0xffffffff);
    // Sticky flag: once set it stays set, forcing valid = 0 and new_tri_p.
    int alldone = 0;

    synch(); // for uc params
    float screen_width = itof(uc_to_cluster(screen_width_uc));

    loop_until_all(ccdone) pipeline(1) {
        // bring in a new triangle (only where new_tri_p is set)
        rast_tri_str(new_tri_p, ccend) >> new_tri;

        // are we done with the input?
        // yes if we asked for a new triangle AND didn't get it
        ccdone = itocc(cctoi(new_tri_p) & cctoi(ccend));
        alldone = alldone | cctoi(ccdone);

        // Latch the new triangle and unpack its y extents; otherwise keep
        // the values carried over from the previous iteration.
        tri = selectr(new_tri_p, new_tri, tri);
        first = select(new_tri_p, 1, 0);
        y0 = select(new_tri_p, itof(tri.y01 & 0xffff), y0);
        y = select(new_tri_p, y0, y);
        y1 = select(new_tri_p, itof(shift(tri.y01, -16) & 0xffff), y1);
        y2 = select(new_tri_p, itof(tri.se_on_left_id_y2 & 0xffff), y2);
        se_on_left = itocc(shift(tri.se_on_left_id_y2, -31) != 0);

        // how can we run bidirectionally?
        // initial values. Goal is to start with se all the time.
        // the issue is: e----se or se----e
        // can we make it do a computation that doesn't care?
        // deltas: not a problem
        // offsetmin, offsetmax: a bit more of a problem - directional
        // one bit says direction (actually "increment") because we have
        //   to write a span which starts with se and ends with e
        // ok, so what's the info we need to send and how does it change with dir?
        //
        // info              se----e          e----se          difference?
        // increment         +1               -1               yes
        // draw fse on int?  no               yes
        // draw fe on int?   yes              no
        // offset_start      floor(fse) + 1   floor(fse)       yes
        // offset_end        floor(fe)        floor(fe) + 1

        // Pick the short edge that bounds this span: e0 while y is below y2,
        // or whenever e0 is closer in x to the spanning edge than e1.
        float e0span = abs(tri.e0.x - tri.se.x);
        float e1span = abs(tri.e1.x - tri.se.x);
        cc use_e0 = itocc((e0span < e1span) | (y < y2));
        float e01x = select(use_e0, tri.e0.x, tri.e1.x);

        // Integer span bounds for the spanning edge, clamped to the screen.
        float floor_fse = itof(ftoi(tri.se.x));
        float floor_fse_p1 = floor_fse + 1.0f;
        floor_fse = select(itocc(floor_fse > screen_width), screen_width, floor_fse);
        // NOTE(review): a negative start is replaced by screen_width, not 0.
        // That looks like it discards spans that begin left of the screen
        // rather than clipping them - confirm this is intended.
        floor_fse_p1 = select(itocc(floor_fse_p1 < 0.0f), screen_width, floor_fse_p1);

        // Same bounds for the selected short edge.
        float floor_fe = itof(ftoi(e01x));
        float floor_fe_p1 = floor_fe + 1.0f;
        // Clamp against floor_fe itself. The earlier code tested floor_fse
        // here, which had already been clamped, so this never took effect.
        floor_fe = select(itocc(floor_fe > screen_width), screen_width, floor_fe);
        // NOTE(review): same screen_width-vs-0 question as floor_fse_p1 above.
        floor_fe_p1 = select(itocc(floor_fe_p1 < 0.0f), screen_width, floor_fe_p1);

        // On a new span, x runs from the left edge's floor + 1 up to the
        // right edge's floor; which edge is "left" depends on se_on_left.
        xstart = select(new_span_p,
                        select(se_on_left, floor_fse_p1, floor_fe_p1),
                        xstart);
        x = select(new_span_p, xstart, x);
        xend = select(new_span_p,
                      select(se_on_left, floor_fe, floor_fse),
                      xend);

        /* 3 threads of execution:
         *  1) new triangle
         *  2) same triangle, new span
         *  3) same triangle, same span
         */

        /* assuming we have x and y now */
        xyf.x = x;
        xyf.y = y;
        int y_too_far = (y > y1);
        int x_too_far = (x > xend); // x_too_far only for 0-pixel tris
        valid = select(itocc(alldone | y_too_far | x_too_far), 0, 2);
        xyf.id_valid_first = ((tri.se_on_left_id_y2 & 0x7fff0000) | first) | valid;
        pixel_rast_str << xyf;

        /* increment */
        x = x + 1.0f;
        x_too_far = (x > xend);
        new_span_p = itocc(x_too_far);

        // Step the edges to the next scanline when the span is finished.
        // e1 only advances once y has reached y2. This uses new_span_p
        // before new_tri_p is folded into it below.
        cc new_span_and_y2 = itocc(cctoi(new_span_p) & (y >= y2));
        tri.se.x = select(new_span_p, tri.se.x + tri.se.delta, tri.se.x);
        tri.e0.x = select(new_span_p, tri.e0.x + tri.e0.delta, tri.e0.x);
        tri.e1.x = select(new_span_and_y2, tri.e1.x + tri.e1.delta, tri.e1.x);
        y = select(new_span_p, y + 1.0f, y);

        // Past the top of the triangle (or input exhausted): request a new
        // triangle, which also implies starting a new span.
        y_too_far = (y > y1);
        new_tri_p = itocc(y_too_far | alldone);
        new_span_p = itocc(cctoi(new_span_p) | cctoi(new_tri_p));
    }
}