EVALUATION
We can definitely improve our array code in general, but it will never be the case for c1 that multidimensional array code is as fast as 1D code. I've got a couple small tweaks that improve this code quite a bit though.
1.4.0:
5.6 s - Test 1: 2D array using array[row][col]
4.6 s - Test 2: 1D array using array[row * colSize + col]
4.0 s - Test 4: 1D array using array[i]
3.6 s - Test 5: 2D array using arrayRow[col]
1.4.1:
4.9 s - Test 6: 2D array using array[row][col]
4.3 s - Test 7: 1D array using array[row * colSize + col]
3.6 s - Test 9: 1D array using array[i]
3.2 s - Test 10: 2D array using arrayRow[col]
fixed:
4.2 s - Test 1: 2D array using array[row][col]
3.0 s - Test 2: 1D array using array[row * colSize + col]
2.7 s - Test 4: 1D array using array[i]
2.9 s - Test 5: 2D array using arrayRow[col]
Given the lack of global optimizations in the client compiler, I don't think this can be improved much more in 1.4.x. Maybe in 1.5 something more can be done.
The fixes involved are breaking array length references in the IR so they can be commoned, which is something we'd talked about many times but never got to, and slightly improved expression shaping and constant folding and some trivial reassociation.
###@###.### 2002-09-03
I had to remove the expression reshaping code since it caused problems with spilling but I think we'll be able to revisit this in Tiger. The times for the current build are below.
smite ~ % /export/ws/compspeed/sparc/jdk1.4/bin/java MockFD
4.2 s - Test 1: 2D array using array[row][col]
3.5 s - Test 2: 1D array using array[row * colSize + col]
3.0 s - Test 3: 1D array using array[row * colSize + col] reassociated
2.7 s - Test 4: 1D array using array[i]
2.9 s - Test 5: 2D array using arrayRow[col]
Spread = 1.6 times
Which are fairly good but some of the reassociation of expressions that was being done in Test2 is gone, resulting in a slight slowdown. I've included the modififed test case I wrote below. It used a fixed random number seed which improves the reproducibility from run to run. I also introduced a case where I did some hand reassociation to compare it with what the compiler could do.
/* Mock up of a finite difference calculation to show array access speed for
different methods of implementing a1 2D matrix in Java */
//package benchmarks.arrays;
import java.util.*;
public abstract class MockFD {
private static final class ArrayOfArrays extends MockFD {
public String toString() { return "2D array using array[row][col]"; }
private final double[][] a1 = new double[rowSize][colSize];
private final double[][] a2 = new double[rowSize][colSize];
private final double[][] a3 = new double[rowSize][colSize];
private final double[][] a4 = new double[rowSize][colSize];
private static void
body(final double[][] d1, final double[][] s1,
final double[][] d2, final double[][] s2)
{
for (int r = border; r < rows + border; r++) {
for (int c = border; c < cols + border; c++) {
d1[r][c] = alpha * s1[r][c]
+ beta * (s1[r-1][c] + s1[r][c-1]
+ s1[r][c+1] + s1[r+1][c]);
d2[r][c] = alpha * s2[r][c]
+ beta * (s2[r-1][c] + s2[r][c-1]
+ s2[r][c+1] + s2[r+1][c]);
}
}
}
protected void calc() {
body(a2, a1, a4, a3);
body(a1, a2, a3, a4);
for (int r = border; r < rows + border; r++) {
for (int c = border; c < cols + border; c++) {
a1[r][c] += a3[r][c]; }
}
}
protected double sum() {
double sum = 0;
for (int r = border; r < rows + border; r++) {
for (int c = border; c < cols + border; c++) {
sum += a1[r][c]; }
}
return sum;
}
protected void fill() {
for (int r = border; r < rows + border; r++) {
System.arraycopy(row, 0, a1[r], border, cols); }
}
}
private static final class ArrayWithMult extends MockFD {
public String toString() {
return "1D array using array[row * colSize + col]"; }
private final double[] a1 = new double[rowSize * colSize];
private final double[] a2 = new double[rowSize * colSize];
private final double[] a3 = new double[rowSize * colSize];
private final double[] a4 = new double[rowSize * colSize];
private static void
body(final double[] d1, final double[] s1, final
double[] d2, final double[] s2)
{
for (int r = border; r < rows + border; r++) {
for (int c = border; c < cols + border; c++) {
d1[r * colSize + c] =
alpha * s1[r * colSize + c]
+ beta * (s1[(r-1) * colSize + c]
+ s1[r * colSize + (c-1)]
+ s1[r * colSize + (c+1)]
+ s1[(r+1) * colSize + c]);
d2[r * colSize + c] =
alpha * s2[r * colSize + c]
+ beta * (s2[(r-1) * colSize + c]
+ s2[r * colSize + (c-1)]
+ s2[r * colSize + (c+1)]
+ s2[(r+1) * colSize + c]);
}
}
}
protected void calc() {
body(a2, a1, a4, a3);
body(a1, a2, a3, a4);
for (int r = border; r < rows + border; r++) {
for (int c = border; c < cols + border; c++) {
a1[r * colSize + c] += a3[r * colSize + c]; }
}
}
protected double sum() {
double sum = 0;
for (int r = border; r < rows + border; r++) {
for (int c = border; c < cols + border; c++) {
sum += a1[r * colSize + c];
}
}
return sum;
}
protected void fill() {
for (int r = border; r < rows + border; r++) {
System.arraycopy(row, 0, a1, r * colSize + border, cols);
}
}
}
private static final class ArrayWithMultWithReassoc extends MockFD {
public String toString() {
return "1D array using array[row * colSize + col] reassociated"; }
private final double[] a1 = new double[rowSize * colSize];
private final double[] a2 = new double[rowSize * colSize];
private final double[] a3 = new double[rowSize * colSize];
private final double[] a4 = new double[rowSize * colSize];
private static void
body(final double[] d1, final double[] s1, final
double[] d2, final double[] s2)
{
for (int r = border; r < rows + border; r++) {
for (int c = border; c < cols + border; c++) {
d1[r * colSize + c] =
alpha * s1[r * colSize + c]
+ beta * (s1[r * colSize + c - colSize]
+ s1[r * colSize + c - 1]
+ s1[r * colSize + c + 1]
+ s1[r * colSize + c + colSize]);
d2[r * colSize + c] =
alpha * s2[r * colSize + c]
+ beta * (s2[r * colSize + c - colSize]
+ s2[r * colSize + c - 1]
+ s2[r * colSize + c + 1]
+ s2[r * colSize + c + colSize]);
}
}
}
protected void calc() {
body(a2, a1, a4, a3);
body(a1, a2, a3, a4);
for (int r = border; r < rows + border; r++) {
for (int c = border; c < cols + border; c++) {
a1[r * colSize + c] += a3[r * colSize + c]; }
}
}
protected double sum() {
double sum = 0;
for (int r = border; r < rows + border; r++) {
for (int c = border; c < cols + border; c++) {
sum += a1[r * colSize + c];
}
}
return sum;
}
protected void fill() {
for (int r = border; r < rows + border; r++) {
System.arraycopy(row, 0, a1, r * colSize + border, cols);
}
}
}
private static final class ArrayWithExtraIndex extends MockFD {
public String toString() { return "1D array using array[i]"; }
private final double[] a1 = new double[rowSize * colSize];
private final double[] a2 = new double[rowSize * colSize];
private final double[] a3 = new double[rowSize * colSize];
private final double[] a4 = new double[rowSize * colSize];
private static void
body(final double[] d1, final double[] s1,
final double[] d2, final double[] s2)
{
for (int r = border * colSize; r < (rows + border) * colSize;
r += colSize)
{
for (int c = 0, i = r + border; c < cols; c++, i++) {
d1[i] = alpha * s1[i]
+ beta * (s1[i-colSize] + s1[i-1]
+ s1[i+1] + s1[i+colSize]);
d2[i] = alpha * s2[i]
+ beta * (s2[i-colSize] + s2[i-1]
+ s2[i+1] + s2[i+colSize]);
}
}
}
protected void calc() {
body(a2, a1, a4, a3);
body(a1, a2, a3, a4);
for (int r = border * colSize; r < (rows + border) * colSize;
r += colSize)
{
for (int c = 0, i = r + border; c < cols; c++, i++) {
a1[i] += a3[i];
}
}
}
protected double sum() {
double sum = 0;
for (int r = border * colSize; r < (rows + border) * colSize;
r += colSize)
{
for (int c = 0, i = r + border; c < cols; c++, i++) {
sum += a1[i];
}
}
return sum;
}
protected void fill() {
for (int r = border; r < rows + border; r++) { System.arraycopy
(row, 0, a1, r * colSize + border, cols); }
}
}
private static final class ArrayOfArraysWithTempRows extends MockFD {
public String toString() { return "2D array using arrayRow[col]"; }
private final double[][] a1 = new double[rowSize][colSize];
private final double[][] a2 = new double[rowSize][colSize];
private final double[][] a3 = new double[rowSize][colSize];
private final double[][] a4 = new double[rowSize][colSize];
private static void
body(final double[][] d1, final double[][] s1,
final double[][] d2, final double[][] s2) {
for (int r = border; r < rows + border; r++) {
double[] d1R = d1[r];
double[] s1Rm1 = s1[r-1];
double[] s1R = s1[r];
double[] s1Rp1 = s1[r+1];
double[] d2R = d2[r];
double[] s2Rm1 = s2[r-1];
double[] s2R = s2[r];
double[] s2Rp1 = s2[r+1];
for (int c = border; c < cols + border; c++) {
d1R[c] = alpha * s1R[c]
+ beta * (s1Rm1[c] + s1R[c-1]
+ s1R[c+1] + s1Rp1[c]);
d2R[c] = alpha * s2R[c]
+ beta * (s2Rm1[c] + s2R[c-1]
+ s2R[c+1] + s2Rp1[c]);
}
}
}
protected void calc() {
body(a2, a1, a4, a3);
body(a1, a2, a3, a4);
for (int r = border; r < rows + border; r++) {
double[] a1R = a1[r];
double[] a3R = a3[r];
for (int c = border; c < cols + border; c++) {
a1R[c] += a3R[c];
}
}
}
protected double sum() {
double sum = 0;
for (int r = border; r < rows + border; r++) {
for (int c = border; c < cols + border; c++) {
sum += a1[r][c];
}
}
return sum;
}
protected void fill() {
for (int r = border; r < rows + border; r++) {
System.arraycopy(row, 0, a1[r], border, cols); }
}
}
private static final int loops = 100;
private static final int rows = 256;
private static final int cols = 256;
private static final int border = 1; // matrix borders are typically
// used in finite difference
// calculations for boundaries
private static final int rowSize = rows + 2 * border;
private static final int colSize = cols + 2 * border;
private static final double alpha = 2d / 3d;
private static final double beta = 1d / 3d;
private static final double[] row = new double[cols];
private static double result;
private static int testNumber;
private static double fastest = Double.MAX_VALUE;
private static double slowest;
private MockFD() {}
protected abstract void fill();
protected abstract void calc();
protected abstract double sum();
private static void initRow() {
Random r = new Random(0);
for (int c = 0; c < cols; c++) { row[c] = r.nextDouble(); }
}
private static double run(final MockFD t, final int loops) {
t.fill();
final long start = System.currentTimeMillis();
for (int loop = 0; loop < loops; loop++) { t.calc(); }
return Math.rint( (System.currentTimeMillis() - start) / 100d ) / 10;
}
private static void test(final MockFD t) {
run(t, loops / 2); // discard first run, compilation cause first run
// to be slow, which in turn means more loops to
// get accurate assessment of a big job, which in
// turn means longer run time. To speed things up
// ignore compilation.
System.gc();
final double time = run(t, loops);
if (testNumber == 0) { result = t.sum(); }
if ( result == t.sum() ) {
System.out.println( time + " s - Test " + (++testNumber) + ": " + t );
slowest = Math.max(slowest, time);
fastest = Math.min(fastest, time);
} else {
System.out.println(t + " gave incorrect result of "
+ t.sum()+ ", should be " + result);
}
}
public static void main(final String[] args) {
doit();
//doit();
}
public static void doit() {
initRow();
test( new ArrayOfArrays() );
test( new ArrayWithMult() );
test( new ArrayWithMultWithReassoc() );
test( new ArrayWithExtraIndex() );
test( new ArrayOfArraysWithTempRows() );
final double spread = Math.rint(10 * slowest / fastest) / 10;
System.out.println("Spread = " + spread + " times");
}
}
###@###.### 2002-10-14
|