Java Solaris Communities Sun Store Join SDN My Profile Why Join?
 
Bug Database
Bug Detail
Quick Lists
Top 25 Bugs
Top 25 RFE's
Recently Closed Bugs
Printable Page Printable Page


Bug Database
Bug ID: 6591971
Votes 0
Synopsis (bf) Optimize direct buffer byte-swapping
Category java:classes_nio
Reported Against
Release Fixed 7(b20)
State 10-Fix Delivered, bug
Priority: 3-Medium
Related Bugs
Submit Date 13-AUG-2007
Description
While looking at redundant casts, I noticed Bits.swap can be made faster.
int shifting is likely to be fastest, so long byte swapping should use int
swapping in its implementation, but int swapping should not use short swapping.

A few more other cycles can be squeezed.

--- /u/martin/ws/dolphin/src/share/classes/java/nio/Bits.java	2007-06-19 15:35:36.228695000 -0700
+++ /u/martin/ws/redundant/src/share/classes/java/nio/Bits.java	2007-08-12 13:50: customer .682152000 -0700
@@ -43,22 +43,24 @@
 
     static short swap(short x) {
 	return (short)((x << 8) |
-		       ((x >> 8) & 0xff));
+		       ((char)x >>> 8));
     }
 
     static char swap(char x) {
 	return (char)((x << 8) |
-		      ((x >> 8) & 0xff));
+		      (x >>> 8));
     }
 
     static int swap(int x) {
-	return (int)((swap((short)x) << 16) |
-		     (swap((short)(x >> 16)) & 0xffff));
+	return ((x << 24) |
+		((x & 0x0000ff00) <<  8) |
+		((x & 0x00ff0000) >>> 8) |
+		(x >>> 24));
     }
 
     static long swap(long x) {
-	return (long)(((long)swap((int)(x)) << 32) |
-		      ((long)swap((int)(x >> 32)) & 0xffffffffL));
+	return (((long)swap((int)x) << 32) |
+		((long)swap((int)(x >>> 32)) & 0xffffffffL));
     }
Posted Date : 2007-08-13 00:25:10.0
Work Around
N/A
Evaluation
Indeed.

Here's a microbenchmark:


import java.util.*;
import java.nio.*;
import java.util.concurrent.*;
import java.util.regex.Pattern;

public class SwapMicroBenchmark {
    abstract static class Job {
	private final String name;
	public Job(String name) { this.name = name; }
	public String name() { return name; }
	public abstract void work() throws Throwable;
    }

    private static void collectAllGarbage() {
	final java.util.concurrent.CountDownLatch drained
	    = new java.util.concurrent.CountDownLatch(1);
	try {
	    System.gc();	// enqueue finalizable objects
	    new Object() { protected void finalize() {
		drained.countDown(); }};
	    System.gc();	// enqueue detector
	    drained.await();	// wait for finalizer queue to drain
	    System.gc();	// cleanup finalized objects
	} catch (InterruptedException e) { throw new Error(e); }
    }

    /**
     * Runs each job for long enough that all the runtime compilers
     * have had plenty of time to warm up, i.e. get around to
     * compiling everything worth compiling.
     * Returns array of average times per job per run.
     */
    private static long[] time0(Job ... jobs) throws Throwable {
	final long warmupNanos = 10L * 1000L * 1000L * 1000L;
	long[] nanoss = new long[jobs.length];
	for (int i = 0; i < jobs.length; i++) {
	    collectAllGarbage();
	    long t0 = System.nanoTime();
	    long t;
	    int j = 0;
	    do { jobs[i].work(); j++; }
	    while ((t = System.nanoTime() - t0) < warmupNanos);
	    nanoss[i] = t/j;
	}
	return nanoss;
    }

    private static void time(Job ... jobs) throws Throwable {

	long[] warmup = time0(jobs); // Warm up run
	long[] nanoss = time0(jobs); // Real timing run
	long[] milliss = new long[jobs.length];
	double[] ratios = new double[jobs.length];

	final String nameHeader   = "Method";
	final String millisHeader = "Millis";
	final String ratioHeader  = "Ratio";

	int nameWidth   = nameHeader.length();
	int millisWidth = millisHeader.length();
	int ratioWidth  = ratioHeader.length();

	for (int i = 0; i < jobs.length; i++) {
	    nameWidth = Math.max(nameWidth, jobs[i].name().length());

	    milliss[i] = nanoss[i]/(1000L * 1000L);
	    millisWidth = Math.max(millisWidth,
				   String.format("%d", milliss[i]).length());

	    ratios[i] = (double) nanoss[i] / (double) nanoss[0];
	    ratioWidth = Math.max(ratioWidth,
				  String.format("%.3f", ratios[i]).length());
	}

	String format = String.format("%%-%ds %%%dd %%%d.3f%%n",
				      nameWidth, millisWidth, ratioWidth);
	String headerFormat = String.format("%%-%ds %%%ds %%%ds%%n",
					    nameWidth, millisWidth, ratioWidth);
	System.out.printf(headerFormat, "Method", "Millis", "Ratio");

	// Print out absolute and relative times, calibrated against first job
	for (int i = 0; i < jobs.length; i++)
	    System.out.printf(format, jobs[i].name(), milliss[i], ratios[i]);
    }

    private static String keywordValue(String[] args, String keyword) {
	for (String arg : args)
	    if (arg.startsWith(keyword))
		return arg.substring(keyword.length() + 1);
	return null;
    }

    private static int intArg(String[] args, String keyword, int defaultValue) {
	String val = keywordValue(args, keyword);
	return val == null ? defaultValue : Integer.parseInt(val);
    }

    private static Pattern patternArg(String[] args, String keyword) {
	String val = keywordValue(args, keyword);
	return val == null ? null : Pattern.compile(val);
    }

    private static Job[] filter(Pattern filter, Job[] jobs) {
	if (filter == null) return jobs;
	Job[] newJobs = new Job[jobs.length];
	int n = 0;
	for (Job job : jobs)
	    if (filter.matcher(job.name()).find())
		newJobs[n++] = job;
	// Arrays.copyOf not available in JDK 5
	Job[] ret = new Job[n];
	System.arraycopy(newJobs, 0, ret, 0, n);
	return ret;
    }

    private static void deoptimize(int sum) {
	if (sum == 42)
	    System.out.println("the answer");
    }

    /**
     * Usage: [iterations=N] [size=N] [filter=REGEXP]
     */
    public static void main(String[] args) throws Throwable {
	final int iterations = intArg(args, "iterations", 10000);
	final int size       = intArg(args, "size", 1024);
	final Pattern filter = patternArg(args, "filter");

	final Random rnd = new Random();

	final ByteBuffer b = ByteBuffer.allocateDirect(8*size);
	for (int i = 0; i < b.limit(); i++)
	    b.put(i, (byte) rnd.nextInt());

	Job[] jobs = {
	    new Job("swap char BIG_ENDIAN") {
		public void work() throws Throwable {
		    b.order(ByteOrder.BIG_ENDIAN);
		    CharBuffer x = b.asCharBuffer();
		    for (int i = 0; i < iterations; i++) {
			int sum = 0;
			for (int j = 0, end = x.limit(); j < end; j++)
			    sum += x.get(j);
			deoptimize(sum);}}},
	    new Job("swap char LITTLE_ENDIAN") {
		public void work() throws Throwable {
		    b.order(ByteOrder.LITTLE_ENDIAN);
		    CharBuffer x = b.asCharBuffer();
		    for (int i = 0; i < iterations; i++) {
			int sum = 0;
			for (int j = 0, end = x.limit(); j < end; j++)
			    sum += x.get(j);
			deoptimize(sum);}}},
	    new Job("swap short BIG_ENDIAN") {
		public void work() throws Throwable {
		    b.order(ByteOrder.BIG_ENDIAN);
		    ShortBuffer x = b.asShortBuffer();
		    for (int i = 0; i < iterations; i++) {
			int sum = 0;
			for (int j = 0, end = x.limit(); j < end; j++)
			    sum += x.get(j);
			deoptimize(sum);}}},
	    new Job("swap short LITTLE_ENDIAN") {
		public void work() throws Throwable {
		    b.order(ByteOrder.LITTLE_ENDIAN);
		    ShortBuffer x = b.asShortBuffer();
		    for (int i = 0; i < iterations; i++) {
			int sum = 0;
			for (int j = 0, end = x.limit(); j < end; j++)
			    sum += x.get(j);
			deoptimize(sum);}}},
	    new Job("swap int BIG_ENDIAN") {
		public void work() throws Throwable {
		    b.order(ByteOrder.BIG_ENDIAN);
		    IntBuffer x = b.asIntBuffer();
		    for (int i = 0; i < iterations; i++) {
			int sum = 0;
			for (int j = 0, end = x.limit(); j < end; j++)
			    sum += x.get(j);
			deoptimize(sum);}}},
	    new Job("swap int LITTLE_ENDIAN") {
		public void work() throws Throwable {
		    b.order(ByteOrder.LITTLE_ENDIAN);
		    IntBuffer x = b.asIntBuffer();
		    for (int i = 0; i < iterations; i++) {
			int sum = 0;
			for (int j = 0, end = x.limit(); j < end; j++)
			    sum += x.get(j);
			deoptimize(sum);}}},
	    new Job("swap long BIG_ENDIAN") {
		public void work() throws Throwable {
		    b.order(ByteOrder.BIG_ENDIAN);
		    LongBuffer x = b.asLongBuffer();
		    for (int i = 0; i < iterations; i++) {
			int sum = 0;
			for (int j = 0, end = x.limit(); j < end; j++)
			    sum += x.get(j);
			deoptimize(sum);}}},
	    new Job("swap long LITTLE_ENDIAN") {
		public void work() throws Throwable {
		    b.order(ByteOrder.LITTLE_ENDIAN);
		    LongBuffer x = b.asLongBuffer();
		    for (int i = 0; i < iterations; i++) {
			int sum = 0;
			for (int j = 0, end = x.limit(); j < end; j++)
			    sum += x.get(j);
			deoptimize(sum);}}}
	};

	time(filter(filter, jobs));
    }
}

with results on sparc:

for f in -client -server; do mergeBench dolphin redundant jr -dsa -da $f SwapMicroBenchmark.java filter=LITTLE; done
==> javac -Xlint:all SwapMicroBenchmark.java
==> java -dsa -da -client SwapMicroBenchmark filter=LITTLE
Method                   Millis Ratio vs. dolphin
swap char LITTLE_ENDIAN    1736 1.000 0.974
swap short LITTLE_ENDIAN   1827 1.052 1.025
swap int LITTLE_ENDIAN      959 0.553 0.823
swap long LITTLE_ENDIAN     836 0.482 0.787
Merged results for dolphin vs. redundant running jr -dsa -da -server SwapMicroBenchmark.java filter=LITTLE
==> javac -Xlint:all SwapMicroBenchmark.java
==> java -dsa -da -server SwapMicroBenchmark filter=LITTLE
Method                   Millis Ratio vs. dolphin
swap char LITTLE_ENDIAN     172 1.000 1.000
swap short LITTLE_ENDIAN    195 1.133 0.920
swap int LITTLE_ENDIAN      132 0.769 0.608
swap long LITTLE_ENDIAN     183 1.063 0.729


and on x86:

for f in -client -server; do mergeBench dolphin redundant jr -dsa -da $f SwapMicroBenchmark.java filter=BIG; done
Merged results for dolphin vs. redundant running jr -dsa -da -client SwapMicroBenchmark.java filter=BIG
==> javac -Xlint:all SwapMicroBenchmark.java
==> java -dsa -da -client SwapMicroBenchmark filter=BIG
Method                Millis Ratio vs. dolphin
swap char BIG_ENDIAN     628 1.000 1.000
swap short BIG_ENDIAN    655 1.043 0.997
swap int BIG_ENDIAN      328 0.522 0.924
swap long BIG_ENDIAN     234 0.373 0.893
Merged results for dolphin vs. redundant running jr -dsa -da -server SwapMicroBenchmark.java filter=BIG
==> javac -Xlint:all SwapMicroBenchmark.java
==> java -dsa -da -server SwapMicroBenchmark filter=BIG
Method                Millis Ratio vs. dolphin
swap char BIG_ENDIAN      69 1.000 1.000
swap short BIG_ENDIAN     81 1.166 0.835
swap int BIG_ENDIAN       49 0.702 0.681
swap long BIG_ENDIAN      73 1.056 0.658
Posted Date : 2007-08-13 00:28:16.0
Comments
  
  Include a link with my name & email   


PLEASE NOTE: JDK6 is formerly known as Project Mustang