|
Evaluation
|
Here's a microbenchmark showing the problem:
/*
* Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*/
/*
* This is not a regression test, but a micro-benchmark.
*
* I have run this as follows:
*
* for f in -client -server; do time mergeBench dolphin concurrent jr $f -da -dsa -Dverbose=true -DSecurityManager=true -Dsubsize=1000 -Dfile.encoding=US-ASCII StringCodingMicroBenchmark.java ; done
* for f in -client -server; do mergeBench dolphin concurrent jr -dsa -da $f StringCodingMicroBenchmark.java; done
*
* for f in -client -server; do mergeBench dolphin concurrent jr $f -da -dsa -DSecurityManager=true -Dsubsize=163840 -Dsize=168340 -Diterations=100 -Dfilter='consistent.*code: charset arg' -Dfile.encoding=UTF-8 StringCodingMicroBenchmark.java
*
* @author Martin Buchholz
*/
import java.util.*;
import java.nio.charset.*;
import java.util.concurrent.*;
import java.util.regex.Pattern;
/**
* Usage: java StringCodingMicroBenchmark
* [-Diterations=N] [-Dsize=N] [-Dsubsize=N]
* [-Dfilter=REGEXP] [-DSecurityManager=true]
* [-Dverbose=true]
*/
public class StringCodingMicroBenchmark {
abstract static class Job {
private final String name;
public Job(String name) { this.name = name; }
public String name() { return name; }
public abstract void work() throws Throwable;
}
private static void collectAllGarbage() {
final java.util.concurrent.CountDownLatch drained
= new java.util.concurrent.CountDownLatch(1);
try {
System.gc(); // enqueue finalizable objects
new Object() { protected void finalize() {
drained.countDown(); }};
System.gc(); // enqueue detector
drained.await(); // wait for finalizer queue to drain
System.gc(); // cleanup finalized objects
} catch (InterruptedException e) { throw new Error(e); }
}
/**
* Runs each job for long enough that all the runtime compilers
* have had plenty of time to warm up, i.e. get around to
* compiling everything worth compiling.
* Returns array of average times per job per run.
*/
private static long[] time0(Job ... jobs) throws Throwable {
final long warmupNanos = 10L * 1000L * 1000L * 1000L;
long[] nanoss = new long[jobs.length];
for (int i = 0; i < jobs.length; i++) {
collectAllGarbage();
long t0 = System.nanoTime();
long t;
int j = 0;
do { jobs[i].work(); j++; }
while ((t = System.nanoTime() - t0) < warmupNanos);
nanoss[i] = t/j;
}
return nanoss;
}
private static void time(Job ... jobs) throws Throwable {
long[] warmup = time0(jobs); // Warm up run
long[] nanoss = time0(jobs); // Real timing run
long[] milliss = new long[jobs.length];
double[] ratios = new double[jobs.length];
final String nameHeader = "Method";
final String millisHeader = "Millis";
final String ratioHeader = "Ratio";
int nameWidth = nameHeader.length();
int millisWidth = millisHeader.length();
int ratioWidth = ratioHeader.length();
for (int i = 0; i < jobs.length; i++) {
nameWidth = Math.max(nameWidth, jobs[i].name().length());
milliss[i] = nanoss[i]/(1000L * 1000L);
millisWidth = Math.max(millisWidth,
String.format("%d", milliss[i]).length());
ratios[i] = (double) nanoss[i] / (double) nanoss[0];
ratioWidth = Math.max(ratioWidth,
String.format("%.3f", ratios[i]).length());
}
String format = String.format("%%-%ds %%%dd %%%d.3f%%n",
nameWidth, millisWidth, ratioWidth);
String headerFormat = String.format("%%-%ds %%%ds %%%ds%%n",
nameWidth, millisWidth, ratioWidth);
System.out.printf(headerFormat, "Method", "Millis", "Ratio");
// Print out absolute and relative times, calibrated against first job
for (int i = 0; i < jobs.length; i++)
System.out.printf(format, jobs[i].name(), milliss[i], ratios[i]);
}
private static Job[] filter(Pattern filter, Job[] jobs) {
if (filter == null) return jobs;
Job[] newJobs = new Job[jobs.length];
int n = 0;
for (Job job : jobs)
if (filter.matcher(job.name()).find())
newJobs[n++] = job;
// Arrays.copyOf not available in JDK 5
Job[] ret = new Job[n];
System.arraycopy(newJobs, 0, ret, 0, n);
return ret;
}
static class PermissiveSecurityManger extends SecurityManager {
@Override public void checkPermission(java.security.Permission p) {
/* bien sur, Monsieur */
}
}
public static void main(String[] args) throws Throwable {
final int iterations = Integer.getInteger("iterations", 100000);
final int size = Integer.getInteger("size", 1024*16);
final int subsize = Integer.getInteger("subsize", 1);
final String regex = System.getProperty("filter");
final Pattern filter = (regex == null) ? null : Pattern.compile(regex);
final boolean useSecurityManager = Boolean.getBoolean("SecurityManager");
final boolean verbose = Boolean.getBoolean("verbose");
final Charset cs = Charset.defaultCharset();
final String csn = cs.name();
final String csn1 = "ISO-8859-2";
final String csn2 = "ISO-8859-4";
final Charset cs1 = Charset.forName(csn1);
final Charset cs2 = Charset.forName(csn2);
final Random rnd = new Random();
final StringBuilder sb = new StringBuilder();
for (int i = 0; i < size; i++)
sb.append((char) rnd.nextInt(128));
final String string = sb.toString();
final byte[] bytes = string.getBytes("ASCII");
if (verbose)
System.err.printf(
"charset=%s filter=%s useSecurityManager=%b iterations=%d size=%d subsize=%d%n",
csn, regex, useSecurityManager, iterations, size, subsize);
if (useSecurityManager)
System.setSecurityManager(new PermissiveSecurityManger());
Job[] jobs = {
//----------------------------------------------------------------
new Job("consistent decode: default charset") {
public void work() throws Throwable {
for (int i = 0; i < iterations; i++)
new String(bytes, 0, subsize);
}},
new Job("consistent decode: charset arg") {
public void work() throws Throwable {
for (int i = 0; i < iterations; i++)
new String(bytes, 0, subsize, cs);
}},
new Job("consistent decode: name of charset arg") {
public void work() throws Throwable {
for (int i = 0; i < iterations; i++)
new String(bytes, 0, subsize, csn);
}},
//----------------------------------------------------------------
new Job("consistent encode: default charset") {
public void work() throws Throwable {
for (int i = 0; i < iterations; i++)
string.substring(0, subsize).getBytes();
}},
new Job("consistent encode: charset arg") {
public void work() throws Throwable {
for (int i = 0; i < iterations; i++)
string.substring(0, subsize).getBytes(cs);
}},
new Job("consistent encode: name of charset arg") {
public void work() throws Throwable {
for (int i = 0; i < iterations; i++)
string.substring(0, subsize).getBytes(csn);
}},
//----------------------------------------------------------------
new Job("alternating decode: default charset") {
public void work() throws Throwable {
for (int i = 0; i < iterations; i++) {
new String(bytes, 0, subsize);
new String(bytes, 0, subsize, csn1);
}}},
new Job("alternating decode: charset arg") {
public void work() throws Throwable {
for (int i = 0; i < iterations; i++) {
new String(bytes, 0, subsize, cs1);
new String(bytes, 0, subsize, cs2);
}}},
new Job("alternating decode: name of charset arg") {
public void work() throws Throwable {
for (int i = 0; i < iterations; i++) {
new String(bytes, 0, subsize, csn1);
new String(bytes, 0, subsize, csn2);
}}},
//----------------------------------------------------------------
new Job("alternating encode: default charset") {
public void work() throws Throwable {
for (int i = 0; i < iterations; i++) {
string.substring(0, subsize).getBytes();
string.substring(0, subsize).getBytes(csn1);
}}},
new Job("alternating encode: charset arg") {
public void work() throws Throwable {
for (int i = 0; i < iterations; i++) {
string.substring(0, subsize).getBytes(cs1);
string.substring(0, subsize).getBytes(cs2);
}}},
new Job("alternating encode: name of charset arg") {
public void work() throws Throwable {
for (int i = 0; i < iterations; i++) {
string.substring(0, subsize).getBytes(csn1);
string.substring(0, subsize).getBytes(csn2);
}}}
};
time(filter(filter, jobs));
}
}
Posted Date : 2007-12-01 01:43:02.0
Here are sample results for x86, showing performance of coding methods
with Charset args to be arbitrarily slower than methods taking the
name of a Charset, which is unreasonable.
for f in -client -server; do jver dolphin jr $f -da -dsa -Dfile.encoding=ASCII StringCodingMicroBenchmark.java ; done); ./
==> javac -Xlint:all StringCodingMicroBenchmark.java
==> java -client -da -dsa -Dfile.encoding=ASCII StringCodingMicroBenchmark
Method Millis Ratio
consistent decode: default charset 39 1.000
consistent decode: charset arg 1080 27.649
consistent decode: name of charset arg 37 0.958
consistent encode: default charset 37 0.954
consistent encode: charset arg 2168 55.482
consistent encode: name of charset arg 36 0.930
alternating decode: default charset 157 4.040
alternating decode: charset arg 2160 55.288
alternating decode: name of charset arg 177 4.544
alternating encode: default charset 246 6.312
alternating encode: charset arg 4445 113.735
alternating encode: name of charset arg 267 6.854
==> javac -Xlint:all StringCodingMicroBenchmark.java
==> java -server -da -dsa -Dfile.encoding=ASCII StringCodingMicroBenchmark
Method Millis Ratio
consistent decode: default charset 20 1.000
consistent decode: charset arg 1133 54.965
consistent decode: name of charset arg 18 0.890
consistent encode: default charset 20 0.981
consistent encode: charset arg 2194 106.434
consistent encode: name of charset arg 20 0.982
alternating decode: default charset 83 4.052
alternating decode: charset arg 2122 102.933
alternating decode: name of charset arg 115 5.601
alternating encode: default charset 141 6.847
alternating encode: charset arg 4421 214.423
alternating encode: name of charset arg 170 8.267
Posted Date : 2007-12-01 01:43:02.0
The obvious fix is to replace Arrays.copyOf with Arrays.copyOfRange,
but I think we can do better.
Note that substrings share their parent String, but we do not want
to make copies of the parent string data when encoding the substring.
Posted Date : 2007-12-01 01:43:02.0
Here's a simple and safe fix for the unbounded substring arraycopy problem,
leaving more extensive surgery of Charset optimization for later:
6633613: (str) StringCoding optimizations to avoid unnecessary array copies with Charset arg
Reviewed-by: iris
diff --git a/src/share/classes/java/lang/StringCoding.java b/src/share/classes/java/lang/StringCoding.java
--- a/src/share/classes/java/lang/StringCoding.java
+++ b/src/share/classes/java/lang/StringCoding.java
@@ -194,8 +194,7 @@ class StringCoding {
static char[] decode(Charset cs, byte[] ba, int off, int len) {
StringDecoder sd = new StringDecoder(cs, cs.name());
- byte[] b = Arrays.copyOf(ba, ba.length);
- return sd.decode(b, off, len);
+ return sd.decode(Arrays.copyOfRange(ba, off, off + len), 0, len);
}
static char[] decode(byte[] ba, int off, int len) {
@@ -293,8 +292,7 @@ class StringCoding {
static byte[] encode(Charset cs, char[] ca, int off, int len) {
StringEncoder se = new StringEncoder(cs, cs.name());
- char[] c = Arrays.copyOf(ca, ca.length);
- return se.encode(c, off, len);
+ return se.encode(Arrays.copyOfRange(ca, off, off + len), 0, len);
}
static byte[] encode(char[] ca, int off, int len) {
Posted Date : 2008-03-06 01:02:00.0
|