EVALUATION
Here's a simple and safe fix for the unbounded substring arraycopy problem,
leaving more extensive surgery of Charset optimization for later:
6633613: (str) StringCoding optimizations to avoid unnecessary array copies with Charset arg
Reviewed-by: iris
diff --git a/src/share/classes/java/lang/StringCoding.java b/src/share/classes/java/lang/StringCoding.java
--- a/src/share/classes/java/lang/StringCoding.java
+++ b/src/share/classes/java/lang/StringCoding.java
@@ -194,8 +194,7 @@ class StringCoding {
static char[] decode(Charset cs, byte[] ba, int off, int len) {
StringDecoder sd = new StringDecoder(cs, cs.name());
- byte[] b = Arrays.copyOf(ba, ba.length);
- return sd.decode(b, off, len);
+ return sd.decode(Arrays.copyOfRange(ba, off, off + len), 0, len);
}
static char[] decode(byte[] ba, int off, int len) {
@@ -293,8 +292,7 @@ class StringCoding {
static byte[] encode(Charset cs, char[] ca, int off, int len) {
StringEncoder se = new StringEncoder(cs, cs.name());
- char[] c = Arrays.copyOf(ca, ca.length);
- return se.encode(c, off, len);
+ return se.encode(Arrays.copyOfRange(ca, off, off + len), 0, len);
}
static byte[] encode(char[] ca, int off, int len) {
|
EVALUATION
Here's a microbenchmark showing the problem:
/*
* Copyright (c) 2007 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
* CA 95054 USA or visit www.sun.com if you need additional information or
* have any questions.
*/
/*
* This is not a regression test, but a micro-benchmark.
*
* I have run this as follows:
*
* for f in -client -server; do time mergeBench dolphin concurrent jr $f -da -dsa -Dverbose=true -DSecurityManager=true -Dsubsize=1000 -Dfile.encoding=US-ASCII StringCodingMicroBenchmark.java ; done
* for f in -client -server; do mergeBench dolphin concurrent jr -dsa -da $f StringCodingMicroBenchmark.java; done
*
* for f in -client -server; do mergeBench dolphin concurrent jr $f -da -dsa -DSecurityManager=true -Dsubsize=163840 -Dsize=168340 -Diterations=100 -Dfilter='consistent.*code: charset arg' -Dfile.encoding=UTF-8 StringCodingMicroBenchmark.java
*
* @author Martin Buchholz
*/
import java.util.*;
import java.nio.charset.*;
import java.util.concurrent.*;
import java.util.regex.Pattern;
/**
* Usage: java StringCodingMicroBenchmark
* [-Diterations=N] [-Dsize=N] [-Dsubsize=N]
* [-Dfilter=REGEXP] [-DSecurityManager=true]
* [-Dverbose=true]
*/
public class StringCodingMicroBenchmark {
abstract static class Job {
private final String name;
public Job(String name) { this.name = name; }
public String name() { return name; }
public abstract void work() throws Throwable;
}
private static void collectAllGarbage() {
final java.util.concurrent.CountDownLatch drained
= new java.util.concurrent.CountDownLatch(1);
try {
System.gc(); // enqueue finalizable objects
new Object() { protected void finalize() {
drained.countDown(); }};
System.gc(); // enqueue detector
drained.await(); // wait for finalizer queue to drain
System.gc(); // cleanup finalized objects
} catch (InterruptedException e) { throw new Error(e); }
}
/**
* Runs each job for long enough that all the runtime compilers
* have had plenty of time to warm up, i.e. get around to
* compiling everything worth compiling.
* Returns array of average times per job per run.
*/
private static long[] time0(Job ... jobs) throws Throwable {
final long warmupNanos = 10L * 1000L * 1000L * 1000L;
long[] nanoss = new long[jobs.length];
for (int i = 0; i < jobs.length; i++) {
collectAllGarbage();
long t0 = System.nanoTime();
long t;
int j = 0;
do { jobs[i].work(); j++; }
while ((t = System.nanoTime() - t0) < warmupNanos);
nanoss[i] = t/j;
}
return nanoss;
}
private static void time(Job ... jobs) throws Throwable {
long[] warmup = time0(jobs); // Warm up run
long[] nanoss = time0(jobs); // Real timing run
long[] milliss = new long[jobs.length];
double[] ratios = new double[jobs.length];
final String nameHeader = "Method";
final String millisHeader = "Millis";
final String ratioHeader = "Ratio";
int nameWidth = nameHeader.length();
int millisWidth = millisHeader.length();
int ratioWidth = ratioHeader.length();
for (int i = 0; i < jobs.length; i++) {
nameWidth = Math.max(nameWidth, jobs[i].name().length());
milliss[i] = nanoss[i]/(1000L * 1000L);
millisWidth = Math.max(millisWidth,
String.format("%d", milliss[i]).length());
ratios[i] = (double) nanoss[i] / (double) nanoss[0];
ratioWidth = Math.max(ratioWidth,
String.format("%.3f", ratios[i]).length());
}
String format = String.format("%%-%ds %%%dd %%%d.3f%%n",
nameWidth, millisWidth, ratioWidth);
String headerFormat = String.format("%%-%ds %%%ds %%%ds%%n",
nameWidth, millisWidth, ratioWidth);
System.out.printf(headerFormat, "Method", "Millis", "Ratio");
// Print out absolute and relative times, calibrated against first job
for (int i = 0; i < jobs.length; i++)
System.out.printf(format, jobs[i].name(), milliss[i], ratios[i]);
}
private static Job[] filter(Pattern filter, Job[] jobs) {
if (filter == null) return jobs;
Job[] newJobs = new Job[jobs.length];
int n = 0;
for (Job job : jobs)
if (filter.matcher(job.name()).find())
newJobs[n++] = job;
// Arrays.copyOf not available in JDK 5
Job[] ret = new Job[n];
System.arraycopy(newJobs, 0, ret, 0, n);
return ret;
}
static class PermissiveSecurityManger extends SecurityManager {
@Override public void checkPermission(java.security.Permission p) {
/* bien sur, Monsieur */
}
}
public static void main(String[] args) throws Throwable {
final int iterations = Integer.getInteger("iterations", 100000);
final int size = Integer.getInteger("size", 1024*16);
final int subsize = Integer.getInteger("subsize", 1);
final String regex = System.getProperty("filter");
final Pattern filter = (regex == null) ? null : Pattern.compile(regex);
final boolean useSecurityManager = Boolean.getBoolean("SecurityManager");
final boolean verbose = Boolean.getBoolean("verbose");
final Charset cs = Charset.defaultCharset();
final String csn = cs.name();
final String csn1 = "ISO-8859-2";
final String csn2 = "ISO-8859-4";
final Charset cs1 = Charset.forName(csn1);
final Charset cs2 = Charset.forName(csn2);
final Random rnd = new Random();
final StringBuilder sb = new StringBuilder();
for (int i = 0; i < size; i++)
sb.append((char) rnd.nextInt(128));
final String string = sb.toString();
final byte[] bytes = string.getBytes("ASCII");
if (verbose)
System.err.printf(
"charset=%s filter=%s useSecurityManager=%b iterations=%d size=%d subsize=%d%n",
csn, regex, useSecurityManager, iterations, size, subsize);
if (useSecurityManager)
System.setSecurityManager(new PermissiveSecurityManger());
Job[] jobs = {
//----------------------------------------------------------------
new Job("consistent decode: default charset") {
public void work() throws Throwable {
for (int i = 0; i < iterations; i++)
new String(bytes, 0, subsize);
}},
new Job("consistent decode: charset arg") {
public void work() throws Throwable {
for (int i = 0; i < iterations; i++)
new String(bytes, 0, subsize, cs);
}},
new Job("consistent decode: name of charset arg") {
public void work() throws Throwable {
for (int i = 0; i < iterations; i++)
new String(bytes, 0, subsize, csn);
}},
//----------------------------------------------------------------
new Job("consistent encode: default charset") {
public void work() throws Throwable {
for (int i = 0; i < iterations; i++)
string.substring(0, subsize).getBytes();
}},
new Job("consistent encode: charset arg") {
public void work() throws Throwable {
for (int i = 0; i < iterations; i++)
string.substring(0, subsize).getBytes(cs);
}},
new Job("consistent encode: name of charset arg") {
public void work() throws Throwable {
for (int i = 0; i < iterations; i++)
string.substring(0, subsize).getBytes(csn);
}},
//----------------------------------------------------------------
new Job("alternating decode: default charset") {
public void work() throws Throwable {
for (int i = 0; i < iterations; i++) {
new String(bytes, 0, subsize);
new String(bytes, 0, subsize, csn1);
}}},
new Job("alternating decode: charset arg") {
public void work() throws Throwable {
for (int i = 0; i < iterations; i++) {
new String(bytes, 0, subsize, cs1);
new String(bytes, 0, subsize, cs2);
}}},
new Job("alternating decode: name of charset arg") {
public void work() throws Throwable {
for (int i = 0; i < iterations; i++) {
new String(bytes, 0, subsize, csn1);
new String(bytes, 0, subsize, csn2);
}}},
//----------------------------------------------------------------
new Job("alternating encode: default charset") {
public void work() throws Throwable {
for (int i = 0; i < iterations; i++) {
string.substring(0, subsize).getBytes();
string.substring(0, subsize).getBytes(csn1);
}}},
new Job("alternating encode: charset arg") {
public void work() throws Throwable {
for (int i = 0; i < iterations; i++) {
string.substring(0, subsize).getBytes(cs1);
string.substring(0, subsize).getBytes(cs2);
}}},
new Job("alternating encode: name of charset arg") {
public void work() throws Throwable {
for (int i = 0; i < iterations; i++) {
string.substring(0, subsize).getBytes(csn1);
string.substring(0, subsize).getBytes(csn2);
}}}
};
time(filter(filter, jobs));
}
}
|