EVALUATION
src/share/classes/com/sun/org/apache/xml/internal/serializer/ToStream.writeAttrString()
does not write out surrogates correctly. accumDefaultEscape() returns i+2 if a pair of
surrogates has been written out successfully, but the iteration loop in writeAttrString()
always steps by 1. As a result, the low half of each surrogate pair is visited again and
incorrectly appended to the output after every pair that is written.
The test case is shown below.
String attrKey = "key";
String attrValue = "\ud800\udc00";
Document doc =
DocumentBuilderFactory.newInstance()
.newDocumentBuilder()
.getDOMImplementation()
.createDocument(null, null, null);
Element xmlRoot = doc.createElement("root");
xmlRoot.setAttribute(attrKey, attrValue);
doc.appendChild(xmlRoot);
Transformer t = TransformerFactory.newInstance()
.newTransformer();
BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(
new FileOutputStream("foo.xml"), "UTF-8"));
t.transform(new DOMSource(doc), new StreamResult(bw));
bw.close();
|
SUGGESTED FIX
*** /tmp/geta27253 Wed Dec 6 15:46:51 2006
--- ToStream.java Wed Dec 6 15:45:53 2006
***************
*** 1642,1648 ****
{
int pos = accumDefaultEntity(writer, ch, i, chars, len, fromTextNode, escLF);
-
if (i == pos)
{
if (Encodings.isHighUTF16Surrogate(ch))
--- 1642,1647 ----
***************
*** 1967,1978 ****
string.getChars(0,len, m_attrBuff, 0);
final char[] stringChars = m_attrBuff;
! for (int i = 0; i < len; i++)
{
char ch = stringChars[i];
if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch)))
{
writer.write(ch);
}
else
{ // I guess the parser doesn't normalize cr/lf in attributes. -sb
--- 1966,1980 ----
string.getChars(0,len, m_attrBuff, 0);
final char[] stringChars = m_attrBuff;
! int i = 0;
! while (i < len)
! // for (int i = 0; i < len; i++)
{
char ch = stringChars[i];
if (escapingNotNeeded(ch) && (!m_charInfo.isSpecialAttrChar(ch)))
{
writer.write(ch);
+ i++;
}
else
{ // I guess the parser doesn't normalize cr/lf in attributes. -sb
***************
*** 1984,1990 ****
// ch = CharInfo.S_LINEFEED;
// }
! accumDefaultEscape(writer, ch, i, stringChars, len, false, true);
}
}
--- 1986,1992 ----
// ch = CharInfo.S_LINEFEED;
// }
! i = accumDefaultEscape(writer, ch, i, stringChars, len, false, true);
}
}
|