KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > xquark > serialize > BasicXMLSerializer


1 /*
2  * This file belongs to the XQuark distribution.
3  * Copyright (C) 2003 Universite de Versailles Saint-Quentin.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU Lesser General Public
7  * License as published by the Free Software Foundation; either
8  * version 2.1 of the License, or (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  * Lesser General Public License for more details.
14  *
15  * You should have received a copy of the GNU Lesser General Public
16  * License along with this program; if not, write to the Free Software
17  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307.
18  * You can also get it at http://www.gnu.org/licenses/lgpl.html
19  *
20  * For more information on this software, see http://www.xquark.org.
21  */

22
23 /*
24  * BasicXMLSerializer.java
25  *
26  * Created on 5 novembre 2001, 14:47
27  */

28
29 package org.xquark.serialize;
30
31 import java.io.*;
32 import java.util.Arrays JavaDoc;
33
34 import org.xml.sax.Attributes JavaDoc;
35 import org.xml.sax.SAXException JavaDoc;
36 import org.xquark.util.DefaultXMLFilter;
37 import org.xquark.util.SAXConstants;
38
39 /**
40  * A simple SAX2 XML serializer implementing SAX ContentHandler and
41  * LexicalHandler.
42  *
43  * <B>IMPORTANT </B>: Contrary to {@link org.xquark.serialize.XMLSerializer},
44  * SAX2 XMLReader <code>http://xml.org/sax/features/namespaces</code> and
45  * <code>http://xml.org/sax/features/namespace-prefixes</code> features must
46  * be set to true for this serializer to work properly.
47  */

48 public class BasicXMLSerializer extends DefaultXMLFilter {
49     private static final String JavaDoc RCSRevision = "$Revision: 1.5 $";
50
51     private static final String JavaDoc RCSName = "$Name: $";
52
53     private final static int INDENT_CHAR_ARRAY_SIZE = 10;
54
55     private final static char[] INDENT_CHAR_ARRAY = { '\t', '\t', '\t', '\t',
56             '\t', '\t', '\t', '\t', '\t', '\t' };
57
58     private PrintWriter writer;
59
60     private boolean indent = true;
61
62     private boolean orderAttributes = false;
63
64     private SortableAttributes orderedAtts;
65
66     private boolean useIgnorableWhitespaces = true;
67
68     private String JavaDoc encoding = null;
69
70     private String JavaDoc userSetEncoding = null;
71
72     private boolean generateXMLDeclaration = true;
73
74     private boolean generateEncodingDeclaration = true;
75
76     private boolean closeStreamAutomatically = false;
77
78     /** element hierarchy depth */
79     private int depth = -1;
80
81     /** max depth where indentation is performed */
82     private int level = 0;
83
84     private boolean cdata = false;
85
86     private boolean elementPending = false;
87
88     private boolean lastStartIsStart = false;
89
90     private boolean charDataEncountered = false;
91
92     private boolean extraDataEncountered = false;
93
94     private boolean prologGenerated = false;
95
96     private StringBuffer JavaDoc buf = new StringBuffer JavaDoc();
97
/**
 * Creates a serializer without any output destination. One must be supplied
 * with setOutputStream() or setWriter() before events are processed.
 */
BasicXMLSerializer() {
    super();
}
100
/**
 * Creates a new BasicXMLSerializer writing to a byte stream.
 *
 * @param out
 *            the OutputStream where the serializer writes bytes, for
 *            instance a FileOutputStream.
 * @throws UnsupportedEncodingException
 *             if the encoding used to wrap the stream is not supported.
 */
public BasicXMLSerializer(OutputStream out) throws UnsupportedEncodingException {
    this.setOutputStream(out);
}
112
/**
 * Creates a new BasicXMLSerializer writing to a character stream.
 *
 * @param out
 *            the Writer where the serializer writes characters, for
 *            instance a FileWriter.
 */
public BasicXMLSerializer(Writer out) {
    this.setWriter(out);
}
123
/**
 * Creates a new BasicXMLSerializer writing to a byte stream with the given
 * character encoding.
 *
 * @param out
 *            the OutputStream where the serializer writes bytes, for
 *            instance a FileOutputStream.
 * @param encoding
 *            the name of the character encoding (must be supported by the
 *            JDK used).
 * @throws UnsupportedEncodingException
 *             if the encoding is not supported.
 * @see java.io.OutputStream
 */
public BasicXMLSerializer(OutputStream out, String encoding)
        throws UnsupportedEncodingException {
    // The encoding must be recorded first: setOutputStream() reads it.
    this.setDefaultEncoding(encoding);
    this.setOutputStream(out);
}
140
/**
 * Creates a new BasicXMLSerializer writing to a character stream and
 * declaring the given encoding.
 *
 * @param out
 *            the Writer where the serializer writes characters, for
 *            instance a FileWriter.
 * @param encoding
 *            the name of the character encoding (must be compatible with
 *            the encoding used by the writer).
 */
public BasicXMLSerializer(Writer out, String encoding) {
    this();
    // The encoding must be recorded first: setWriter() reads it.
    this.setDefaultEncoding(encoding);
    this.setWriter(out);
}
156
157     /**
158      * Set the OutputStream where the serializer writes bytes.
159      *
160      * <B>Once this methods is called output Writer is created with the current
161      * encoding and will not change. </B>
162      *
163      * @param out
164      * the OutputStream where the serializer writes bytes. For
165      * instance a FileOutputStream.
166      */

167     public void setOutputStream(OutputStream out)
168             throws UnsupportedEncodingException {
169         Writer outputWriter;
170         if (userSetEncoding == null) {
171             if (out.equals(System.out) || out.equals(System.err)) {
172                 encoding = null;
173                 outputWriter = new OutputStreamWriter(out);
174             } else {
175                 encoding = "UTF-8";
176                 outputWriter = new OutputStreamWriter(out, "UTF-8");
177             }
178         } else {
179             encoding = userSetEncoding;
180             outputWriter = new OutputStreamWriter(out, encoding);
181         }
182
183         writer = new PrintWriter(new BufferedWriter(outputWriter), false);
184     }
185
186     /**
187      * Set the Writer where the serializer writes characters.
188      *
189      * <B>Once this methods is called output encoding is set with the current
190      * value and will not change. </B>
191      *
192      * @param out
193      * the writer where the serializer writes characters. For
194      * instance a FileOutputStream.
195      */

196     public void setWriter(Writer out) {
197         writer = new PrintWriter(out, false);
198
199         encoding = userSetEncoding;
200     }
201
202     /**
203      * Set the indenting mode. <B>Warning: </B>Default behavior is to perform
204      * automatic indentation ( ignorable whitespaces generated by the XML
205      * processor are ignored).
206      *
207      * @param indent
208      * if true, tabs are used at the beginning of lines to indent the
209      * output file.
210      */

211     public void setIndent(boolean indent) {
212         this.indent = indent;
213     }
214
215     /**
216      * Turn on or off the use of ignorable whitespace for indentation.
217      * <B>Warning: </B>This flag is ignored if automatic indentation is on.
218      *
219      * @param use
220      * if true (the default), ignorable whitespace are used for
221      * indentation unless automatic indentation is on.
222      */

223     public void setUseIgnorableWhitespaces(boolean use) {
224         useIgnorableWhitespaces = use;
225     }
226
227     /**
228      * Set the ordering mode for attributes. The default behavior is to perform
229      * no ordering.
230      *
231      * @param mode
232      * If true, ordering is performed following the
233      * {@link <a HREF="http://www.w3.org/TR/xml-c14n">Canonical XML<a>}
234      * W3C recommendation. If false, attributes are serialized as
235      * passed by the SAX2 XMLReader.
236      */

237     public void setCanonicalOutput(boolean mode) {
238         orderAttributes = mode;
239     }
240
241     /**
242      * Set the character encoding.
243      *
244      * @param encoding
245      * a string specifying the character encoding, which must be
246      * supported by the JDK. Example: UTF-8, ISO-8859-1
247      */

248     public void setDefaultEncoding(String JavaDoc encoding) {
249         this.userSetEncoding = encoding;
250     }
251
252     /**
253      * Enable XML declaration generation on startDocument() event.
254      *
255      * @param enable
256      * XML Declaration is generated if true. Default is true.
257      */

258     public void setGenerateXMLDeclaration(boolean enable) {
259         generateXMLDeclaration = enable;
260     }
261
262     /**
263      * Enable encoding attribute generation in XML declaration on
264      * startDocument() event. Considered only if XML decleration enabled.
265      *
266      * @param enable
267      * encoding declaration generation if true. Default is true.
268      */

269     public void setGenerateEncodingDeclaration(boolean enable) {
270         generateEncodingDeclaration = enable;
271     }
272
273     /**
274      * Enable automatic close of the output user stream on endDocument() event.
275      *
276      * @param close
277      * Output stream will be closed if true. Default is false.
278      */

279     public void setAutoStreamClose(boolean close) {
280         closeStreamAutomatically = close;
281     }
282
283     /**
284      * @return the indenting mode.
285      */

286     public boolean getIndent() {
287         return indent;
288     }
289
290     /**
291      * @return the status of the use of gnorable whitespace by the serializer.
292      */

293     public boolean getUseIgnorableWhitespaces() {
294         return useIgnorableWhitespaces;
295     }
296
297     /**
298      * Accessor to the current ordering mode for attributes.
299      *
300      * @return the current ordering mode for attributes.
301      */

302     public boolean getCanonicalOutput() {
303         return orderAttributes;
304     }
305
306     /**
307      * @return the character encoding in use, if previously specified.
308      */

309     public String JavaDoc getDefaultEncoding() {
310         return userSetEncoding;
311     }
312
313     /**
314      * Accessor to the current flag for XML declaration generation.
315      *
316      * @return if true, the XML declaration will be generated.
317      */

318     public boolean getGenerateXMLDeclaration() {
319         return generateXMLDeclaration;
320     }
321
322     /**
323      * Return the value set for stream automatic close feature.
324      *
325      * @see #setAutoStreamClose(boolean).
326      */

327     public boolean getAutoStreamClose() {
328         return closeStreamAutomatically;
329     }
330
331     /**
332      * Reset internal state before reuse.
333      */

334     public void reset() {
335         // writer = null;
336
level = 0;
337         cdata = false;
338         elementPending = false;
339         lastStartIsStart = false;
340         charDataEncountered = false;
341         prologGenerated = false;
342         depth = -1;
343     }
344
345     /**
346      * Closes the user stream.
347      */

348     public void close() {
349         writer.close();
350     }
351
352     /**
353      * Flushes the user stream.
354      */

355     public void flush() {
356         writer.flush();
357     }
358
359     ////////////////////////////////////////////////////////////////////////////
360
// Handlers Implementation
361
////////////////////////////////////////////////////////////////////////////
362

363     public void startDocument() throws SAXException JavaDoc {
364         if (generateXMLDeclaration) {
365             if (encoding == null)
366                 writer.write("<?xml version=\"1.0\"?>");
367             else {
368                 writer.write("<?xml version=\"1.0\"");
369                 if (generateEncodingDeclaration)
370                     writer.write(" encoding=\"" + encoding + "\"");
371                 writer.write("?>");
372             }
373             prologGenerated = true;
374         }
375     }
376
377     public void characters(char[] ch, int start, int length)
378             throws SAXException JavaDoc {
379         if (length <= 0)
380             return;
381
382         // Complete potential start markup
383
completeStartTag();
384
385         if (cdata)
386             writer.write(ch, start, length);
387         else {
388             int max = start + length;
389             int i, last = start;
390             for (i = start; i < max; i++) {
391                 if (isCharDataForbidden(ch[i])) {
392                     writer.write(ch, last, i - last);
393                     writer.write(getPredefinedEntityRef(ch[i]));
394                     last = i + 1;
395                 } else if (ch[i] == 0xA) {
396                     writer.write(ch, last, i - last);
397                     writer.println();
398                     last = i + 1;
399                 }
400             }
401             if ((last == start) || (last < max))
402                 writer.write(ch, last, i - last);
403         }
404
405         charDataEncountered = true;
406     }
407
408     public void ignorableWhitespace(char[] ch, int start, int length)
409             throws SAXException JavaDoc {
410         if (indent || !useIgnorableWhitespaces || (length <= 0))
411             return;
412
413         // Complete potential start markup
414
completeStartTag();
415
416         int max = start + length;
417         int i, last = start;
418         for (i = start; i < max; i++) {
419             if (ch[i] == 0xA) {
420                 writer.write(ch, last, i - last);
421                 writer.println();
422                 last = i + 1;
423             }
424         }
425         if ((last == start) || (last < max))
426             writer.write(ch, last, i - last);
427     }
428
429     private boolean isCharDataForbidden(int ch) {
430         return (ch == '<') || (ch == '&') || (ch == '>'); // last is not
431
// forbidden but more
432
// elegant
433
}
434
435     private boolean isAttributeForbidden(int ch) {
436         return (ch == '<') || (ch == '&') || (ch == '"');
437     }
438
439     private String JavaDoc getPredefinedEntityRef(int ch) {
440         // Encode special XML characters into the equivalent character
441
// references.
442
// These five are defined by default for all XML documents.
443
switch (ch) {
444         case '<':
445             return "&lt;";
446         case '>':
447             return "&gt;";
448         case '"':
449             return "&quot;";
450         case '\'':
451             return "&apos;";
452         case '&':
453             return "&amp;";
454         }
455         return null;
456     }
457
458     public void startElement(String JavaDoc namespaceURI, String JavaDoc localName,
459             String JavaDoc qName, Attributes JavaDoc atts) throws SAXException JavaDoc {
460         // Complete potential start markup
461
completeStartTag();
462
463         depth++;
464         if (!charDataEncountered && carriageReturn(depth))
465             level = depth + 1;
466
467         // begin starting markup for element
468
buf.append('<');
469         buf.append(qName);
470
471         // Process attributes (xmlns are supposed to be included)
472
if (orderAttributes)
473             atts = canonicalizeAttributes(atts);
474
475         int nbAtts = atts.getLength();
476         String JavaDoc attValue;
477         for (int i = 0; i < nbAtts; i++) {
478             buf.append(' ');
479             buf.append(atts.getQName(i));
480             buf.append("=\"");
481
482             attValue = atts.getValue(i);
483
484             int max = attValue.length();
485             int j, last = 0;
486             for (j = 0; j < max; j++) {
487                 if (isAttributeForbidden(attValue.charAt(j))) {
488                     buf.append(attValue.substring(last, j));
489                     buf.append(getPredefinedEntityRef(attValue.charAt(j)));
490                     last = j + 1;
491                 }
492             }
493             if ((last == 0) || (last < max))
494                 buf.append(attValue.substring(last, j));
495
496             buf.append('"');
497         }
498
499         extraDataEncountered = false;
500         charDataEncountered = false;
501         elementPending = true;
502         lastStartIsStart = true;
503     }
504
505     private Attributes JavaDoc canonicalizeAttributes(Attributes JavaDoc atts) {
506         int nbAtts = atts.getLength();
507         if (nbAtts <= 1)
508             return atts;
509
510         if (orderedAtts == null)
511             orderedAtts = new SortableAttributes(atts);
512         else
513             orderedAtts.setAttributes(atts);
514
515         orderedAtts.sort();
516         return orderedAtts;
517     }
518
519     public void completeStartTag() {
520         // Complete potential start markup
521
if (elementPending) {
522             buf.append('>');
523             writer.write(buf.toString());
524             buf.setLength(0);
525             elementPending = false;
526         }
527     }
528
529     public void processingInstruction(String JavaDoc target, String JavaDoc data)
530             throws SAXException JavaDoc {
531         completeStartTag();
532
533         writer.write("<?");
534         writer.write(target);
535         writer.write(' ');
536
537         char[] ch = data.toCharArray();
538         int max = data.length();
539         int i, last = 0;
540         for (i = 0; i < max; i++) {
541             if (ch[i] == 0xA) {
542                 writer.write(ch, last, i - last);
543                 writer.println();
544                 last = i + 1;
545             }
546         }
547         if ((last == 0) || (last < max))
548             writer.write(ch, last, i - last);
549
550         writer.write("?>");
551         extraDataEncountered = true;
552     }
553
554     public void endElement(String JavaDoc namespaceURI, String JavaDoc localName, String JavaDoc qName)
555             throws SAXException JavaDoc {
556         // Complete potential start markup
557
if (elementPending) {
558             buf.append("/>");
559             writer.write(buf.toString());
560             buf.setLength(0);
561         } else {
562             if (!charDataEncountered && (!lastStartIsStart || (depth == 0))
563                     && carriageReturn(depth))
564                 level = depth;
565
566             // process ending markup for element
567
buf.append("</");
568             buf.append(qName);
569             buf.append('>');
570             writer.write(buf.toString());
571             buf.setLength(0);
572         }
573         charDataEncountered = false;
574         extraDataEncountered = false;
575         elementPending = false;
576         lastStartIsStart = false;
577         depth--;
578         writer.flush();
579     }
580
581     public void endDocument() throws SAXException JavaDoc {
582         writer.println();
583         if (closeStreamAutomatically)
584             writer.close();
585         else
586             writer.flush();
587         reset();
588     }
589
590     public void startCDATA() throws SAXException JavaDoc {
591         // Complete potential start markup
592
completeStartTag();
593         writer.print("<![CDATA[");
594         cdata = true;
595     }
596
597     public void endCDATA() throws SAXException JavaDoc {
598         writer.print("]]>");
599         cdata = false;
600     }
601
602     public void comment(char[] ch, int start, int length) throws SAXException JavaDoc {
603         completeStartTag();
604
605         writer.write("<!--");
606
607         int max = start + length;
608         int i, last = start;
609         for (i = start; i < max; i++) {
610             if (ch[i] == 0xA) {
611                 writer.write(ch, last, i - last);
612                 writer.println();
613                 last = i + 1;
614             }
615         }
616         if ((last == start) || (last < max))
617             writer.write(ch, last, i - last);
618
619         writer.write("-->");
620         extraDataEncountered = true;
621     }
622
623     private class SortableAttributes implements Attributes JavaDoc {
624         SortableAttribute[] attTab;
625
626         int nbAtts = 0;
627
628         SortableAttributes(Attributes JavaDoc atts) {
629             setAttributes(atts);
630         }
631
632         void setAttributes(Attributes JavaDoc atts) {
633             nbAtts = atts.getLength();
634
635             if ((attTab == null) || (attTab.length < nbAtts)) {
636                 attTab = new SortableAttribute[nbAtts];
637                 for (int i = 0; i < nbAtts; i++)
638                     attTab[i] = new SortableAttribute(atts.getURI(i), atts
639                             .getLocalName(i), atts.getQName(i), atts
640                             .getValue(i));
641             } else
642                 for (int i = 0; i < nbAtts; i++)
643                     attTab[i].set(atts.getURI(i), atts.getLocalName(i), atts
644                             .getQName(i), atts.getValue(i));
645         }
646
647         void sort() {
648             Arrays.sort(attTab, 0, nbAtts);
649         }
650
651         public int getIndex(String JavaDoc str) {
652             return -1; // not Implemented
653
}
654
655         public int getIndex(String JavaDoc str, String JavaDoc str1) {
656             return -1; // not Implemented
657
}
658
659         public int getLength() {
660             return nbAtts;
661         }
662
663         public String JavaDoc getLocalName(int param) {
664             return attTab[param].localName;
665         }
666
667         public String JavaDoc getQName(int param) {
668             return attTab[param].QName;
669         }
670
671         public String JavaDoc getType(int param) {
672             return ""; // not Implemented
673
}
674
675         public String JavaDoc getType(String JavaDoc str) {
676             return ""; // not Implemented
677
}
678
679         public String JavaDoc getType(String JavaDoc str, String JavaDoc str1) {
680             return ""; // not Implemented
681
}
682
683         public String JavaDoc getURI(int param) {
684             return attTab[param].namespaceURI;
685         }
686
687         public String JavaDoc getValue(String JavaDoc str) {
688             return ""; // not Implemented
689
}
690
691         public String JavaDoc getValue(int param) {
692             return attTab[param].value;
693         }
694
695         public String JavaDoc getValue(String JavaDoc str, String JavaDoc str1) {
696             return ""; // not Implemented
697
}
698
699     }
700
701     ////////////////////////////////////////////////////////////////////
702
// PRIVATE
703
////////////////////////////////////////////////////////////////////
704
private boolean carriageReturn(int indent) {
705         boolean ret = false;
706         if (this.indent || (level == 0)) {
707             ret = (indent <= level);
708             if (ret) // not for mixed
709
{
710                 if (indent == 0) {
711                     if (prologGenerated)
712                         writer.println();
713                 } else {
714                     writer.println();
715                     indent(indent);
716                 }
717             }
718         }
719         return ret;
720     }
721
722     private void indent(int indent) {
723         if (indent < 0)
724             return;
725         int quotient = indent / INDENT_CHAR_ARRAY_SIZE;
726
727         for (int i = 1; i <= quotient; i++) {
728             writer.write(INDENT_CHAR_ARRAY, 0, INDENT_CHAR_ARRAY_SIZE);
729         }
730         writer.write(INDENT_CHAR_ARRAY, 0, indent % INDENT_CHAR_ARRAY_SIZE);
731     }
732
733     /**
734      * Canonical XML rules are:
735      * <ul>
736      * <li>An element's namespace and attribute nodes have a document order
737      * position greater than the element but less than any child node of the
738      * element.</li>
739      * <li>Namespace nodes have a lesser document order position than attribute
740      * nodes.</li>
741      * <li>An element's namespace nodes are sorted lexicographically by local
742      * name (the default namespace node, if one exists, has no local name and is
743      * therefore lexicographically least).</li>
744      * <li>An element's attribute nodes are sorted lexicographically with
745      * namespace URI as the primary key and local name as the secondary key (an
746      * empty namespace URI is lexicographically least).</li>
747      * </ul>
748      */

749     private class SortableAttribute implements Comparable JavaDoc, SAXConstants {
750         String JavaDoc namespaceURI;
751
752         String JavaDoc localName;
753
754         String JavaDoc QName;
755
756         String JavaDoc value;
757
758         SortableAttribute(String JavaDoc namespaceURI, String JavaDoc localName, String JavaDoc QName,
759                 String JavaDoc value) {
760             set(namespaceURI, localName, QName, value);
761         }
762
763         void set(String JavaDoc namespaceURI, String JavaDoc localName, String JavaDoc QName,
764                 String JavaDoc value) {
765             this.namespaceURI = namespaceURI;
766             this.localName = localName;
767             this.QName = QName;
768             this.value = value;
769         }
770
771         public int compareTo(Object JavaDoc o) {
772             SortableAttribute obj = (SortableAttribute) o;
773
774             if (isNamespace()) {
775                 if (obj.isNamespace())
776                     return localName.compareTo(obj.localName);
777                 else
778                     return -1;
779             } else if (obj.isNamespace())
780                 return 1;
781             else // both are normal attributes
782
{
783                 int comparison = namespaceURI.compareTo(obj.namespaceURI);
784                 if (comparison == 0) // then check the local name
785
return localName.compareTo(obj.localName);
786                 else
787                     return comparison;
788             }
789         }
790
791         boolean isNamespace() {
792             return (namespaceURI.equals(XMLNS_URI));
793         }
794     }
795
796 }
Popular Tags