KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > com > sun > org > apache > xerces > internal > util > XML11Char


1 /*
2  * The Apache Software License, Version 1.1
3  *
4  *
5  * Copyright (c) 1999-2004 The Apache Software Foundation. All rights
6  * reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  * if any, must include the following acknowledgment:
22  * "This product includes software developed by the
23  * Apache Software Foundation (http://www.apache.org/)."
24  * Alternately, this acknowledgment may appear in the software itself,
25  * if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Xerces" and "Apache Software Foundation" must
28  * not be used to endorse or promote products derived from this
29  * software without prior written permission. For written
30  * permission, please contact apache@apache.org.
31  *
32  * 5. Products derived from this software may not be called "Apache",
33  * nor may "Apache" appear in their name, without prior written
34  * permission of the Apache Software Foundation.
35  *
36  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47  * SUCH DAMAGE.
48  * ====================================================================
49  *
50  * This software consists of voluntary contributions made by many
51  * individuals on behalf of the Apache Software Foundation and was
52  * originally based on software copyright (c) 1999, International
53  * Business Machines, Inc., http://www.apache.org. For more
54  * information on the Apache Software Foundation, please see
55  * <http://www.apache.org/>.
56  */

57
58 package com.sun.org.apache.xerces.internal.util;
59
import java.util.Arrays;
61
/**
 * This class defines the basic properties of characters in XML 1.1. The data
 * in this class can be used to verify that a character is a valid
 * XML 1.1 character or if the character is a space, name start, or name
 * character.
 * <p>
 * A series of convenience methods are supplied to ease the burden
 * of the developer. Each method looks the character up in the internal
 * <code>XML11CHARS</code> flag table and applies the appropriate mask
 * (e.g. <code>MASK_XML11_VALID</code>). The table only covers
 * 0x0000-0xFFFF; characters in the range 0x10000-0x10FFFF are handled
 * separately by each method (see <code>isXML11Valid</code> for details).
 * Values outside 0x0000-0x10FFFF, including negative values, carry no flags.
 *
 * @author Glenn Marcy, IBM
 * @author Andy Clark, IBM
 * @author Arnaud Le Hors, IBM
 * @author Neil Graham, IBM
 * @author Michael Glavassevich, IBM
 *
 * @version $Id: XML11Char.java,v 1.6 2004/02/03 20:34:27 mrglavas Exp $
 */
public class XML11Char {

    //
    // Constants
    //

    /** Character flags for XML 1.1, indexed by UTF-16 code unit (0x0000-0xFFFF). */
    private static final byte[] XML11CHARS = new byte[1 << 16];

    /** XML 1.1 Valid character mask. */
    public static final int MASK_XML11_VALID = 0x01;

    /** XML 1.1 Space character mask. */
    public static final int MASK_XML11_SPACE = 0x02;

    /** XML 1.1 Name start character mask. */
    public static final int MASK_XML11_NAME_START = 0x04;

    /** XML 1.1 Name character mask. */
    public static final int MASK_XML11_NAME = 0x08;

    /** XML 1.1 control character mask */
    public static final int MASK_XML11_CONTROL = 0x10;

    /** XML 1.1 content for external entities (valid - "special" chars - control chars) */
    public static final int MASK_XML11_CONTENT = 0x20;

    /** XML namespaces 1.1 NCNameStart */
    public static final int MASK_XML11_NCNAME_START = 0x40;

    /** XML namespaces 1.1 NCName */
    public static final int MASK_XML11_NCNAME = 0x80;

    /** XML 1.1 content for internal entities (valid - "special" chars) */
    public static final int MASK_XML11_CONTENT_INTERNAL = MASK_XML11_CONTROL | MASK_XML11_CONTENT;

    //
    // Static initialization
    //

    static {

        // Initializing the Character Flag Array
        // Code generated by: XML11CharGenerator.
        //
        // Entries that are never assigned below (index 0, 55296-57343 and
        // 65534-65535) keep the array default of 0, i.e. no flags at all.

        Arrays.fill(XML11CHARS, 1, 9, (byte) 17); // Fill 8 of value (byte) 17
        XML11CHARS[9] = 35;
        XML11CHARS[10] = 3;
        Arrays.fill(XML11CHARS, 11, 13, (byte) 17); // Fill 2 of value (byte) 17
        XML11CHARS[13] = 3;
        Arrays.fill(XML11CHARS, 14, 32, (byte) 17); // Fill 18 of value (byte) 17
        XML11CHARS[32] = 35;
        Arrays.fill(XML11CHARS, 33, 38, (byte) 33); // Fill 5 of value (byte) 33
        XML11CHARS[38] = 1;
        Arrays.fill(XML11CHARS, 39, 45, (byte) 33); // Fill 6 of value (byte) 33
        Arrays.fill(XML11CHARS, 45, 47, (byte) -87); // Fill 2 of value (byte) -87
        XML11CHARS[47] = 33;
        Arrays.fill(XML11CHARS, 48, 58, (byte) -87); // Fill 10 of value (byte) -87
        XML11CHARS[58] = 45;
        XML11CHARS[59] = 33;
        XML11CHARS[60] = 1;
        Arrays.fill(XML11CHARS, 61, 65, (byte) 33); // Fill 4 of value (byte) 33
        Arrays.fill(XML11CHARS, 65, 91, (byte) -19); // Fill 26 of value (byte) -19
        Arrays.fill(XML11CHARS, 91, 93, (byte) 33); // Fill 2 of value (byte) 33
        XML11CHARS[93] = 1;
        XML11CHARS[94] = 33;
        XML11CHARS[95] = -19;
        XML11CHARS[96] = 33;
        Arrays.fill(XML11CHARS, 97, 123, (byte) -19); // Fill 26 of value (byte) -19
        Arrays.fill(XML11CHARS, 123, 127, (byte) 33); // Fill 4 of value (byte) 33
        Arrays.fill(XML11CHARS, 127, 133, (byte) 17); // Fill 6 of value (byte) 17
        XML11CHARS[133] = 35;
        Arrays.fill(XML11CHARS, 134, 160, (byte) 17); // Fill 26 of value (byte) 17
        Arrays.fill(XML11CHARS, 160, 183, (byte) 33); // Fill 23 of value (byte) 33
        XML11CHARS[183] = -87;
        Arrays.fill(XML11CHARS, 184, 192, (byte) 33); // Fill 8 of value (byte) 33
        Arrays.fill(XML11CHARS, 192, 215, (byte) -19); // Fill 23 of value (byte) -19
        XML11CHARS[215] = 33;
        Arrays.fill(XML11CHARS, 216, 247, (byte) -19); // Fill 31 of value (byte) -19
        XML11CHARS[247] = 33;
        Arrays.fill(XML11CHARS, 248, 768, (byte) -19); // Fill 520 of value (byte) -19
        Arrays.fill(XML11CHARS, 768, 880, (byte) -87); // Fill 112 of value (byte) -87
        Arrays.fill(XML11CHARS, 880, 894, (byte) -19); // Fill 14 of value (byte) -19
        XML11CHARS[894] = 33;
        Arrays.fill(XML11CHARS, 895, 8192, (byte) -19); // Fill 7297 of value (byte) -19
        Arrays.fill(XML11CHARS, 8192, 8204, (byte) 33); // Fill 12 of value (byte) 33
        Arrays.fill(XML11CHARS, 8204, 8206, (byte) -19); // Fill 2 of value (byte) -19
        Arrays.fill(XML11CHARS, 8206, 8232, (byte) 33); // Fill 26 of value (byte) 33
        XML11CHARS[8232] = 35;
        Arrays.fill(XML11CHARS, 8233, 8255, (byte) 33); // Fill 22 of value (byte) 33
        Arrays.fill(XML11CHARS, 8255, 8257, (byte) -87); // Fill 2 of value (byte) -87
        Arrays.fill(XML11CHARS, 8257, 8304, (byte) 33); // Fill 47 of value (byte) 33
        Arrays.fill(XML11CHARS, 8304, 8592, (byte) -19); // Fill 288 of value (byte) -19
        Arrays.fill(XML11CHARS, 8592, 11264, (byte) 33); // Fill 2672 of value (byte) 33
        Arrays.fill(XML11CHARS, 11264, 12272, (byte) -19); // Fill 1008 of value (byte) -19
        Arrays.fill(XML11CHARS, 12272, 12289, (byte) 33); // Fill 17 of value (byte) 33
        Arrays.fill(XML11CHARS, 12289, 55296, (byte) -19); // Fill 43007 of value (byte) -19
        Arrays.fill(XML11CHARS, 57344, 63744, (byte) 33); // Fill 6400 of value (byte) 33
        Arrays.fill(XML11CHARS, 63744, 64976, (byte) -19); // Fill 1232 of value (byte) -19
        Arrays.fill(XML11CHARS, 64976, 65008, (byte) 33); // Fill 32 of value (byte) 33
        Arrays.fill(XML11CHARS, 65008, 65534, (byte) -19); // Fill 526 of value (byte) -19

    } // <clinit>()

    //
    // Public static methods
    //

    /**
     * Returns true if the specified character is a space character
     * as amended in the XML 1.1 specification.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Space(int c) {
        return (flags(c) & MASK_XML11_SPACE) != 0;
    } // isXML11Space(int):boolean

    /**
     * Returns true if the specified character is valid. This method
     * also checks the surrogate character range from 0x10000 to 0x10FFFF.
     * <p>
     * The flag table itself only covers 0x0000-0xFFFF, so the
     * supplementary range is accepted by an explicit range test.
     * Values below zero or above 0x10FFFF are reported as not valid.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Valid(int c) {
        return (flags(c) & MASK_XML11_VALID) != 0
                || (0x10000 <= c && c <= 0x10FFFF);
    } // isXML11Valid(int):boolean

    /**
     * Returns true if the specified character is invalid.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Invalid(int c) {
        return !isXML11Valid(c);
    } // isXML11Invalid(int):boolean

    /**
     * Returns true if the specified character is valid and permitted outside
     * of a character reference.
     * That is, this method will return false for the same set as
     * isXML11Valid, except it also reports false for "control characters".
     *
     * @param c The character to check.
     */
    public static boolean isXML11ValidLiteral(int c) {
        final int f = flags(c);
        return ((f & MASK_XML11_VALID) != 0 && (f & MASK_XML11_CONTROL) == 0)
                || (0x10000 <= c && c <= 0x10FFFF);
    } // isXML11ValidLiteral(int):boolean

    /**
     * Returns true if the specified character can be considered
     * content in an external parsed entity.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Content(int c) {
        return (flags(c) & MASK_XML11_CONTENT) != 0
                || (0x10000 <= c && c <= 0x10FFFF);
    } // isXML11Content(int):boolean

    /**
     * Returns true if the specified character can be considered
     * content in an internal parsed entity. A character qualifies when
     * either its content flag or its control flag is set, or when it lies
     * in the range 0x10000-0x10FFFF.
     *
     * @param c The character to check.
     */
    public static boolean isXML11InternalEntityContent(int c) {
        return (flags(c) & MASK_XML11_CONTENT_INTERNAL) != 0
                || (0x10000 <= c && c <= 0x10FFFF);
    } // isXML11InternalEntityContent(int):boolean

    /**
     * Returns true if the specified character is a valid name start
     * character as defined by production [4] in the XML 1.1
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isXML11NameStart(int c) {
        return (flags(c) & MASK_XML11_NAME_START) != 0
                || (0x10000 <= c && c < 0xF0000);
    } // isXML11NameStart(int):boolean

    /**
     * Returns true if the specified character is a valid name
     * character as defined by production [4a] in the XML 1.1
     * specification.
     *
     * @param c The character to check.
     */
    public static boolean isXML11Name(int c) {
        return (flags(c) & MASK_XML11_NAME) != 0
                || (0x10000 <= c && c < 0xF0000);
    } // isXML11Name(int):boolean

    /**
     * Returns true if the specified character is a valid NCName start
     * character as defined by production [4] in Namespaces in XML
     * 1.1 recommendation.
     *
     * @param c The character to check.
     */
    public static boolean isXML11NCNameStart(int c) {
        return (flags(c) & MASK_XML11_NCNAME_START) != 0
                || (0x10000 <= c && c < 0xF0000);
    } // isXML11NCNameStart(int):boolean

    /**
     * Returns true if the specified character is a valid NCName
     * character as defined by production [5] in Namespaces in XML
     * 1.1 recommendation.
     *
     * @param c The character to check.
     */
    public static boolean isXML11NCName(int c) {
        return (flags(c) & MASK_XML11_NCNAME) != 0
                || (0x10000 <= c && c < 0xF0000);
    } // isXML11NCName(int):boolean

    /**
     * Returns whether the given character is a valid
     * high surrogate for a name character. This includes
     * all high surrogates for characters [0x10000-0xEFFFF].
     * In other words everything excluding planes 15 and 16.
     *
     * @param c The character to check.
     */
    public static boolean isXML11NameHighSurrogate(int c) {
        return (0xD800 <= c && c <= 0xDB7F);
    } // isXML11NameHighSurrogate(int):boolean

    /*
     * [5] Name ::= NameStartChar NameChar*
     */
    /**
     * Check to see if a string is a valid Name according to [5]
     * in the XML 1.1 Recommendation
     *
     * @param name string to check
     * @return true if name is a valid Name
     * @throws NullPointerException if name is null
     */
    public static boolean isXML11ValidName(String name) {
        return matchesNameProduction(name, MASK_XML11_NAME_START, MASK_XML11_NAME);
    } // isXML11ValidName(String):boolean

    /*
     * from the namespace 1.1 rec
     * [4] NCName ::= NCNameStartChar NCNameChar*
     */
    /**
     * Check to see if a string is a valid NCName according to [4]
     * from the XML Namespaces 1.1 Recommendation
     *
     * @param ncName string to check
     * @return true if ncName is a valid NCName
     * @throws NullPointerException if ncName is null
     */
    public static boolean isXML11ValidNCName(String ncName) {
        return matchesNameProduction(ncName, MASK_XML11_NCNAME_START, MASK_XML11_NCNAME);
    } // isXML11ValidNCName(String):boolean

    /*
     * [7] Nmtoken ::= (NameChar)+
     */
    /**
     * Check to see if a string is a valid Nmtoken according to [7]
     * in the XML 1.1 Recommendation
     *
     * @param nmtoken string to check
     * @return true if nmtoken is a valid Nmtoken
     * @throws NullPointerException if nmtoken is null
     */
    public static boolean isXML11ValidNmtoken(String nmtoken) {
        // An Nmtoken has no distinguished first character: every position
        // must satisfy the name-character rule.
        return matchesNameProduction(nmtoken, MASK_XML11_NAME, MASK_XML11_NAME);
    } // isXML11ValidNmtoken(String):boolean

    //
    // Private static methods
    //

    /**
     * Returns the flag byte for c, or 0 (no flags) when c has no entry in
     * the table. The unsigned shift rejects both c > 0xFFFF and negative c
     * with a single comparison, so a negative argument can never be used
     * as an array index.
     */
    private static int flags(int c) {
        return (c >>> 16) == 0 ? XML11CHARS[c] : 0;
    } // flags(int):int

    /**
     * Returns true if s is non-empty, its first character satisfies
     * firstMask and every following character satisfies restMask, where a
     * "character" is either one UTF-16 unit or a surrogate pair.
     */
    private static boolean matchesNameProduction(String s, int firstMask, int restMask) {
        final int length = s.length();
        if (length == 0) {
            return false;
        }
        int i = nextNameCharIndex(s, 0, length, firstMask);
        while (i != -1 && i < length) {
            i = nextNameCharIndex(s, i, length, restMask);
        }
        // i == -1 means some position failed; otherwise the scan stopped
        // exactly at the end of the string.
        return i == length;
    } // matchesNameProduction(String,int,int):boolean

    /**
     * Returns the index just past the character starting at s[i] if that
     * character is acceptable for the given mask, or -1 if it is not.
     */
    private static int nextNameCharIndex(String s, int i, int length, int mask) {
        final char ch = s.charAt(i);
        if ((XML11CHARS[ch] & mask) != 0) {
            return i + 1;
        }
        if (isXML11NameHighSurrogate(ch) && i + 1 < length
                && Character.isLowSurrogate(s.charAt(i + 1))) {
            // A high surrogate in 0xD800-0xDB7F combined with any low
            // surrogate decodes to 0x10000-0xEFFFF, which all four name
            // predicates above accept, so no further check is needed.
            return i + 2;
        }
        return -1;
    } // nextNameCharIndex(String,int,int,int):int

} // class XML11Char
451

452
Popular Tags