KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > jboss > axis > utils > XMLChar


1 /*
2  * The Apache Software License, Version 1.1
3  *
4  *
5  * Copyright (c) 1999-2002 The Apache Software Foundation. All rights
6  * reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  * if any, must include the following acknowledgment:
22  * "This product includes software developed by the
23  * Apache Software Foundation (http://www.apache.org/)."
24  * Alternately, this acknowledgment may appear in the software itself,
25  * if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Xerces" and "Apache Software Foundation" must
28  * not be used to endorse or promote products derived from this
29  * software without prior written permission. For written
30  * permission, please contact apache@apache.org.
31  *
32  * 5. Products derived from this software may not be called "Apache",
33  * nor may "Apache" appear in their name, without prior written
34  * permission of the Apache Software Foundation.
35  *
36  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
40  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
41  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
42  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
43  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
44  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
45  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
46  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47  * SUCH DAMAGE.
48  * ====================================================================
49  *
50  * This software consists of voluntary contributions made by many
51  * individuals on behalf of the Apache Software Foundation and was
52  * originally based on software copyright (c) 1999, International
53  * Business Machines, Inc., http://www.apache.org. For more
54  * information on the Apache Software Foundation, please see
55  * <http://www.apache.org/>.
56  */

57
58 package org.jboss.axis.utils;
59
/**
 * This class defines the basic XML character properties. The data
 * in this class can be used to verify that a character is a valid
 * XML character or if the character is a space, name start, or name
 * character.
 * <p/>
 * The character property table (<code>CHARS</code>) is private to this
 * class; callers query it through the static convenience methods
 * (<code>isValid</code>, <code>isSpace</code>, <code>isName</code>, ...).
 * Every table-backed method returns <code>false</code> for arguments
 * outside the range 0x0000-0xFFFF, except <code>isValid</code> and
 * <code>isContent</code>, which also accept the supplemental range
 * 0x10000-0x10FFFF.
 *
 * @author Glenn Marcy, IBM
 * @author Andy Clark, IBM
 * @author Eric Ye, IBM
 * @author Arnaud Le Hors, IBM
 * @author Rahul Srivastava, Sun Microsystems Inc.
 * @version $Id: XMLChar.java,v 1.1.2.1 2005/03/02 14:30:12 tdiesler Exp $
 */
public class XMLChar
{

   //
   // Constants
   //

   /**
    * Character flags, one byte of <code>MASK_*</code> bits per UTF-16
    * code unit (0x0000-0xFFFF). Filled in once by the static initializer.
    */
   private static final byte[] CHARS = new byte[1 << 16];

   /**
    * Valid character mask.
    */
   public static final int MASK_VALID = 0x01;

   /**
    * Space character mask.
    */
   public static final int MASK_SPACE = 0x02;

   /**
    * Name start character mask.
    */
   public static final int MASK_NAME_START = 0x04;

   /**
    * Name character mask.
    */
   public static final int MASK_NAME = 0x08;

   /**
    * Pubid character mask.
    */
   public static final int MASK_PUBID = 0x10;

   /**
    * Content character mask. Special characters are those that can
    * be considered the start of markup, such as '&lt;' and '&amp;'.
    * The various newline characters are considered special as well.
    * All other valid XML characters can be considered content.
    * <p/>
    * This is an optimization for the inner loop of character scanning.
    */
   public static final int MASK_CONTENT = 0x20;

   /**
    * NCName start character mask.
    */
   public static final int MASK_NCNAME_START = 0x40;

   /**
    * NCName character mask.
    */
   public static final int MASK_NCNAME = 0x80;

   //
   // Static initialization
   //

   static
   {
      //
      // [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] |
      //              [#xE000-#xFFFD] | [#x10000-#x10FFFF]
      //
      // Inclusive (first, last) pairs; the supplemental range does not
      // fit in the table and is handled by isValid/isContent directly.
      int[] charRange = {
         0x0009, 0x000A, 0x000D, 0x000D, 0x0020, 0xD7FF, 0xE000, 0xFFFD,
      };

      //
      // [3] S ::= (#x20 | #x9 | #xD | #xA)+
      //
      int[] spaceChar = {
         0x0020, 0x0009, 0x000D, 0x000A,
      };

      //
      // [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
      //                  CombiningChar | Extender
      //
      int[] nameChar = {
         0x002D, 0x002E, // '-' and '.'
      };

      //
      // [5] Name ::= (Letter | '_' | ':') (NameChar)*
      //
      int[] nameStartChar = {
         0x003A, 0x005F, // ':' and '_'
      };

      //
      // [13] PubidChar ::= #x20 | 0xD | 0xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
      //
      int[] pubidChar = {
         0x000A, 0x000D, 0x0020, 0x0021, 0x0023, 0x0024, 0x0025, 0x003D,
         0x005F
      };

      int[] pubidRange = {
         0x0027, 0x003B, 0x003F, 0x005A, 0x0061, 0x007A
      };

      //
      // [84] Letter ::= BaseChar | Ideographic
      //
      int[] letterRange = {
         // BaseChar
         0x0041, 0x005A, 0x0061, 0x007A, 0x00C0, 0x00D6, 0x00D8, 0x00F6,
         0x00F8, 0x0131, 0x0134, 0x013E, 0x0141, 0x0148, 0x014A, 0x017E,
         0x0180, 0x01C3, 0x01CD, 0x01F0, 0x01F4, 0x01F5, 0x01FA, 0x0217,
         0x0250, 0x02A8, 0x02BB, 0x02C1, 0x0388, 0x038A, 0x038E, 0x03A1,
         0x03A3, 0x03CE, 0x03D0, 0x03D6, 0x03E2, 0x03F3, 0x0401, 0x040C,
         0x040E, 0x044F, 0x0451, 0x045C, 0x045E, 0x0481, 0x0490, 0x04C4,
         0x04C7, 0x04C8, 0x04CB, 0x04CC, 0x04D0, 0x04EB, 0x04EE, 0x04F5,
         0x04F8, 0x04F9, 0x0531, 0x0556, 0x0561, 0x0586, 0x05D0, 0x05EA,
         0x05F0, 0x05F2, 0x0621, 0x063A, 0x0641, 0x064A, 0x0671, 0x06B7,
         0x06BA, 0x06BE, 0x06C0, 0x06CE, 0x06D0, 0x06D3, 0x06E5, 0x06E6,
         0x0905, 0x0939, 0x0958, 0x0961, 0x0985, 0x098C, 0x098F, 0x0990,
         0x0993, 0x09A8, 0x09AA, 0x09B0, 0x09B6, 0x09B9, 0x09DC, 0x09DD,
         0x09DF, 0x09E1, 0x09F0, 0x09F1, 0x0A05, 0x0A0A, 0x0A0F, 0x0A10,
         0x0A13, 0x0A28, 0x0A2A, 0x0A30, 0x0A32, 0x0A33, 0x0A35, 0x0A36,
         0x0A38, 0x0A39, 0x0A59, 0x0A5C, 0x0A72, 0x0A74, 0x0A85, 0x0A8B,
         0x0A8F, 0x0A91, 0x0A93, 0x0AA8, 0x0AAA, 0x0AB0, 0x0AB2, 0x0AB3,
         0x0AB5, 0x0AB9, 0x0B05, 0x0B0C, 0x0B0F, 0x0B10, 0x0B13, 0x0B28,
         0x0B2A, 0x0B30, 0x0B32, 0x0B33, 0x0B36, 0x0B39, 0x0B5C, 0x0B5D,
         0x0B5F, 0x0B61, 0x0B85, 0x0B8A, 0x0B8E, 0x0B90, 0x0B92, 0x0B95,
         0x0B99, 0x0B9A, 0x0B9E, 0x0B9F, 0x0BA3, 0x0BA4, 0x0BA8, 0x0BAA,
         0x0BAE, 0x0BB5, 0x0BB7, 0x0BB9, 0x0C05, 0x0C0C, 0x0C0E, 0x0C10,
         0x0C12, 0x0C28, 0x0C2A, 0x0C33, 0x0C35, 0x0C39, 0x0C60, 0x0C61,
         0x0C85, 0x0C8C, 0x0C8E, 0x0C90, 0x0C92, 0x0CA8, 0x0CAA, 0x0CB3,
         0x0CB5, 0x0CB9, 0x0CE0, 0x0CE1, 0x0D05, 0x0D0C, 0x0D0E, 0x0D10,
         0x0D12, 0x0D28, 0x0D2A, 0x0D39, 0x0D60, 0x0D61, 0x0E01, 0x0E2E,
         0x0E32, 0x0E33, 0x0E40, 0x0E45, 0x0E81, 0x0E82, 0x0E87, 0x0E88,
         0x0E94, 0x0E97, 0x0E99, 0x0E9F, 0x0EA1, 0x0EA3, 0x0EAA, 0x0EAB,
         0x0EAD, 0x0EAE, 0x0EB2, 0x0EB3, 0x0EC0, 0x0EC4, 0x0F40, 0x0F47,
         0x0F49, 0x0F69, 0x10A0, 0x10C5, 0x10D0, 0x10F6, 0x1102, 0x1103,
         0x1105, 0x1107, 0x110B, 0x110C, 0x110E, 0x1112, 0x1154, 0x1155,
         0x115F, 0x1161, 0x116D, 0x116E, 0x1172, 0x1173, 0x11AE, 0x11AF,
         0x11B7, 0x11B8, 0x11BC, 0x11C2, 0x1E00, 0x1E9B, 0x1EA0, 0x1EF9,
         0x1F00, 0x1F15, 0x1F18, 0x1F1D, 0x1F20, 0x1F45, 0x1F48, 0x1F4D,
         0x1F50, 0x1F57, 0x1F5F, 0x1F7D, 0x1F80, 0x1FB4, 0x1FB6, 0x1FBC,
         0x1FC2, 0x1FC4, 0x1FC6, 0x1FCC, 0x1FD0, 0x1FD3, 0x1FD6, 0x1FDB,
         0x1FE0, 0x1FEC, 0x1FF2, 0x1FF4, 0x1FF6, 0x1FFC, 0x212A, 0x212B,
         0x2180, 0x2182, 0x3041, 0x3094, 0x30A1, 0x30FA, 0x3105, 0x312C,
         0xAC00, 0xD7A3,
         // Ideographic
         0x3021, 0x3029, 0x4E00, 0x9FA5,
      };
      int[] letterChar = {
         // BaseChar
         0x0386, 0x038C, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x0559, 0x06D5,
         0x093D, 0x09B2, 0x0A5E, 0x0A8D, 0x0ABD, 0x0AE0, 0x0B3D, 0x0B9C,
         0x0CDE, 0x0E30, 0x0E84, 0x0E8A, 0x0E8D, 0x0EA5, 0x0EA7, 0x0EB0,
         0x0EBD, 0x1100, 0x1109, 0x113C, 0x113E, 0x1140, 0x114C, 0x114E,
         0x1150, 0x1159, 0x1163, 0x1165, 0x1167, 0x1169, 0x1175, 0x119E,
         0x11A8, 0x11AB, 0x11BA, 0x11EB, 0x11F0, 0x11F9, 0x1F59, 0x1F5B,
         0x1F5D, 0x1FBE, 0x2126, 0x212E,
         // Ideographic
         0x3007,
      };

      //
      // [87] CombiningChar ::= ...
      //
      int[] combiningCharRange = {
         0x0300, 0x0345, 0x0360, 0x0361, 0x0483, 0x0486, 0x0591, 0x05A1,
         0x05A3, 0x05B9, 0x05BB, 0x05BD, 0x05C1, 0x05C2, 0x064B, 0x0652,
         0x06D6, 0x06DC, 0x06DD, 0x06DF, 0x06E0, 0x06E4, 0x06E7, 0x06E8,
         0x06EA, 0x06ED, 0x0901, 0x0903, 0x093E, 0x094C, 0x0951, 0x0954,
         0x0962, 0x0963, 0x0981, 0x0983, 0x09C0, 0x09C4, 0x09C7, 0x09C8,
         0x09CB, 0x09CD, 0x09E2, 0x09E3, 0x0A40, 0x0A42, 0x0A47, 0x0A48,
         0x0A4B, 0x0A4D, 0x0A70, 0x0A71, 0x0A81, 0x0A83, 0x0ABE, 0x0AC5,
         0x0AC7, 0x0AC9, 0x0ACB, 0x0ACD, 0x0B01, 0x0B03, 0x0B3E, 0x0B43,
         0x0B47, 0x0B48, 0x0B4B, 0x0B4D, 0x0B56, 0x0B57, 0x0B82, 0x0B83,
         0x0BBE, 0x0BC2, 0x0BC6, 0x0BC8, 0x0BCA, 0x0BCD, 0x0C01, 0x0C03,
         0x0C3E, 0x0C44, 0x0C46, 0x0C48, 0x0C4A, 0x0C4D, 0x0C55, 0x0C56,
         0x0C82, 0x0C83, 0x0CBE, 0x0CC4, 0x0CC6, 0x0CC8, 0x0CCA, 0x0CCD,
         0x0CD5, 0x0CD6, 0x0D02, 0x0D03, 0x0D3E, 0x0D43, 0x0D46, 0x0D48,
         0x0D4A, 0x0D4D, 0x0E34, 0x0E3A, 0x0E47, 0x0E4E, 0x0EB4, 0x0EB9,
         0x0EBB, 0x0EBC, 0x0EC8, 0x0ECD, 0x0F18, 0x0F19, 0x0F71, 0x0F84,
         0x0F86, 0x0F8B, 0x0F90, 0x0F95, 0x0F99, 0x0FAD, 0x0FB1, 0x0FB7,
         0x20D0, 0x20DC, 0x302A, 0x302F,
      };

      int[] combiningCharChar = {
         0x05BF, 0x05C4, 0x0670, 0x093C, 0x094D, 0x09BC, 0x09BE, 0x09BF,
         0x09D7, 0x0A02, 0x0A3C, 0x0A3E, 0x0A3F, 0x0ABC, 0x0B3C, 0x0BD7,
         0x0D57, 0x0E31, 0x0EB1, 0x0F35, 0x0F37, 0x0F39, 0x0F3E, 0x0F3F,
         0x0F97, 0x0FB9, 0x20E1, 0x3099, 0x309A,
      };

      //
      // [88] Digit ::= ...
      //
      int[] digitRange = {
         0x0030, 0x0039, 0x0660, 0x0669, 0x06F0, 0x06F9, 0x0966, 0x096F,
         0x09E6, 0x09EF, 0x0A66, 0x0A6F, 0x0AE6, 0x0AEF, 0x0B66, 0x0B6F,
         0x0BE7, 0x0BEF, 0x0C66, 0x0C6F, 0x0CE6, 0x0CEF, 0x0D66, 0x0D6F,
         0x0E50, 0x0E59, 0x0ED0, 0x0ED9, 0x0F20, 0x0F29,
      };

      //
      // [89] Extender ::= ...
      //
      int[] extenderRange = {
         0x3031, 0x3035, 0x309D, 0x309E, 0x30FC, 0x30FE,
      };

      int[] extenderChar = {
         0x00B7, 0x02D0, 0x02D1, 0x0387, 0x0640, 0x0E46, 0x0EC6, 0x3005,
      };

      //
      // SpecialChar ::= '<', '&', '\n', '\r', ']'
      //
      int[] specialChar = {
         '<', '&', '\n', '\r', ']',
      };

      //
      // Initialize
      //

      // Every valid character starts out as content ...
      setFlagsInRanges(charRange, MASK_VALID | MASK_CONTENT);

      // ... and the special characters then lose the content bit, so this
      // step must run after the one above.
      for (int i = 0; i < specialChar.length; i++)
      {
         CHARS[specialChar[i]] = (byte)(CHARS[specialChar[i]] & ~MASK_CONTENT);
      }

      // set space characters
      setFlags(spaceChar, MASK_SPACE);

      // set name start characters (these are name characters as well)
      final int nameStartFlags = MASK_NAME_START | MASK_NAME |
              MASK_NCNAME_START | MASK_NCNAME;
      setFlags(nameStartChar, nameStartFlags);
      setFlagsInRanges(letterRange, nameStartFlags);
      setFlags(letterChar, nameStartFlags);

      // set name characters
      final int nameFlags = MASK_NAME | MASK_NCNAME;
      setFlags(nameChar, nameFlags);
      setFlagsInRanges(digitRange, nameFlags);
      setFlagsInRanges(combiningCharRange, nameFlags);
      setFlags(combiningCharChar, nameFlags);
      setFlagsInRanges(extenderRange, nameFlags);
      setFlags(extenderChar, nameFlags);

      // remove ':' from allowable MASK_NCNAME_START and MASK_NCNAME chars
      CHARS[':'] &= ~(MASK_NCNAME_START | MASK_NCNAME);

      // set Pubid characters
      setFlags(pubidChar, MASK_PUBID);
      setFlagsInRanges(pubidRange, MASK_PUBID);

   } // <clinit>()

   //
   // Private helpers
   //

   /**
    * ORs the given flag bits into the table entry of each listed character.
    *
    * @param chars Individual character codes, each within 0x0000-0xFFFF.
    * @param flags The <code>MASK_*</code> bits to set.
    */
   private static void setFlags(int[] chars, int flags)
   {
      for (int i = 0; i < chars.length; i++)
      {
         CHARS[chars[i]] |= flags;
      }
   }

   /**
    * ORs the given flag bits into the table entry of every character in
    * each range.
    *
    * @param ranges Consecutive (first, last) pairs, both bounds inclusive
    *               and within 0x0000-0xFFFF.
    * @param flags  The <code>MASK_*</code> bits to set.
    */
   private static void setFlagsInRanges(int[] ranges, int flags)
   {
      for (int i = 0; i < ranges.length; i += 2)
      {
         for (int c = ranges[i]; c <= ranges[i + 1]; c++)
         {
            CHARS[c] |= flags;
         }
      }
   }

   /**
    * Returns true if <code>c</code> lies within the table (0x0000-0xFFFF)
    * and its entry has at least one bit of <code>mask</code> set. Values
    * outside the table, including negative ones, yield false instead of an
    * <code>ArrayIndexOutOfBoundsException</code>.
    *
    * @param c    The character to look up.
    * @param mask The <code>MASK_*</code> bit(s) to test.
    */
   private static boolean hasFlag(int c, int mask)
   {
      return c >= 0 && c < 0x10000 && (CHARS[c] & mask) != 0;
   }

   /**
    * Returns true if <code>c</code> is an ASCII letter (A-Z or a-z).
    *
    * @param c The character to check.
    */
   private static boolean isAsciiLetter(char c)
   {
      return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
   }

   /**
    * Returns true if <code>c</code> is allowed inside an encoding name:
    * an ASCII letter, an ASCII digit, '.', '_' or '-'.
    *
    * @param c The character to check.
    */
   private static boolean isEncodingNameChar(char c)
   {
      return isAsciiLetter(c) || (c >= '0' && c <= '9') ||
              c == '.' || c == '_' || c == '-';
   }

   //
   // Public static methods
   //

   /**
    * Returns true if the specified character is a supplemental character,
    * i.e. lies in the range 0x10000 to 0x10FFFF.
    *
    * @param c The character to check.
    */
   public static boolean isSupplemental(int c)
   {
      return (c >= 0x10000 && c <= 0x10FFFF);
   }

   /**
    * Returns the supplemental character corresponding to the given
    * surrogates. The arguments are not checked to be surrogates.
    *
    * @param h The high surrogate.
    * @param l The low surrogate.
    */
   public static int supplemental(char h, char l)
   {
      return (h - 0xD800) * 0x400 + (l - 0xDC00) + 0x10000;
   }

   /**
    * Returns the high surrogate of a supplemental character.
    *
    * @param c The supplemental character to "split".
    */
   public static char highSurrogate(int c)
   {
      return (char)(((c - 0x00010000) >> 10) + 0xD800);
   }

   /**
    * Returns the low surrogate of a supplemental character.
    *
    * @param c The supplemental character to "split".
    */
   public static char lowSurrogate(int c)
   {
      return (char)(((c - 0x00010000) & 0x3FF) + 0xDC00);
   }

   /**
    * Returns whether the given character is a high surrogate
    * (0xD800-0xDBFF).
    *
    * @param c The character to check.
    */
   public static boolean isHighSurrogate(int c)
   {
      return (0xD800 <= c && c <= 0xDBFF);
   }

   /**
    * Returns whether the given character is a low surrogate
    * (0xDC00-0xDFFF).
    *
    * @param c The character to check.
    */
   public static boolean isLowSurrogate(int c)
   {
      return (0xDC00 <= c && c <= 0xDFFF);
   }

   /**
    * Returns true if the specified character is valid. Besides the
    * table lookup for 0x0000-0xFFFF, this method also accepts the
    * supplemental character range from 0x10000 to 0x10FFFF. Any other
    * value, including a negative one, is reported as not valid.
    *
    * @param c The character to check.
    */
   public static boolean isValid(int c)
   {
      return hasFlag(c, MASK_VALID) || isSupplemental(c);
   } // isValid(int):boolean

   /**
    * Returns true if the specified character is invalid, i.e. exactly
    * when <code>isValid</code> returns false.
    *
    * @param c The character to check.
    */
   public static boolean isInvalid(int c)
   {
      return !isValid(c);
   } // isInvalid(int):boolean

   /**
    * Returns true if the specified character can be considered content.
    * All characters in the supplemental range 0x10000 to 0x10FFFF are
    * considered content.
    *
    * @param c The character to check.
    */
   public static boolean isContent(int c)
   {
      return hasFlag(c, MASK_CONTENT) || isSupplemental(c);
   } // isContent(int):boolean

   /**
    * Returns true if the specified character can be considered markup.
    * Markup characters include '&lt;', '&amp;', and '%'.
    *
    * @param c The character to check.
    */
   public static boolean isMarkup(int c)
   {
      return c == '<' || c == '&' || c == '%';
   } // isMarkup(int):boolean

   /**
    * Returns true if the specified character is a space character
    * as defined by production [3] in the XML 1.0 specification.
    *
    * @param c The character to check.
    */
   public static boolean isSpace(int c)
   {
      return hasFlag(c, MASK_SPACE);
   } // isSpace(int):boolean

   /**
    * Returns true if the specified character is a space character
    * as amended in the XML 1.1 specification: an XML 1.0 space
    * character, 0x85 or 0x2028.
    *
    * @param c The character to check.
    */
   public static boolean isXML11Space(int c)
   {
      return hasFlag(c, MASK_SPACE) || c == 0x85 || c == 0x2028;
   } // isXML11Space(int):boolean

   /**
    * Returns true if the specified character is a valid name start
    * character as defined by production [5] in the XML 1.0
    * specification.
    *
    * @param c The character to check.
    */
   public static boolean isNameStart(int c)
   {
      return hasFlag(c, MASK_NAME_START);
   } // isNameStart(int):boolean

   /**
    * Returns true if the specified character is a valid name
    * character as defined by production [4] in the XML 1.0
    * specification.
    *
    * @param c The character to check.
    */
   public static boolean isName(int c)
   {
      return hasFlag(c, MASK_NAME);
   } // isName(int):boolean

   /**
    * Returns true if the specified character is a valid NCName start
    * character as defined by production [4] in Namespaces in XML
    * recommendation.
    *
    * @param c The character to check.
    */
   public static boolean isNCNameStart(int c)
   {
      return hasFlag(c, MASK_NCNAME_START);
   } // isNCNameStart(int):boolean

   /**
    * Returns true if the specified character is a valid NCName
    * character as defined by production [5] in Namespaces in XML
    * recommendation.
    *
    * @param c The character to check.
    */
   public static boolean isNCName(int c)
   {
      return hasFlag(c, MASK_NCNAME);
   } // isNCName(int):boolean

   /**
    * Returns true if the specified character is a valid Pubid
    * character as defined by production [13] in the XML 1.0
    * specification.
    *
    * @param c The character to check.
    */
   public static boolean isPubid(int c)
   {
      return hasFlag(c, MASK_PUBID);
   } // isPubid(int):boolean

   /*
    * [5] Name ::= (Letter | '_' | ':') (NameChar)*
    */

   /**
    * Check to see if a string is a valid Name according to [5]
    * in the XML 1.0 Recommendation. The empty string is not a valid Name.
    *
    * @param name string to check
    * @return true if name is a valid Name
    * @throws NullPointerException if name is null
    */
   public static boolean isValidName(String name)
   {
      final int length = name.length();
      if (length == 0 || !isNameStart(name.charAt(0)))
      {
         return false;
      }
      for (int i = 1; i < length; i++)
      {
         if (!isName(name.charAt(i)))
         {
            return false;
         }
      }
      return true;
   } // isValidName(String):boolean

   /*
    * from the namespace rec
    * [4] NCName ::= (Letter | '_') (NCNameChar)*
    */

   /**
    * Check to see if a string is a valid NCName according to [4]
    * from the XML Namespaces 1.0 Recommendation. The empty string is
    * not a valid NCName.
    *
    * @param ncName string to check
    * @return true if ncName is a valid NCName
    * @throws NullPointerException if ncName is null
    */
   public static boolean isValidNCName(String ncName)
   {
      final int length = ncName.length();
      if (length == 0 || !isNCNameStart(ncName.charAt(0)))
      {
         return false;
      }
      for (int i = 1; i < length; i++)
      {
         if (!isNCName(ncName.charAt(i)))
         {
            return false;
         }
      }
      return true;
   } // isValidNCName(String):boolean

   /*
    * [7] Nmtoken ::= (NameChar)+
    */

   /**
    * Check to see if a string is a valid Nmtoken according to [7]
    * in the XML 1.0 Recommendation. The empty string is not a valid
    * Nmtoken.
    *
    * @param nmtoken string to check
    * @return true if nmtoken is a valid Nmtoken
    * @throws NullPointerException if nmtoken is null
    */
   public static boolean isValidNmtoken(String nmtoken)
   {
      final int length = nmtoken.length();
      if (length == 0)
      {
         return false;
      }
      for (int i = 0; i < length; i++)
      {
         if (!isName(nmtoken.charAt(i)))
         {
            return false;
         }
      }
      return true;
   } // isValidNmtoken(String):boolean

   // encodings

   /**
    * Returns true if the encoding name is a valid IANA encoding.
    * This method does not verify that there is a decoder available
    * for this encoding, only that the characters are valid for an
    * IANA encoding name: an ASCII letter followed by any number of
    * ASCII letters, digits, '.', '_' or '-'.
    *
    * @param ianaEncoding The IANA encoding name; null and the empty
    *                     string are not valid.
    */
   public static boolean isValidIANAEncoding(String ianaEncoding)
   {
      if (ianaEncoding == null || ianaEncoding.length() == 0)
      {
         return false;
      }
      if (!isAsciiLetter(ianaEncoding.charAt(0)))
      {
         return false;
      }
      for (int i = 1; i < ianaEncoding.length(); i++)
      {
         if (!isEncodingNameChar(ianaEncoding.charAt(i)))
         {
            return false;
         }
      }
      return true;
   } // isValidIANAEncoding(String):boolean

   /**
    * Returns true if the encoding name is a valid Java encoding.
    * This method does not verify that there is a decoder available
    * for this encoding, only that the characters are valid for a
    * Java encoding name: one or more ASCII letters, digits, '.', '_'
    * or '-'. Unlike IANA names, the first character need not be a letter.
    *
    * @param javaEncoding The Java encoding name; null and the empty
    *                     string are not valid.
    */
   public static boolean isValidJavaEncoding(String javaEncoding)
   {
      if (javaEncoding == null || javaEncoding.length() == 0)
      {
         return false;
      }
      // Start at index 0 so the first character is validated as well.
      for (int i = 0; i < javaEncoding.length(); i++)
      {
         if (!isEncodingNameChar(javaEncoding.charAt(i)))
         {
            return false;
         }
      }
      return true;
   } // isValidJavaEncoding(String):boolean

} // class XMLChar
767
Popular Tags