1 7 25 26 42 43 package net.sf.saxon.aelfred; 44 45 import java.io.BufferedInputStream; 46 import java.io.CharConversionException; 47 import java.io.EOFException; 48 import java.io.InputStream; 49 import java.io.InputStreamReader; 50 import java.io.IOException; 51 import java.io.Reader; 52 import java.net.URL; 53 import java.net.URLConnection; 54 import java.util.Enumeration; 55 import java.util.Hashtable; 56 import java.util.Stack; 57 58 import org.xml.sax.SAXException; 59 60 61 63 74 final class XmlParser 75 { 76 private final static boolean USE_CHEATS = true; 78 79 private final static int DEFAULT_ATTR_COUNT = 23; 81 82 83 87 88 93 XmlParser () 95 { 96 cleanupVariables (); 97 } 98 99 100 105 void setHandler (SAXDriver handler) 107 { 108 this.handler = handler; 109 } 110 111 112 129 void doParse ( 131 String systemId, 132 String publicId, 133 Reader reader, 134 InputStream stream, 135 String encoding 136 ) throws Exception 137 { 138 if (handler == null) 139 throw new IllegalStateException ("no callback handler"); 140 141 basePublicId = publicId; 142 baseURI = systemId; 143 baseReader = reader; 144 baseInputStream = stream; 145 146 initializeVariables (); 147 148 setInternalEntity ("amp", "&"); 152 setInternalEntity ("lt", "<"); 153 setInternalEntity ("gt", ">"); 154 setInternalEntity ("apos", "'"); 155 setInternalEntity ("quot", """); 156 157 handler.startDocument (); 158 159 pushURL ("[document]", basePublicId, baseURI, 160 baseReader, baseInputStream, encoding, false); 161 162 try { 163 parseDocument (); 164 handler.endDocument (); 165 } finally { 166 if (baseReader != null) 167 try { baseReader.close (); 168 } catch (IOException e) { } 169 if (baseInputStream != null) 170 try { baseInputStream.close (); 171 } catch (IOException e) { } 172 if (is != null) 173 try { is.close (); 174 } catch (IOException e) { } 175 if (reader != null) 176 try { 177 reader.close (); 178 } catch (IOException e) { 179 } 180 cleanupVariables (); 181 } 182 } 183 184 185 189 193 197 
//
// Constants.
//

    // Element content-model kinds, as passed to setElement () and
    // obtained from getContentType ().
    public final static int CONTENT_UNDECLARED = 0;
    public final static int CONTENT_ANY = 1;
    public final static int CONTENT_EMPTY = 2;
    public final static int CONTENT_MIXED = 3;
    public final static int CONTENT_ELEMENTS = 4;

    // Entity kinds, as returned by getEntityType ().
    public final static int ENTITY_UNDECLARED = 0;
    public final static int ENTITY_INTERNAL = 1;
    public final static int ENTITY_NDATA = 2;
    public final static int ENTITY_TEXT = 3;

    // Attribute types, as returned by getAttributeType () and
    // readAttType ().
    public final static int ATTRIBUTE_UNDECLARED = 0;
    public final static int ATTRIBUTE_CDATA = 1;
    public final static int ATTRIBUTE_ID = 2;
    public final static int ATTRIBUTE_IDREF = 3;
    public final static int ATTRIBUTE_IDREFS = 4;
    public final static int ATTRIBUTE_ENTITY = 5;
    public final static int ATTRIBUTE_ENTITIES = 6;
    public final static int ATTRIBUTE_NMTOKEN = 7;
    public final static int ATTRIBUTE_NMTOKENS = 8;
    public final static int ATTRIBUTE_ENUMERATED = 9;
    public final static int ATTRIBUTE_NOTATION = 10;

    /**
     * Maps the attribute-type keywords of an ATTLIST declaration to the
     * ATTRIBUTE_* constants.  Enumerated types have no keyword and are
     * therefore not in the table.
     */
    private static Hashtable attributeTypeHash;
    static {
        attributeTypeHash = new Hashtable (13);
        attributeTypeHash.put ("CDATA", new Integer (ATTRIBUTE_CDATA));
        attributeTypeHash.put ("ID", new Integer (ATTRIBUTE_ID));
        attributeTypeHash.put ("IDREF", new Integer (ATTRIBUTE_IDREF));
        attributeTypeHash.put ("IDREFS", new Integer (ATTRIBUTE_IDREFS));
        attributeTypeHash.put ("ENTITY", new Integer (ATTRIBUTE_ENTITY));
        attributeTypeHash.put ("ENTITIES", new Integer (ATTRIBUTE_ENTITIES));
        attributeTypeHash.put ("NMTOKEN", new Integer (ATTRIBUTE_NMTOKEN));
        attributeTypeHash.put ("NMTOKENS", new Integer (ATTRIBUTE_NMTOKENS));
        attributeTypeHash.put ("NOTATION", new Integer (ATTRIBUTE_NOTATION));
    }

    // Values of the "encoding" field, see setupDecoding ().
    private final static int ENCODING_EXTERNAL = 0;
    private final static int ENCODING_UTF_8 = 1;
    private final static int ENCODING_ISO_8859_1 = 2;
    private final static int ENCODING_UCS_2_12 = 3;
    private final static int ENCODING_UCS_2_21 = 4;
    private final static int ENCODING_UCS_4_1234 = 5;
    private final static int ENCODING_UCS_4_4321 = 6;
    private final static int ENCODING_UCS_4_2143 = 7;
    private final static int ENCODING_UCS_4_3412 = 8;
    private final static int ENCODING_ASCII = 9;

    // Kinds of attribute default, as passed to setAttribute () by
    // parseDefault ().
    public final static int ATTRIBUTE_DEFAULT_UNDECLARED = 30;
    public final static int ATTRIBUTE_DEFAULT_SPECIFIED = 31;
    public final static int ATTRIBUTE_DEFAULT_IMPLIED = 32;
    public final static int ATTRIBUTE_DEFAULT_REQUIRED = 33;
    public final static int ATTRIBUTE_DEFAULT_FIXED = 34;

    // Values of the "sourceType" field (setupDecoding () switches it
    // to INPUT_READER).
    private final static int INPUT_NONE = 0;
    private final static int INPUT_INTERNAL = 1;
    private final static int INPUT_STREAM = 3;
    private final static int INPUT_BUFFER = 4;
    private final static int INPUT_READER = 5;

    // Bit flags accepted by readLiteral ().
    private final static int LIT_ENTITY_REF = 2;
    private final static int LIT_NORMALIZE = 4;
    private final static int LIT_ATTRIBUTE = 8;
    private final static int LIT_DISABLE_PE = 16;
    private final static int LIT_DISABLE_CREF = 32;
    private final static int LIT_DISABLE_EREF = 64;
    private final static int LIT_ENTITY_CHECK = 128;
    private final static int LIT_PUBID = 256;

    // NOTE(review): not referenced in the part of the file reviewed
    // here; presumably parsing contexts -- confirm against their users.
    private final static int CONTEXT_NORMAL = 0;
    private final static int CONTEXT_LITERAL = 1;


    //
    // Error reporting.
    //

    /**
     * Reports a fatal error to the handler and then throws.
     *
     * <p>The message is extended with the offending and the expected
     * text when they are given.  The URI of the current external
     * entity (if any) and the current line and column are passed to
     * the handler.  This method never returns normally.
     *
     * @param message the basic error message
     * @param textFound the text that caused the error, or null
     * @param textExpected the text that was expected, or null
     * @throws SAXException always (unless the handler throws first)
     */
    private void error (String message, String textFound, String textExpected)
    throws SAXException
    {
        if (textFound != null) {
            message = message + " (found \"" + textFound + "\")";
        }
        if (textExpected != null) {
            message = message + " (expected \"" + textExpected + "\")";
        }

        String uri = null;
        if (externalEntity != null) {
            uri = externalEntity.getURL ().toString ();
        }
        handler.error (message, uri, line, column);

        // In case the handler's error method returned normally.
        throw new SAXException (message);
    }


    /**
     * Reports a fatal error where the offending text is one character.
     *
     * @param message the basic error message
     * @param textFound the character that caused the error
     * @param textExpected the text that was expected, or null
     * @throws SAXException always
     */
    private void error (String message, char textFound, String textExpected)
    throws SAXException
    {
        error (message, String.valueOf (textFound), textExpected);
    }


    /**
     * Reports a fatal error that has no found/expected detail.
     *
     * @param message the error message
     * @throws SAXException always
     */
    private void error (String message)
    throws SAXException
    {
        error (message, null, null);
    }


    //
    // Major syntactic productions.
    //

    /**
     * Parses a whole document: prolog, root element, trailing
     * comments and processing instructions.
     *
     * <p>End of input before the root element is complete is an
     * error; end of input after it is the normal way out.  Anything
     * else found after the root element is an error.
     *
     * @throws Exception on any well-formedness or I/O error
     */
    private void parseDocument ()
    throws Exception
    {
        try {
            parseProlog ();
            require ('<', "document prolog");
            parseElement ();
        } catch (EOFException ee) {
            error ("premature end of file", "[EOF]", null);
        }

        try {
            parseMisc ();
            // Only reached if there is still input left.
            char c = readCh ();
            error ("unexpected characters after document end", c, null);
        } catch (EOFException e) {
            return;
        }
    }


    /**
     * Parses a comment; the opening <code>&lt;!--</code> has already
     * been read.  Parameter-entity expansion is switched off while the
     * body is scanned.  The text is handed to the handler straight
     * from the data buffer, which is then reset.
     *
     * @throws Exception on any well-formedness or I/O error
     */
    private void parseComment ()
    throws Exception
    {
        boolean saved = expandPE;

        expandPE = false;
        parseUntil ("--");
        require ('>', "-- in comment");
        expandPE = saved;
        handler.comment (dataBuffer, 0, dataBufferPos);
        dataBufferPos = 0;
    }


    /**
     * Parses a processing instruction; the opening <code>&lt;?</code>
     * has already been read.  A target that equals "xml" in any
     * letter case is rejected.
     *
     * @throws SAXException on a well-formedness error
     * @throws IOException on an I/O error
     */
    private void parsePI ()
    throws SAXException, IOException
    {
        String name;
        boolean saved = expandPE;

        expandPE = false;
        name = readNmtoken (true);
        if ("xml".equalsIgnoreCase (name)) {
            error ("Illegal processing instruction target", name, null);
        }
        if (!tryRead ("?>")) {
            requireWhitespace ();
            parseUntil ("?>");
        }
        expandPE = saved;
        handler.processingInstruction (name, dataBufferToString ());
    }


    /**
     * Parses the body of a CDATA section up to and including
     * <code>]]&gt;</code> and flushes it as character data.  The
     * opening delimiter has already been read by the caller.
     *
     * @throws Exception on any well-formedness or I/O error
     */
    private void parseCDSect ()
    throws Exception
    {
        parseUntil ("]]>");
        dataBufferFlush ();
    }


    /**
     * Parses the prolog: comments and processing instructions, with
     * an optional document type declaration in between.
     *
     * @throws Exception on any well-formedness or I/O error
     */
    private void parseProlog ()
    throws Exception
    {
        parseMisc ();

        if (tryRead ("<!DOCTYPE")) {
            parseDoctypedecl ();
            parseMisc ();
        }
    }


    /**
     * Parses the remainder of an XML declaration (version, optional
     * encoding, optional standalone flag, closing <code>?&gt;</code>).
     *
     * @param ignoreEncoding if true, the declared encoding is returned
     *        but the decoder is left alone
     * @return the declared encoding name, or null if none was declared
     * @throws SAXException on a well-formedness error
     * @throws IOException on an I/O error
     */
    private String parseXMLDecl (boolean ignoreEncoding)
    throws SAXException, IOException
    {
        String version;
        String encodingName = null;
        String standalone = null;
        int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;

        // Version number, mandatory.
        require ("version", "XML declaration");
        parseEq ();
        version = readLiteral (flags);
        if (!version.equals ("1.0")) {
            error ("unsupported XML version", version, "1.0");
        }

        // Encoding declaration, optional.
        boolean white = tryWhitespace ();
        if (tryRead ("encoding")) {
            if (!white) {
                error ("whitespace required before 'encoding='");
            }
            parseEq ();
            encodingName = readLiteral (flags);
            if (!ignoreEncoding) {
                setupDecoding (encodingName);
            }
        }

        // Standalone declaration, optional.  Whitespace is only looked
        // for again if the encoding declaration consumed the previous
        // run of it.
        if (encodingName != null) {
            white = tryWhitespace ();
        }
        if (tryRead ("standalone")) {
            if (!white) {
                error ("whitespace required before 'standalone='");
            }
            parseEq ();
            standalone = readLiteral (flags);
            if (! ("yes".equals (standalone) || "no".equals (standalone))) {
                error ("standalone flag must be 'yes' or 'no'");
            }
        }

        skipWhitespace ();
        require ("?>", "XML declaration");

        return encodingName;
    }


    /**
     * Parses the remainder of a text declaration (optional version,
     * mandatory encoding, closing <code>?&gt;</code>).
     *
     * @param ignoreEncoding if true, the declared encoding is returned
     *        but the decoder is left alone
     * @return the declared encoding name
     * @throws SAXException on a well-formedness error
     * @throws IOException on an I/O error
     */
    private String parseTextDecl (boolean ignoreEncoding)
    throws SAXException, IOException
    {
        String encodingName = null;
        int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF;

        // Version number, optional here.
        if (tryRead ("version")) {
            String version;
            parseEq ();
            version = readLiteral (flags);
            if (!version.equals ("1.0")) {
                error ("unsupported XML version", version, "1.0");
            }
            requireWhitespace ();
        }

        // Encoding declaration, mandatory here.
        require ("encoding", "XML text declaration");
        parseEq ();
        encodingName = readLiteral (flags);
        if (!ignoreEncoding) {
            setupDecoding (encodingName);
        }

        skipWhitespace ();
        require ("?>", "XML text declaration");

        return encodingName;
    }


    /**
     * Reconciles a declared encoding name with the encoding currently
     * in effect (the <code>encoding</code> field).
     *
     * <p>When the current encoding is UTF-8 or externally supplied,
     * the 8-bit encodings handled directly by this class just switch
     * the <code>encoding</code> field.  When the current encoding is a
     * 16- or 32-bit one, the declared name only has to be consistent
     * with it.  Anything else is delegated to an
     * {@link InputStreamReader} created over the current byte stream.
     *
     * @param encodingName the name from the XML or text declaration;
     *        compared case-insensitively
     * @throws SAXException if the declaration contradicts the encoding
     *         in effect
     * @throws IOException if no decoder exists for the name
     */
    private void setupDecoding (String encodingName)
    throws SAXException, IOException
    {
        // Locale-independent upper-casing: in a Turkish locale the
        // default toUpperCase () maps 'i' to a dotted capital I and
        // none of the ISO-* comparisons below would match.
        encodingName = encodingName.toUpperCase (java.util.Locale.ENGLISH);

        if (encoding == ENCODING_UTF_8 || encoding == ENCODING_EXTERNAL) {
            if (encodingName.equals ("ISO-8859-1")
                    || encodingName.equals ("8859_1")
                    || encodingName.equals ("ISO8859_1")) {
                encoding = ENCODING_ISO_8859_1;
                return;
            } else if (encodingName.equals ("US-ASCII")
                    || encodingName.equals ("ASCII")) {
                encoding = ENCODING_ASCII;
                return;
            } else if (encodingName.equals ("UTF-8")
                    || encodingName.equals ("UTF8")) {
                encoding = ENCODING_UTF_8;
                return;
            } else if (encoding != ENCODING_EXTERNAL) {
                throw new EncodingException (encodingName);
            }
            // else: externally supplied encoding, fall through
        }

        // 16-bit input: the declaration only needs to agree.
        if (encoding == ENCODING_UCS_2_12 || encoding == ENCODING_UCS_2_21) {
            if (!(encodingName.equals ("ISO-10646-UCS-2")
                    || encodingName.equals ("UTF-16")
                    || encodingName.equals ("UTF-16BE")
                    || encodingName.equals ("UTF-16LE"))) {
                error ("unsupported Unicode encoding",
                       encodingName,
                       "UTF-16");
            }
            return;
        }

        // 32-bit input: the declaration only needs to agree.
        if (encoding == ENCODING_UCS_4_1234
                || encoding == ENCODING_UCS_4_4321
                || encoding == ENCODING_UCS_4_2143
                || encoding == ENCODING_UCS_4_3412) {
            if (!encodingName.equals ("ISO-10646-UCS-4")) {
                error ("unsupported 32-bit encoding",
                       encodingName,
                       "ISO-10646-UCS-4");
            }
            return;
        }

        // Externally supplied encoding from here on.
        if (encodingName.equals ("UTF-16BE")) {
            encoding = ENCODING_UCS_2_12;
            return;
        }
        if (encodingName.equals ("UTF-16LE")) {
            encoding = ENCODING_UCS_2_21;
            return;
        }

        // Use the JDK's name for byte-order-marked 16-bit Unicode.
        if (encodingName.equals ("UTF-16")
                || encodingName.equals ("ISO-10646-UCS-2")) {
            encodingName = "Unicode";
        }

        reader = new InputStreamReader (is, encodingName);
        sourceType = INPUT_READER;
    }


    /**
     * Consumes any run of whitespace, processing instructions and
     * comments.
     *
     * @throws Exception on any well-formedness or I/O error
     */
    private void parseMisc ()
    throws Exception
    {
        while (true) {
            skipWhitespace ();
            if (tryRead ("<?")) {
                parsePI ();
            } else if (tryRead ("<!--")) {
                parseComment ();
            } else {
                return;
            }
        }
    }


    /**
     * Parses a document type declaration; <code>&lt;!DOCTYPE</code>
     * has already been read.  The internal subset (if present) is
     * parsed first, then the external subset (if a system identifier
     * was given) is pushed as an input source and parsed up to the
     * closing '&gt;' of the declaration.
     *
     * @throws Exception on any well-formedness or I/O error
     */
    private void parseDoctypedecl ()
    throws Exception
    {
        String doctypeName, ids[];

        // Document type name.
        requireWhitespace ();
        doctypeName = readNmtoken (true);

        // External identifiers.
        skipWhitespace ();
        ids = readExternalIds (false);

        handler.doctypeDecl (doctypeName, ids [0], ids [1]);

        // Internal subset.
        skipWhitespace ();
        if (tryRead ('[')) {
            while (true) {
                // PE references are expanded between declarations...
                expandPE = true;
                skipWhitespace ();
                expandPE = false;
                if (tryRead (']')) {
                    break;
                } else {
                    // ...and flagged (peIsError) inside them.
                    peIsError = expandPE = true;
                    parseMarkupdecl ();
                    peIsError = expandPE = false;
                }
            }
        }

        // External subset.
        if (ids [1] != null) {
            pushURL ("[external subset]", ids [0], ids [1], null, null, null, false);

            while (true) {
                expandPE = true;
                skipWhitespace ();
                expandPE = false;
                if (tryRead ('>')) {
                    break;
                } else {
                    expandPE = true;
                    parseMarkupdecl ();
                    expandPE = false;
                }
            }
        } else {
            skipWhitespace ();
            require ('>', "internal DTD subset");
        }

        handler.endDoctype ();
        expandPE = false;
    }


    /**
     * Parses one markup declaration of a DTD subset, dispatching on
     * its opening delimiter.  Conditional sections are only accepted
     * when the input stack is non-empty, i.e. not directly in the
     * internal subset.
     *
     * @throws Exception on any well-formedness or I/O error
     */
    private void parseMarkupdecl ()
    throws Exception
    {
        if (tryRead ("<!ELEMENT")) {
            parseElementdecl ();
        } else if (tryRead ("<!ATTLIST")) {
            parseAttlistDecl ();
        } else if (tryRead ("<!ENTITY")) {
            parseEntityDecl ();
        } else if (tryRead ("<!NOTATION")) {
            parseNotationDecl ();
        } else if (tryRead ("<?")) {
            parsePI ();
        } else if (tryRead ("<!--")) {
            parseComment ();
        } else if (tryRead ("<![")) {
            if (inputStack.size () > 0) {
                parseConditionalSect ();
            } else {
                error ("conditional sections illegal in internal subset");
            }
        } else {
            error ("expected markup declaration");
        }
    }


    /**
     * Parses an element; the opening '&lt;' has already been read.
     *
     * <p>Reads the name and the specified attributes, reports the
     * defaulted value of every declared attribute that was not
     * specified, fires <code>startElement</code>, and then either
     * parses the content or, for an empty-element tag, fires
     * <code>endElement</code> at once.  The enclosing element's name
     * and content type are restored on the way out.
     *
     * @throws Exception on any well-formedness or I/O error
     */
    private void parseElement ()
    throws Exception
    {
        String gi;
        char c;
        int oldElementContent = currentElementContent;
        String oldElement = currentElement;
        Object element [];

        // No attributes seen yet for this tag.
        tagAttributePos = 0;

        // Element name and declaration info.
        gi = readNmtoken (true);
        currentElement = gi;
        element = (Object []) elementInfo.get (gi);
        currentElementContent = getContentType (element, CONTENT_ANY);

        // Specified attributes, up to '/' or '>'.
        boolean white = tryWhitespace ();
        c = readCh ();
        while (c != '/' && c != '>') {
            unread (c);
            if (!white) {
                error ("need whitespace between attributes");
            }
            parseAttribute (gi);
            white = tryWhitespace ();
            c = readCh ();
        }

        // Defaulted attributes: every declared attribute that was not
        // specified and has a default value.
        Enumeration atts = declaredAttributes (element);
        if (atts != null) {
            String aname;
declared:
            while (atts.hasMoreElements ()) {
                aname = (String) atts.nextElement ();
                for (int i = 0; i < tagAttributePos; i++) {
                    // equals () rather than ==: does not depend on both
                    // names having been interned.
                    if (aname.equals (tagAttributes [i])) {
                        continue declared;
                    }
                }
                String defaultVal = getAttributeExpandedValue (gi, aname);
                if (defaultVal != null) {
                    handler.attribute (aname, defaultVal, false);
                }
            }
        }

        switch (c) {
        case '>':
            handler.startElement (gi);
            parseContent ();
            break;
        case '/':
            require ('>', "empty element tag");
            handler.startElement (gi);
            handler.endElement (gi);
            break;
        }

        // Back to the enclosing element.
        currentElement = oldElement;
        currentElementContent = oldElementContent;
    }


    /**
     * Parses one attribute specification inside a start tag and
     * reports it to the handler.  Values of attributes that are
     * declared with a type other than CDATA are additionally
     * normalized (LIT_NORMALIZE).  A name that already occurred in
     * this tag is an error.
     *
     * @param name the name of the element the attribute belongs to
     * @throws Exception on any well-formedness or I/O error
     */
    private void parseAttribute (String name)
    throws Exception
    {
        String aname;
        int type;
        String value;
        int flags = LIT_ATTRIBUTE | LIT_ENTITY_REF;

        aname = readNmtoken (true);
        type = getAttributeType (name, aname);

        parseEq ();

        if (type == ATTRIBUTE_CDATA || type == ATTRIBUTE_UNDECLARED) {
            value = readLiteral (flags);
        } else {
            value = readLiteral (flags | LIT_NORMALIZE);
        }

        for (int i = 0; i < tagAttributePos; i++) {
            if (aname.equals (tagAttributes [i])) {
                error ("duplicate attribute", aname, null);
            }
        }

        handler.attribute (aname, value, true);
        dataBufferPos = 0;

        // Remember the name; grow the array when it is full.
        if (tagAttributePos == tagAttributes.length) {
            String newAttrib[] = new String [tagAttributes.length * 2];
            System.arraycopy (tagAttributes, 0, newAttrib, 0, tagAttributePos);
            tagAttributes = newAttrib;
        }
        tagAttributes [tagAttributePos++] = aname;
    }


    /**
     * Parses an equals sign surrounded by optional whitespace.
     *
     * @throws SAXException if there is no '='
     * @throws IOException on an I/O error
     */
    private void parseEq ()
    throws SAXException, IOException
    {
        skipWhitespace ();
        require ('=', "attribute name");
        skipWhitespace ();
    }


    /**
     * Parses an end tag; <code>&lt;/</code> has already been read.
     * The name must be that of the current element.
     *
     * @throws Exception on any well-formedness or I/O error
     */
    private void parseETag ()
    throws Exception
    {
        require (currentElement, "element end tag");
        skipWhitespace ();
        require ('>', "name in end tag");
        handler.endElement (currentElement);
    }


    /**
     * Parses the content of an element up to and including its end
     * tag: character data, references, child elements, comments,
     * CDATA sections and processing instructions.
     *
     * @throws Exception on any well-formedness or I/O error
     */
    private void parseContent ()
    throws Exception
    {
        char c;

        while (true) {
            // parseCharData () returns just before a '&' or '<'.
            parseCharData ();

            c = readCh ();
            switch (c) {
            case '&':
                c = readCh ();
                if (c == '#') {
                    parseCharRef ();
                } else {
                    unread (c);
                    parseEntityRef (true);
                }
                break;

            case '<':
                dataBufferFlush ();
                c = readCh ();
                switch (c) {
                case '!':
                    c = readCh ();
                    switch (c) {
                    case '-':
                        require ('-', "start of comment");
                        parseComment ();
                        break;
                    case '[':
                        require ("CDATA[", "CDATA section");
                        handler.startCDATA ();
                        inCDATA = true;
                        parseCDSect ();
                        inCDATA = false;
                        handler.endCDATA ();
                        break;
                    default:
                        error ("expected comment or CDATA section", c, null);
                        break;
                    }
                    break;

                case '?':
                    parsePI ();
                    break;

                case '/':
                    // End tag of this element: content is complete.
                    parseETag ();
                    return;

                default:
                    unread (c);
                    parseElement ();
                    break;
                }
            }
        }
    }


    /**
     * Parses an element type declaration; <code>&lt;!ELEMENT</code>
     * has already been read.
     *
     * @throws Exception on any well-formedness or I/O error
     */
    private void parseElementdecl ()
    throws Exception
    {
        String name;

        requireWhitespace ();
        name = readNmtoken (true);

        requireWhitespace ();
        parseContentspec (name);

        skipWhitespace ();
        require ('>', "element declaration");
    }


    /**
     * Parses the content specification of an element declaration and
     * records it with <code>setElement</code>.  For mixed and element
     * content the model's text is collected in the data buffer and
     * stored as a string.
     *
     * @param name the name of the element being declared
     * @throws Exception on any well-formedness or I/O error
     */
    private void parseContentspec (String name)
    throws Exception
    {
        if (tryRead ("EMPTY")) {
            setElement (name, CONTENT_EMPTY, null, null);
            return;
        } else if (tryRead ("ANY")) {
            setElement (name, CONTENT_ANY, null, null);
            return;
        } else {
            require ('(', "element name");
            dataBufferAppend ('(');
            skipWhitespace ();
            if (tryRead ("#PCDATA")) {
                dataBufferAppend ("#PCDATA");
                parseMixed ();
                setElement (name, CONTENT_MIXED, dataBufferToString (), null);
            } else {
                parseElements ();
                setElement (name, CONTENT_ELEMENTS,
                            dataBufferToString (), null);
            }
        }
    }


    /**
     * Parses a parenthesized group of a content model, appending its
     * text to the data buffer; the opening '(' has already been read
     * and appended.  All separators in one group must be the same
     * character (',' or '|').  An occurrence indicator may follow
     * the closing ')'.
     *
     * @throws Exception on any well-formedness or I/O error
     */
    private void parseElements ()
    throws Exception
    {
        char c;
        char sep;

        // First content particle.
        skipWhitespace ();
        parseCp ();

        // The character after it fixes the separator for the group.
        skipWhitespace ();
        c = readCh ();
        switch (c) {
        case ')':
            dataBufferAppend (')');
            c = readCh ();
            switch (c) {
            case '*':
            case '+':
            case '?':
                dataBufferAppend (c);
                break;
            default:
                unread (c);
            }
            return;
        case ',':
        case '|':
            sep = c;
            dataBufferAppend (c);
            break;
        default:
            error ("bad separator in content model", c, null);
            return;
        }

        // Remaining content particles.
        while (true) {
            skipWhitespace ();
            parseCp ();
            skipWhitespace ();
            c = readCh ();
            if (c == ')') {
                dataBufferAppend (')');
                break;
            } else if (c != sep) {
                error ("bad separator in content model", c, null);
                return;
            } else {
                dataBufferAppend (c);
            }
        }

        // Optional occurrence indicator.
        c = readCh ();
        switch (c) {
        case '?':
        case '*':
        case '+':
            dataBufferAppend (c);
            return;
        default:
            unread (c);
            return;
        }
    }


    /**
     * Parses one content particle: either a nested group or a name,
     * the latter with an optional occurrence indicator.  The text is
     * appended to the data buffer.
     *
     * @throws Exception on any well-formedness or I/O error
     */
    private void parseCp ()
    throws Exception
    {
        if (tryRead ('(')) {
            dataBufferAppend ('(');
            parseElements ();
        } else {
            dataBufferAppend (readNmtoken (true));
            char c = readCh ();
            switch (c) {
            case '?':
            case '*':
            case '+':
                dataBufferAppend (c);
                break;
            default:
                unread (c);
                break;
            }
        }
    }


    /**
     * Parses the remainder of a mixed content model;
     * <code>(#PCDATA</code> has already been read and appended.  The
     * stored text always ends in <code>)*</code>, even when the
     * declaration is the bare <code>(#PCDATA)</code>.
     *
     * @throws Exception on any well-formedness or I/O error
     */
    private void parseMixed ()
    throws Exception
    {
        // Just "(#PCDATA)", optionally followed by '*'.
        skipWhitespace ();
        if (tryRead (')')) {
            dataBufferAppend (")*");
            tryRead ('*');
            return;
        }

        // Alternatives; here the closing ")*" is mandatory.
        skipWhitespace ();
        while (!tryRead (")*")) {
            require ('|', "alternative");
            dataBufferAppend ('|');
            skipWhitespace ();
            dataBufferAppend (readNmtoken (true));
            skipWhitespace ();
        }
        dataBufferAppend (")*");
    }


    /**
     * Parses an attribute list declaration;
     * <code>&lt;!ATTLIST</code> has already been read.
     *
     * @throws Exception on any well-formedness or I/O error
     */
    private void parseAttlistDecl ()
    throws Exception
    {
        String elementName;

        requireWhitespace ();
        elementName = readNmtoken (true);
        boolean white = tryWhitespace ();
        while (!tryRead ('>')) {
            if (!white) {
                error ("whitespace required before attribute definition");
            }
            parseAttDef (elementName);
            white = tryWhitespace ();
        }
    }


    /**
     * Parses a single attribute definition (name, type, default).
     *
     * @param elementName the element the attribute is declared for
     * @throws Exception on any well-formedness or I/O error
     */
    private void parseAttDef (String elementName)
    throws Exception
    {
        String name;
        int type;
        // Named "enumeration", not "enum": the latter is a reserved
        // word since Java 5.
        String enumeration = null;

        name = readNmtoken (true);

        requireWhitespace ();
        type = readAttType ();

        // For these two types readAttType () has left the list of
        // allowed values in the data buffer.
        if (type == ATTRIBUTE_ENUMERATED || type == ATTRIBUTE_NOTATION) {
            enumeration = dataBufferToString ();
        }

        requireWhitespace ();
        parseDefault (elementName, name, type, enumeration);
    }


    /**
     * Parses the type of an attribute.  For enumerated and NOTATION
     * types the list of allowed values is left in the data buffer.
     *
     * @return one of the ATTRIBUTE_* type constants
     * @throws Exception on any well-formedness or I/O error
     */
    private int readAttType ()
    throws Exception
    {
        if (tryRead ('(')) {
            parseEnumeration (false);
            return ATTRIBUTE_ENUMERATED;
        } else {
            String typeString = readNmtoken (true);
            if (typeString.equals ("NOTATION")) {
                parseNotationType ();
            }
            Integer type = (Integer) attributeTypeHash.get (typeString);
            if (type == null) {
                error ("illegal attribute type", typeString, null);
                return ATTRIBUTE_UNDECLARED;
            } else {
                return type.intValue ();
            }
        }
    }


    /**
     * Parses a '|'-separated list of tokens up to the closing ')',
     * writing the whole list, parentheses included, to the data
     * buffer.  The opening '(' has already been read.
     *
     * @param isNames true if the tokens must be names, false if any
     *        name token will do
     * @throws Exception on any well-formedness or I/O error
     */
    private void parseEnumeration (boolean isNames)
    throws Exception
    {
        dataBufferAppend ('(');

        // First token.
        skipWhitespace ();
        dataBufferAppend (readNmtoken (isNames));

        // Remaining tokens.
        skipWhitespace ();
        while (!tryRead (')')) {
            require ('|', "enumeration value");
            dataBufferAppend ('|');
            skipWhitespace ();
            dataBufferAppend (readNmtoken (isNames));
            skipWhitespace ();
        }
        dataBufferAppend (')');
    }


    /**
     * Parses the list of notation names that follows the NOTATION
     * keyword in an attribute type.
     *
     * @throws Exception on any well-formedness or I/O error
     */
    private void parseNotationType ()
    throws Exception
    {
        requireWhitespace ();
        require ('(', "NOTATION");

        parseEnumeration (true);
    }


    /**
     * Parses the default declaration of an attribute and records the
     * complete attribute definition with <code>setAttribute</code>.
     *
     * @param elementName the element the attribute is declared for
     * @param name the attribute name
     * @param type the attribute type (ATTRIBUTE_* constant)
     * @param enumeration the list of allowed values for enumerated
     *        and NOTATION types, otherwise null
     * @throws Exception on any well-formedness or I/O error
     */
    private void parseDefault (
        String elementName,
        String name,
        int type,
        String enumeration
    ) throws Exception
    {
        int valueType = ATTRIBUTE_DEFAULT_SPECIFIED;
        String value = null;
        int flags = LIT_ATTRIBUTE | LIT_DISABLE_CREF
                  | LIT_ENTITY_CHECK | LIT_DISABLE_PE;

        if (tryRead ('#')) {
            if (tryRead ("FIXED")) {
                valueType = ATTRIBUTE_DEFAULT_FIXED;
                requireWhitespace ();
                value = readLiteral (flags);
            } else if (tryRead ("REQUIRED")) {
                valueType = ATTRIBUTE_DEFAULT_REQUIRED;
            } else if (tryRead ("IMPLIED")) {
                valueType = ATTRIBUTE_DEFAULT_IMPLIED;
            } else {
                error ("illegal keyword for attribute default value");
            }
        } else {
            value = readLiteral (flags);
        }
        setAttribute (elementName, name, type, enumeration, value, valueType);
    }


    /**
     * Parses a conditional section; <code>&lt;![</code> has already
     * been read.  The declarations of an INCLUDE section are parsed
     * normally.  An IGNORE section is skipped, with nested
     * <code>&lt;![</code> ... <code>]]&gt;</code> pairs counted so that
     * the section ends at its own closing delimiter.
     *
     * @throws Exception on any well-formedness or I/O error
     */
    private void parseConditionalSect ()
    throws Exception
    {
        skipWhitespace ();
        if (tryRead ("INCLUDE")) {
            skipWhitespace ();
            require ('[', "INCLUDE");
            skipWhitespace ();
            while (!tryRead ("]]>")) {
                parseMarkupdecl ();
                skipWhitespace ();
            }
        } else if (tryRead ("IGNORE")) {
            skipWhitespace ();
            require ('[', "IGNORE");
            char c;
            expandPE = false;
            for (int nest = 1; nest > 0;) {
                c = readCh ();
                switch (c) {
                case '<':
                    if (tryRead ("![")) {
                        nest++;
                    }
                    // Must not fall through: "<]>" is not a closing
                    // delimiter and must leave the nesting level alone.
                    break;
                case ']':
                    if (tryRead ("]>")) {
                        nest--;
                    }
                    break;
                }
            }
            expandPE = true;
        } else {
            error ("conditional section must begin with INCLUDE or IGNORE");
        }
    }


    /**
     * Parses a character reference; <code>&amp;#</code> has already
     * been read.  The referenced character is appended to the data
     * buffer, as a surrogate pair if it lies above U+FFFF.
     *
     * <p>Only ASCII digits are accepted (plus a-f/A-F after a
     * lower-case 'x').  The value is range-checked after every digit
     * so that an overlong reference cannot overflow and wrap around
     * to a legal code point.
     *
     * @throws SAXException if the reference is malformed or does not
     *         denote a legal XML character
     * @throws IOException on an I/O error
     */
    private void parseCharRef ()
    throws SAXException, IOException
    {
        int value = 0;
        int radix = tryRead ('x') ? 16 : 10;

        while (true) {
            char c = readCh ();
            if (c == ';') {
                break;
            }
            // Character.digit () alone would accept non-ASCII digits.
            int digit = (c < 0x0080) ? Character.digit (c, radix) : -1;
            if (digit < 0) {
                error ("illegal character in character reference", c, null);
            }
            value = value * radix + digit;
            if (value > 0x0010ffff) {
                // Cannot get any better; stop before the int overflows.
                error ("illegal XML character reference U+"
                       + Integer.toHexString (value));
            }
        }

        // Reject control characters other than TAB, LF, CR, the
        // surrogate block and the two non-characters.
        if ((value < 0x0020
                    && ! (value == '\n' || value == '\t' || value == '\r'))
                || (value >= 0xD800 && value <= 0xDFFF)
                || value == 0xFFFE || value == 0xFFFF
                || value > 0x0010ffff) {
            error ("illegal XML character reference U+"
                   + Integer.toHexString (value));
        }

        if (value <= 0x0000ffff) {
            // Fits in a single char.
            dataBufferAppend ((char) value);
        } else {
            // U+10000 .. U+10FFFF: encode as a surrogate pair.
            value -= 0x10000;
            dataBufferAppend ((char) (0xd800 | (value >> 10)));
            dataBufferAppend ((char) (0xdc00 | (value & 0x0003ff)));
        }
    }


    /**
     * Parses a general entity reference; the '&amp;' has already been
     * read.  Internal entities are pushed back as a string, external
     * text entities as a URL; anything else is an error.
     *
     * @param externalAllowed true in content, false in an attribute
     *        value, where external entities must not be referenced
     * @throws SAXException on a well-formedness error
     * @throws IOException on an I/O error
     */
    private void parseEntityRef (boolean externalAllowed)
    throws SAXException, IOException
    {
        String name;

        name = readNmtoken (true);
        require (';', "entity reference");
        switch (getEntityType (name)) {
        case ENTITY_UNDECLARED:
            error ("reference to undeclared entity", name, null);
            break;
        case ENTITY_INTERNAL:
            pushString (name, getEntityValue (name));
            break;
        case ENTITY_TEXT:
            if (externalAllowed) {
                pushURL (name, getEntityPublicId (name),
                         getEntitySystemId (name),
                         null, null, null, true);
            } else {
                error ("reference to external entity in attribute value.",
                       name, null);
            }
            break;
        case ENTITY_NDATA:
            if (externalAllowed) {
                error ("unparsed entity reference in content", name, null);
            } else {
                error ("reference to external entity in attribute value.",
                       name, null);
            }
            break;
        }
    }


    /**
     * Parses a parameter entity reference; the '%' has already been
     * read.  Parameter entities are stored under their name prefixed
     * with '%'.  Outside a literal the replacement is padded with one
     * space on either side.  A reference to an undeclared parameter
     * entity is silently ignored.
     *
     * @throws SAXException on a well-formedness error
     * @throws IOException on an I/O error
     */
    private void parsePEReference ()
    throws SAXException, IOException
    {
        String name;

        name = "%" + readNmtoken (true);
        require (';', "parameter entity reference");
        switch (getEntityType (name)) {
        case ENTITY_UNDECLARED:
            // Deliberately not an error.
            break;
        case ENTITY_INTERNAL:
            if (inLiteral) {
                pushString (name, getEntityValue (name));
            } else {
                pushString (name, ' ' + getEntityValue (name) + ' ');
            }
            break;
        case ENTITY_TEXT:
            // Input sources are stacked, so the trailing space is
            // pushed first and the leading space last.
            if (!inLiteral) {
                pushString (null, " ");
            }
            pushURL (name, getEntityPublicId (name),
                     getEntitySystemId (name),
                     null, null, null, true);
            if (!inLiteral) {
                pushString (null, " ");
            }
            break;
        }
    }


    /**
     * Parses an entity declaration; <code>&lt;!ENTITY</code> has
     * already been read.  Handles general and parameter entities,
     * internal and external, parsed and unparsed (NDATA).
     *
     * @throws Exception on any well-formedness or I/O error
     */
    private void parseEntityDecl ()
    throws Exception
    {
        boolean peFlag = false;

        // The '%' of a parameter entity declaration must not be
        // mistaken for the start of a PE reference.
        expandPE = false;
        requireWhitespace ();
        if (tryRead ('%')) {
            peFlag = true;
            requireWhitespace ();
        }
        expandPE = true;

        // Entity name; parameter entities get a '%' prefix.
        String name = readNmtoken (true);
        if (peFlag) {
            name = "%" + name;
        }

        // A quote starts an internal entity's value, anything else
        // must be an external identifier.
        requireWhitespace ();
        char c = readCh ();
        unread (c);
        if (c == '"' || c == '\'') {
            String value = readLiteral (0);
            setInternalEntity (name, value);
        } else {
            String[] ids = readExternalIds (false);
            if (ids [1] == null) {
                error ("system identifer missing", name, null);
            }

            // NDATA is only looked for on general entities.
            boolean white = tryWhitespace ();
            if (!peFlag && tryRead ("NDATA")) {
                if (!white) {
                    error ("whitespace required before NDATA");
                }
                requireWhitespace ();
                String notationName = readNmtoken (true);
                setExternalDataEntity (name, ids [0], ids [1], notationName);
            } else {
                setExternalTextEntity (name, ids [0], ids [1]);
            }
        }

        skipWhitespace ();
        require ('>', "NDATA");
    }


    /**
     * Parses a notation declaration; <code>&lt;!NOTATION</code> has
     * already been read.  At least one of the public and system
     * identifiers must be present.
     *
     * @throws Exception on any well-formedness or I/O error
     */
    private void parseNotationDecl ()
    throws Exception
    {
        String nname, ids[];

        requireWhitespace ();
        nname = readNmtoken (true);

        requireWhitespace ();

        ids = readExternalIds (true);
        if (ids [0] == null && ids [1] == null) {
            error ("external identifer missing", nname, null);
        }

        setNotation (nname, ids [0], ids [1]);

        skipWhitespace ();
        require ('>', "notation declaration");
    }


    /**
     * Collects character data into the data buffer up to, but not
     * including, the next '&amp;' or '&lt;'.
     *
     * <p>With USE_CHEATS the read buffer is scanned directly first.
     * If a delimiter is found in it, everything before the delimiter
     * is appended in one go and the line/column counters are brought
     * up to date.  If the buffer runs out first, nothing has been
     * consumed and the character-at-a-time loop takes over from the
     * same position.
     *
     * @throws Exception on any well-formedness or I/O error
     */
    private void parseCharData ()
    throws Exception
    {
        char c;

        if (USE_CHEATS) {
            int lineAugment = 0;
            int columnAugment = 0;

            for (int i = readBufferPos; i < readBufferLength; i++) {
                switch (c = readBuffer [i]) {
                case '\n':
                    lineAugment++;
                    columnAugment = 0;
                    break;
                case '&':
                case '<': {
                    int start = readBufferPos;
                    columnAugment++;
                    readBufferPos = i;
                    if (lineAugment > 0) {
                        line += lineAugment;
                        column = columnAugment;
                    } else {
                        column += columnAugment;
                    }
                    dataBufferAppend (readBuffer, start, i - start);
                    return;
                }
                case ']':
                    // Only checked when all three characters are in
                    // the buffer.
                    if ((i + 2) < readBufferLength) {
                        if (readBuffer [i + 1] == ']'
                                && readBuffer [i + 2] == '>') {
                            error ("character data may not contain ']]>'");
                        }
                    }
                    columnAugment++;
                    break;
                default:
                    if (c < 0x0020 || c > 0xFFFD) {
                        error ("illegal XML character U+"
                               + Integer.toHexString (c));
                    }
                    // FALLTHROUGH
                case '\r':
                case '\t':
                    columnAugment++;
                }
            }
        }

        // Character-at-a-time path.  "]]>" is detected by counting
        // consecutive ']' characters.
        int closeSquareBracketCount = 0;
        while (true) {
            c = readCh ();
            switch (c) {
            case '<':
            case '&':
                unread (c);
                return;
            case ']':
                closeSquareBracketCount++;
                dataBufferAppend (c);
                break;
            case '>':
                if (closeSquareBracketCount >= 2) {
                    error ("']]>' is not allowed here");
                    break;
                }
                // FALLTHROUGH: an ordinary '>'
            default:
                closeSquareBracketCount = 0;
                dataBufferAppend (c);
                break;
            }
        }
    }


    //
    // Low-level helpers.
    //

    /**
     * Requires at least one whitespace character and skips the whole
     * run.
     *
     * @throws SAXException if the next character is not whitespace
     * @throws IOException on an I/O error
     */
    private void requireWhitespace ()
    throws SAXException, IOException
    {
        char c = readCh ();
        if (isWhitespace (c)) {
            skipWhitespace ();
        } else {
            error ("whitespace required", c, null);
        }
    }


    /**
     * Reads a (possibly empty) run of whitespace, appending it to the
     * data buffer instead of discarding it.
     *
     * @throws Exception on any well-formedness or I/O error
     */
    private void parseWhitespace ()
    throws Exception
    {
        char c = readCh ();
        while (isWhitespace (c)) {
            dataBufferAppend (c);
            c = readCh ();
        }
        unread (c);
    }


2086 2092 private void skipWhitespace () 2093 throws SAXException, IOException 2094 { 2095 if (USE_CHEATS) { 2100 int lineAugment = 0; 2101 int columnAugment = 0; 2102 2103loop: 2104 for (int i = readBufferPos; i < readBufferLength; i++) { 2105 switch (readBuffer [i]) { 2106 case ' ': 2107 case '\t': 2108 case '\r': 2109 columnAugment++; 2110 break; 2111 case '\n': 2112 lineAugment++; 2113 columnAugment = 0; 2114 break; 2115 case '%': 2116 if (expandPE) 2117 break loop; 2118 default: 2120 readBufferPos = i; 2121 if (lineAugment > 0) { 2122 line += lineAugment; 2123 column = columnAugment; 2124 } else { 2125 column += columnAugment; 2126 } 2127 return; 2128 } 2129 } 2130 } 2131 2132 char c = readCh (); 2134 while (isWhitespace (c)) { 2135 c = readCh (); 2136 } 2137 unread (c); 2138 } 2139 2140 2141 2148 private String readNmtoken (boolean isName) 2149 throws SAXException, IOException 2150 { 2151 char c; 2152 2153 if (USE_CHEATS) { 2154loop: 2155 for (int i = readBufferPos; i < readBufferLength; i++) { 2156 c = readBuffer [i]; 2157 switch (c) { 2158 case '%': 2159 if (expandPE) 2160 break loop; 2161 2163 case '<': case '>': case '&': 2165 case ',': case '|': case '*': case '+': case '?': 2166 case ')': 2167 case '=': 2168 case '\'': case '"': 2169 case '[': 2170 case ' ': case '\t': case '\r': case '\n': 2171 case ';': 2172 case '/': 2173 int start = readBufferPos; 2174 if (i == start) 2175 error ("name expected", readBuffer [i], null); 2176 readBufferPos = i; 2177 return intern (readBuffer, start, i - start); 2178 2179 default: 2180 if (i == readBufferPos && isName) { 2183 if (!Character.isUnicodeIdentifierStart (c) 2184 && c != ':' && c != '_') 2185 error ("Not a name start character, U+" 2186 + Integer.toHexString (c)); 2187 } else if (!Character.isUnicodeIdentifierPart (c) 2188 && c != '-' && c != ':' && c != '_' && c != '.' 
2189 && !isExtender (c)) 2190 error ("Not a name character, U+" 2191 + Integer.toHexString (c)); 2192 } 2193 } 2194 } 2195 2196 nameBufferPos = 0; 2197 2198 loop: 2200 while (true) { 2201 c = readCh (); 2202 switch (c) { 2203 case '%': 2204 case '<': case '>': case '&': 2205 case ',': case '|': case '*': case '+': case '?': 2206 case ')': 2207 case '=': 2208 case '\'': case '"': 2209 case '[': 2210 case ' ': case '\t': case '\n': case '\r': 2211 case ';': 2212 case '/': 2213 unread (c); 2214 if (nameBufferPos == 0) { 2215 error ("name expected"); 2216 } 2217 if (isName 2219 && !Character.isUnicodeIdentifierStart ( 2220 nameBuffer [0]) 2221 && ":_".indexOf (nameBuffer [0]) == -1) 2222 error ("Not a name start character, U+" 2223 + Integer.toHexString (nameBuffer [0])); 2224 String s = intern (nameBuffer, 0, nameBufferPos); 2225 nameBufferPos = 0; 2226 return s; 2227 default: 2228 2230 if ((nameBufferPos != 0 || !isName) 2231 && !Character.isUnicodeIdentifierPart (c) 2232 && ":-_.".indexOf (c) == -1 2233 && !isExtender (c)) 2234 error ("Not a name character, U+" 2235 + Integer.toHexString (c)); 2236 if (nameBufferPos >= nameBuffer.length) 2237 nameBuffer = 2238 (char[]) extendArray (nameBuffer, 2239 nameBuffer.length, nameBufferPos); 2240 nameBuffer [nameBufferPos++] = c; 2241 } 2242 } 2243 } 2244 2245 private static boolean isExtender (char c) 2246 { 2247 return c == 0x00b7 || c == 0x02d0 || c == 0x02d1 || c == 0x0387 2249 || c == 0x0640 || c == 0x0e46 || c == 0x0ec6 || c == 0x3005 2250 || (c >= 0x3031 && c <= 0x3035) 2251 || (c >= 0x309d && c <= 0x309e) 2252 || (c >= 0x30fc && c <= 0x30fe); 2253 } 2254 2255 2256 2269 private String readLiteral (int flags) 2270 throws SAXException, IOException 2271 { 2272 char delim, c; 2273 int startLine = line; 2274 boolean saved = expandPE; 2275 2276 delim = readCh (); 2278 if (delim != '"' && delim != '\'' && delim != (char) 0) { 2279 error ("expected '\"' or \"'\"", delim, null); 2280 return null; 2281 } 2282 inLiteral = true; 
2283 if ((flags & LIT_DISABLE_PE) != 0) 2284 expandPE = false; 2285 2286 char ourBuf [] = readBuffer; 2290 2291 try { 2293 c = readCh (); 2294loop: 2295 while (! (c == delim && readBuffer == ourBuf)) { 2296 switch (c) { 2297 case '\n': 2300 case '\r': 2301 if ((flags & (LIT_ATTRIBUTE | LIT_PUBID)) != 0) 2302 c = ' '; 2303 break; 2304 case '\t': 2305 if ((flags & LIT_ATTRIBUTE) != 0) 2306 c = ' '; 2307 break; 2308 case '&': 2309 c = readCh (); 2310 if (c == '#') { 2313 if ((flags & LIT_DISABLE_CREF) != 0) { 2314 dataBufferAppend ('&'); 2315 continue; 2316 } 2317 parseCharRef (); 2318 2319 } else { 2321 unread (c); 2322 if ((flags & LIT_ENTITY_REF) > 0) { 2324 parseEntityRef (false); 2325 2326 } else if ((flags & LIT_DISABLE_EREF) != 0) { 2328 dataBufferAppend ('&'); 2329 2330 } else { 2332 String name = readNmtoken (true); 2333 require (';', "entity reference"); 2334 if ((flags & LIT_ENTITY_CHECK) != 0 2335 && getEntityType (name) == 2336 ENTITY_UNDECLARED) { 2337 error ("General entity '" + name 2339 + "' must be declared before use"); 2340 } 2341 dataBufferAppend ('&'); 2342 dataBufferAppend (name); 2343 dataBufferAppend (';'); 2344 } 2345 } 2346 c = readCh (); 2347 continue loop; 2348 2349 case '<': 2350 if ((flags & LIT_ATTRIBUTE) != 0) 2353 error ("attribute values may not contain '<'"); 2354 break; 2355 2356 2358 default: 2359 break; 2360 } 2361 dataBufferAppend (c); 2362 c = readCh (); 2363 } 2364 } catch (EOFException e) { 2365 error ("end of input while looking for delimiter (started on line " 2366 + startLine + ')', null, new Character (delim).toString ()); 2367 } 2368 inLiteral = false; 2369 expandPE = saved; 2370 2371 if ((flags & LIT_NORMALIZE) > 0) { 2373 dataBufferNormalize (); 2374 } 2375 2376 return dataBufferToString (); 2378 } 2379 2380 2381 2387 private String[] readExternalIds (boolean inNotation) 2388 throws Exception 2389 { 2390 char c; 2391 String ids[] = new String [2]; 2392 int flags = LIT_DISABLE_CREF | LIT_DISABLE_PE | LIT_DISABLE_EREF; 
2393 2394 if (tryRead ("PUBLIC")) { 2395 requireWhitespace (); 2396 ids [0] = readLiteral (LIT_NORMALIZE | LIT_PUBID | flags); 2397 if (inNotation) { 2398 skipWhitespace (); 2399 c = readCh (); 2400 unread (c); 2401 if (c == '"' || c == '\'') { 2402 ids [1] = readLiteral (flags); 2403 } 2404 } else { 2405 requireWhitespace (); 2406 ids [1] = readLiteral (flags); 2407 } 2408 2409 for (int i = 0; i < ids [0].length (); i++) { 2410 c = ids [0].charAt (i); 2411 if (c >= 'a' && c <= 'z') 2412 continue; 2413 if (c >= 'A' && c <= 'Z') 2414 continue; 2415 if (" \r\n0123456789-' ()+,./:=?;!*#@$_%".indexOf (c) != -1) 2416 continue; 2417 error ("illegal PUBLIC id character U+" 2418 + Integer.toHexString (c)); 2419 } 2420 } else if (tryRead ("SYSTEM")) { 2421 requireWhitespace (); 2422 ids [1] = readLiteral (flags); 2423 } 2424 2425 2429 return ids; 2430 } 2431 2432 2433 2441 private final boolean isWhitespace (char c) 2442 { 2443 if (c > 0x20) 2444 return false; 2445 if (c == 0x20 || c == 0x0a || c == 0x09 || c == 0x0d) 2446 return true; 2447 return false; } 2449 2450 2451 2455 2456 2459 private void dataBufferAppend (char c) 2460 { 2461 if (dataBufferPos >= dataBuffer.length) 2463 dataBuffer = 2464 (char[]) extendArray (dataBuffer, 2465 dataBuffer.length, dataBufferPos); 2466 dataBuffer [dataBufferPos++] = c; 2467 } 2468 2469 2470 2473 private void dataBufferAppend (String s) 2474 { 2475 dataBufferAppend (s.toCharArray (), 0, s.length ()); 2476 } 2477 2478 2479 2482 private void dataBufferAppend (char ch[], int start, int length) 2483 { 2484 dataBuffer = (char[]) 2485 extendArray (dataBuffer, dataBuffer.length, 2486 dataBufferPos + length); 2487 2488 System.arraycopy (ch, start, dataBuffer, dataBufferPos, length); 2489 dataBufferPos += length; 2490 } 2491 2492 2493 2496 private void dataBufferNormalize () 2497 { 2498 int i = 0; 2499 int j = 0; 2500 int end = dataBufferPos; 2501 2502 while (j < end && dataBuffer [j] == ' ') { 2504 j++; 2505 } 2506 2507 while (end > j && 
dataBuffer [end - 1] == ' ') { 2509 end --; 2510 } 2511 2512 while (j < end) { 2514 2515 char c = dataBuffer [j++]; 2516 2517 if (c == ' ') { 2520 while (j < end && dataBuffer [j++] == ' ') {} 2521 2522 dataBuffer [i++] = ' '; 2523 dataBuffer [i++] = dataBuffer [j - 1]; 2524 } else { 2525 dataBuffer [i++] = c; 2526 } 2527 } 2528 2529 dataBufferPos = i; 2531 } 2532 2533 2534 2537 private String dataBufferToString () 2538 { 2539 String s = new String (dataBuffer, 0, dataBufferPos); 2540 dataBufferPos = 0; 2541 return s; 2542 } 2543 2544 2545 2549 private void dataBufferFlush () 2550 throws SAXException 2551 { 2552 if (currentElementContent == CONTENT_ELEMENTS 2553 && dataBufferPos > 0 2554 && !inCDATA 2555 ) { 2556 for (int i = 0; i < dataBufferPos; i++) { 2559 if (!isWhitespace (dataBuffer [i])) { 2560 handler.charData (dataBuffer, 0, dataBufferPos); 2561 dataBufferPos = 0; 2562 } 2563 } 2564 if (dataBufferPos > 0) { 2565 handler.ignorableWhitespace (dataBuffer, 0, dataBufferPos); 2566 dataBufferPos = 0; 2567 } 2568 } else if (dataBufferPos > 0) { 2569 handler.charData (dataBuffer, 0, dataBufferPos); 2570 dataBufferPos = 0; 2571 } 2572 } 2573 2574 2575 2581 private void require (String delim, String context) 2582 throws SAXException, IOException 2583 { 2584 int length = delim.length (); 2585 char ch []; 2586 2587 if (length < dataBuffer.length) { 2588 ch = dataBuffer; 2589 delim.getChars (0, length, ch, 0); 2590 } else 2591 ch = delim.toCharArray (); 2592 2593 if (USE_CHEATS 2594 && length <= (readBufferLength - readBufferPos)) { 2595 int offset = readBufferPos; 2596 2597 for (int i = 0; i < length; i++, offset++) 2598 if (ch [i] != readBuffer [offset]) 2599 error ("unexpected characters in " + context, null, delim); 2600 readBufferPos = offset; 2601 2602 } else { 2603 for (int i = 0; i < length; i++) 2604 require (ch [i], delim); 2605 } 2606 } 2607 2608 2609 2612 private void require (char delim, String after) 2613 throws SAXException, IOException 2614 { 2615 char 
c = readCh (); 2616 2617 if (c != delim) { 2618 error ("unexpected character after " + after, c, delim+""); 2619 } 2620 } 2621 2622 2623 2639 public String intern (char ch[], int start, int length) 2640 { 2641 int index = 0; 2642 int hash = 0; 2643 Object bucket []; 2644 2645 for (int i = start; i < start + length; i++) 2647 hash = 31 * hash + ch [i]; 2648 hash = (hash & 0x7fffffff) % SYMBOL_TABLE_LENGTH; 2649 2650 if ((bucket = symbolTable [hash]) == null) { 2652 bucket = new Object [8]; 2654 2655 } else { 2658 while (index < bucket.length) { 2659 char chFound [] = (char []) bucket [index]; 2660 2661 if (chFound == null) 2663 break; 2664 2665 if (chFound.length == length) { 2667 for (int i = 0; i < chFound.length; i++) { 2668 if (ch [start + i] != chFound [i]) { 2670 break; 2671 } else if (i == length - 1) { 2672 return (String) bucket [index + 1]; 2674 } 2675 } 2676 } 2677 index += 2; 2678 } 2679 2681 bucket = (Object []) extendArray (bucket, bucket.length, index); 2683 } 2684 symbolTable [hash] = bucket; 2685 2686 String s = new String (ch, start, length).intern (); 2689 bucket [index] = s.toCharArray (); 2690 bucket [index + 1] = s; 2691 return s; 2692 } 2693 2694 2695 2699 private Object extendArray (Object array, int currentSize, int requiredSize) 2700 { 2701 if (requiredSize < currentSize) { 2702 return array; 2703 } else { 2704 Object newArray = null; 2705 int newSize = currentSize * 2; 2706 2707 if (newSize <= requiredSize) 2708 newSize = requiredSize + 1; 2709 2710 if (array instanceof char[]) 2711 newArray = new char [newSize]; 2712 else if (array instanceof Object[]) 2713 newArray = new Object [newSize]; 2714 else 2715 throw new RuntimeException (); 2716 2717 System.arraycopy (array, 0, newArray, 0, currentSize); 2718 return newArray; 2719 } 2720 } 2721 2722 2723 2727 2728 2732 2741 public Enumeration declaredElements () 2742 { 2743 return elementInfo.keys (); 2744 } 2745 2746 2747 2758 private int getContentType (Object element [], int defaultType) 
2759 { 2760 int retval; 2761 2762 if (element == null) 2763 return defaultType; 2764 retval = ((Integer) element [0]).intValue (); 2765 if (retval == CONTENT_UNDECLARED) 2766 retval = defaultType; 2767 return retval; 2768 } 2769 2770 2771 2782 public int getElementContentType (String name) 2783 { 2784 Object element [] = (Object []) elementInfo.get (name); 2785 return getContentType (element, CONTENT_UNDECLARED); 2786 } 2787 2788 2789 2797 public String getElementContentModel (String name) 2798 { 2799 Object element[] = (Object[]) elementInfo.get (name); 2800 if (element == null) { 2801 return null; 2802 } else { 2803 return (String) element [1]; 2804 } 2805 } 2806 2807 2808 2815 private void setElement (String name, int contentType, 2816 String contentModel, Hashtable attributes) 2817 throws Exception 2818 { 2819 Object element[] = (Object []) elementInfo.get (name); 2820 2821 if (element == null) { 2823 element = new Object [3]; 2824 element [0] = new Integer (contentType); 2825 element [1] = contentModel; 2826 element [2] = attributes; 2827 elementInfo.put (name, element); 2828 return; 2829 } 2830 2831 if (contentType != CONTENT_UNDECLARED) { 2833 if (((Integer) element [0]).intValue () == CONTENT_UNDECLARED) { 2835 element [0] = new Integer (contentType); 2836 element [1] = contentModel; 2837 } else { 2838 } 2841 } 2842 2843 else if (attributes != null) { 2845 element [2] = attributes; 2846 } 2847 2848 } 2849 2850 2851 2855 private Hashtable getElementAttributes (String name) 2856 { 2857 Object element[] = (Object[]) elementInfo.get (name); 2858 if (element == null) { 2859 return null; 2860 } else { 2861 return (Hashtable) element [2]; 2862 } 2863 } 2864 2865 2866 2867 2871 2883 private Enumeration declaredAttributes (Object element []) 2884 { 2885 Hashtable attlist; 2886 2887 if (element == null) 2888 return null; 2889 if ((attlist = (Hashtable) element [2]) == null) 2890 return null; 2891 return attlist.keys (); 2892 } 2893 2894 2906 public Enumeration 
declaredAttributes (String elname) 2907 { 2908 return declaredAttributes ((Object []) elementInfo.get (elname)); 2909 } 2910 2911 2912 2929 public int getAttributeType (String name, String aname) 2930 { 2931 Object attribute[] = getAttribute (name, aname); 2932 if (attribute == null) { 2933 return ATTRIBUTE_UNDECLARED; 2934 } else { 2935 return ((Integer) attribute [0]).intValue (); 2936 } 2937 } 2938 2939 2940 2948 public String getAttributeEnumeration (String name, String aname) 2949 { 2950 Object attribute[] = getAttribute (name, aname); 2951 if (attribute == null) { 2952 return null; 2953 } else { 2954 return (String) attribute [3]; 2955 } 2956 } 2957 2958 2959 2967 public String getAttributeDefaultValue (String name, String aname) 2968 { 2969 Object attribute[] = getAttribute (name, aname); 2970 if (attribute == null) { 2971 return null; 2972 } else { 2973 return (String) attribute [1]; 2974 } 2975 } 2976 2977 2978 2987 public String getAttributeExpandedValue (String name, String aname) 2988 throws Exception 2989 { 2990 Object attribute[] = getAttribute (name, aname); 2991 2992 if (attribute == null) { 2993 return null; 2994 } else if (attribute [4] == null && attribute [1] != null) { 2995 char buf [] = new char [1]; 2998 int flags = LIT_ENTITY_REF | LIT_ATTRIBUTE; 2999 int type = getAttributeType (name, aname); 3000 3001 if (type != ATTRIBUTE_CDATA && type != ATTRIBUTE_UNDECLARED) 3002 flags |= LIT_NORMALIZE; 3003 buf [0] = '"'; 3004 pushCharArray (null, buf, 0, 1); 3005 pushString (null, (String) attribute [1]); 3006 pushCharArray (null, buf, 0, 1); 3007 attribute [4] = readLiteral (flags); 3008 } 3009 return (String) attribute [4]; 3010 } 3011 3012 3013 3020 public int getAttributeDefaultValueType (String name, String aname) 3021 { 3022 Object attribute[] = getAttribute (name, aname); 3023 if (attribute == null) { 3024 return ATTRIBUTE_DEFAULT_UNDECLARED; 3025 } else { 3026 return ((Integer) attribute [2]).intValue (); 3027 } 3028 } 3029 3030 3031 3038 
private void setAttribute (String elName, String name, int type, 3039 String enumeration, 3040 String value, int valueType) 3041 throws Exception 3042 { 3043 Hashtable attlist; 3044 3045 attlist = getElementAttributes (elName); 3047 if (attlist == null) { 3048 attlist = new Hashtable (); 3049 } 3050 3051 if (attlist.get (name) != null) { 3053 return; 3055 } else { 3056 Object[] attribute = new Object [5]; 3057 attribute [0] = new Integer (type); 3058 attribute [1] = value; 3059 attribute [2] = new Integer (valueType); 3060 attribute [3] = enumeration; 3061 attribute [4] = null; 3062 attlist.put (name, attribute); 3063 3064 setElement (elName, CONTENT_UNDECLARED, null, attlist); 3066 } 3067 } 3068 3069 3070 3074 private Object[] getAttribute (String elName, String name) 3075 { 3076 Hashtable attlist = getElementAttributes (elName); 3077 if (attlist == null) { 3078 return null; 3079 } 3080 3081 return (Object[]) attlist.get (name); 3082 } 3083 3084 3085 3089 3100 public Enumeration declaredEntities () 3101 { 3102 return entityInfo.keys (); 3103 } 3104 3105 3106 3114 public int getEntityType (String ename) 3115 { 3116 Object entity[] = (Object[]) entityInfo.get (ename); 3117 if (entity == null) { 3118 return ENTITY_UNDECLARED; 3119 } else { 3120 return ((Integer) entity [0]).intValue (); 3121 } 3122 } 3123 3124 3125 3134 public String getEntityPublicId (String ename) 3135 { 3136 Object entity[] = (Object[]) entityInfo.get (ename); 3137 if (entity == null) { 3138 return null; 3139 } else { 3140 return (String) entity [1]; 3141 } 3142 } 3143 3144 3145 3155 public String getEntitySystemId (String ename) 3156 { 3157 Object entity[] = (Object[]) entityInfo.get (ename); 3158 if (entity == null) { 3159 return null; 3160 } else { 3161 try { 3162 String relativeURI = (String)entity [2]; 3163 URL baseURI = (URL)entity [5]; 3164 if (baseURI==null) return relativeURI; 3165 URL absoluteURI = new URL( baseURI, relativeURI ); 3166 return absoluteURI.toString(); 3167 } catch 
(IOException err) { 3168 return (String)entity [2]; 3171 } 3172 } 3173 } 3174 3175 3176 3183 public String getEntityValue (String ename) 3184 { 3185 Object entity[] = (Object[]) entityInfo.get (ename); 3186 if (entity == null) { 3187 return null; 3188 } else { 3189 return (String) entity [3]; 3190 } 3191 } 3192 3193 3194 3202 public String getEntityNotationName (String eName) 3203 { 3204 Object entity[] = (Object[]) entityInfo.get (eName); 3205 if (entity == null) { 3206 return null; 3207 } else { 3208 return (String) entity [4]; 3209 } 3210 } 3211 3212 3213 3216 private void setInternalEntity (String eName, String value) 3217 { 3218 setEntity (eName, ENTITY_INTERNAL, null, null, value, null); 3219 } 3220 3221 3222 3225 private void setExternalDataEntity (String eName, String pubid, 3226 String sysid, String nName) 3227 { 3228 setEntity (eName, ENTITY_NDATA, pubid, sysid, null, nName); 3229 } 3230 3231 3232 3235 private void setExternalTextEntity (String eName, 3236 String pubid, String sysid) 3237 { 3238 setEntity (eName, ENTITY_TEXT, pubid, sysid, null, null); 3239 } 3240 3241 3242 3245 private void setEntity (String eName, int eClass, 3246 String pubid, String sysid, 3247 String value, String nName) 3248 { 3249 Object entity[]; 3250 3251 if (entityInfo.get (eName) == null) { 3252 entity = new Object [6]; 3253 entity [0] = new Integer (eClass); 3254 entity [1] = pubid; 3255 entity [2] = sysid; 3256 entity [3] = value; 3257 entity [4] = nName; 3258 entity [5] = (externalEntity == null ? 
null : externalEntity.getURL()); 3259 3261 entityInfo.put (eName, entity); 3262 } 3263 } 3264 3265 3266 3270 3278 public Enumeration declaredNotations () 3279 { 3280 return notationInfo.keys (); 3281 } 3282 3283 3284 3294 public String getNotationPublicId (String nname) 3295 { 3296 Object notation[] = (Object[]) notationInfo.get (nname); 3297 if (notation == null) { 3298 return null; 3299 } else { 3300 return (String) notation [0]; 3301 } 3302 } 3303 3304 3305 3314 public String getNotationSystemId (String nname) 3315 { 3316 Object notation[] = (Object[]) notationInfo.get (nname); 3317 if (notation == null) { 3318 return null; 3319 } else { 3320 return (String) notation [1]; 3321 } 3322 } 3323 3324 3325 3331 private void setNotation (String nname, String pubid, String sysid) 3332 throws Exception 3333 { 3334 Object notation[]; 3335 3336 if (notationInfo.get (nname) == null) { 3337 notation = new Object [2]; 3338 notation [0] = pubid; 3339 notation [1] = sysid; 3340 notationInfo.put (nname, notation); 3341 } else { 3342 } 3345 } 3346 3347 3348 3352 3353 3356 public int getLineNumber () 3357 { 3358 return line; 3359 } 3360 3361 3362 3365 public int getColumnNumber () 3366 { 3367 return column; 3368 } 3369 3370 3371 3375 3376 3394 private char readCh () 3395 throws SAXException, IOException 3396 { 3397 3398 while (readBufferPos >= readBufferLength) { 3403 switch (sourceType) { 3404 case INPUT_READER: 3405 case INPUT_STREAM: 3406 readDataChunk (); 3407 while (readBufferLength < 1) { 3408 popInput (); 3409 if (readBufferLength < 1) { 3410 readDataChunk (); 3411 } 3412 } 3413 break; 3414 3415 default: 3416 3417 popInput (); 3418 break; 3419 } 3420 } 3421 3422 char c = readBuffer [readBufferPos++]; 3423 3424 if (c == '\n') { 3425 line++; 3426 column = 0; 3427 } else { 3428 if (c == '<') { 3429 ; 3430 } else if ((c < 0x0020 && (c != '\t') && (c != '\r')) || c > 0xFFFD) 3431 error ("illegal XML character U+" 3432 + Integer.toHexString (c)); 3433 3434 else if (c == '%' && 
expandPE) { 3439 if (peIsError && entityStack.size()==1) 3440 error ("PE reference within declaration in internal subset."); 3442 parsePEReference (); 3443 return readCh (); 3444 } 3445 column++; 3446 } 3447 3448 return c; 3449 } 3450 3451 3452 3467 private void unread (char c) 3468 throws SAXException 3469 { 3470 if (c == '\n') { 3472 line--; 3473 column = -1; 3474 } 3475 if (readBufferPos > 0) { 3476 readBuffer [--readBufferPos] = c; 3477 } else { 3478 pushString (null, new Character (c).toString ()); 3479 } 3480 } 3481 3482 3483 3493 private void unread (char ch[], int length) 3494 throws SAXException 3495 { 3496 for (int i = 0; i < length; i++) { 3497 if (ch [i] == '\n') { 3498 line--; 3499 column = -1; 3500 } 3501 } 3502 if (length < readBufferPos) { 3503 readBufferPos -= length; 3504 } else { 3505 pushCharArray (null, ch, 0, length); 3506 sourceType = INPUT_BUFFER; 3507 } 3508 } 3509 3510 3511 3528 private void pushURL ( 3529 String ename, 3530 String publicId, 3531 String systemId, 3532 Reader reader, 3533 InputStream stream, 3534 String encoding, 3535 boolean isAbsolute 3536 ) throws SAXException, IOException 3537 { 3538 boolean ignoreEncoding = false; 3539 3540 pushInput (ename); 3542 3543 readBuffer = new char [READ_BUFFER_MAX + 4]; 3546 readBufferPos = 0; 3547 readBufferLength = 0; 3548 readBufferOverflow = -1; 3549 is = null; 3550 line = 1; 3551 column = 0; 3552 currentByteCount = 0; 3553 3554 if (!isAbsolute) { 3555 3556 try { 3560 if (systemId != null && externalEntity != null) { 3561 systemId = new URL (externalEntity.getURL (), systemId).toString (); 3562 } else if (baseURI != null) { 3563 systemId = new URL (new URL (baseURI), systemId).toString (); 3564 } 3566 } catch(java.io.IOException err) { 3567 popInput(); 3568 error("Invalid URL " + systemId + " (" + err.getMessage() + ")"); 3569 } 3570 } 3571 3572 if (reader == null && stream == null && systemId != null) { 3576 Object input = null; 3577 try { 3578 input = handler.resolveEntity (publicId, 
systemId); 3579 } catch (java.io.IOException err) { 3580 popInput(); 3581 error("Failure resolving entity " + systemId + " (" + err.getMessage() + ")"); 3582 } 3583 if (input != null) { 3584 if (input instanceof String) { 3585 systemId = (String) input; 3586 isAbsolute = true; 3587 } else if (input instanceof InputStream) { 3588 stream = (InputStream) input; 3589 } else if (input instanceof Reader) { 3590 reader = (Reader) input; 3591 } 3592 } 3593 } 3594 3595 if (systemId != null) { 3597 handler.startExternalEntity (systemId); 3598 } else { 3599 handler.startExternalEntity ("[unidentified data stream]"); 3600 } 3601 3602 if (reader != null) { 3605 sourceType = INPUT_READER; 3606 this.reader = reader; 3607 tryEncodingDecl (true); 3608 return; 3609 } 3610 3611 sourceType = INPUT_STREAM; 3614 if (stream != null) { 3615 is = stream; 3616 } else { 3617 URL url = new URL (systemId); 3619 try { 3620 externalEntity = url.openConnection (); 3621 externalEntity.connect (); 3622 is = externalEntity.getInputStream (); 3623 } catch (java.io.IOException err) { 3624 try { 3625 popInput(); 3626 } catch (Exception err2) {} 3627 error("Cannot read input file " + err.getMessage()); 3628 } 3629 } 3630 3631 if (!is.markSupported ()) { 3634 is = new BufferedInputStream (is); 3635 } 3636 3637 if (encoding == null && externalEntity != null) { 3639 if (!"file".equals (externalEntity.getURL ().getProtocol ())) { 3643 int temp; 3644 3645 encoding = externalEntity.getContentType (); 3648 3649 if (encoding==null) { 3651 temp = -1; 3652 } else { 3653 temp = encoding.indexOf ("charset"); 3654 } 3655 3656 if (temp < 0) 3660 encoding = null; else { 3662 temp = encoding.indexOf ('=', temp + 7); 3663 encoding = encoding.substring (temp+1); if ((temp = encoding.indexOf (';')) > 0) 3665 encoding = encoding.substring (0, temp); 3666 3667 if ((temp = encoding.indexOf ('(')) > 0) 3669 encoding = encoding.substring (0, temp); 3670 if ((temp = encoding.indexOf ('"')) > 0) 3672 encoding = 
encoding.substring (temp + 1, 3673 encoding.indexOf ('"', temp + 2)); 3674 encoding.trim (); 3675 } 3676 } 3677 } 3678 3679 if (encoding != null) { 3681 this.encoding = ENCODING_EXTERNAL; 3682 setupDecoding (encoding); 3683 ignoreEncoding = true; 3684 3685 } else { 3687 detectEncoding (); 3688 ignoreEncoding = false; 3689 } 3690 is.mark(100); 3691 3692 try { 3694 tryEncodingDecl (ignoreEncoding); 3695 } catch (EncodingException x) { 3696 encoding = x.getMessage (); 3697 3698 try { 3701 if (sourceType != INPUT_STREAM) 3702 throw x; 3703 3704 is.reset (); 3705 readBufferPos = 0; 3706 readBufferLength = 0; 3707 readBufferOverflow = -1; 3708 line = 1; 3709 currentByteCount = column = 0; 3710 3711 sourceType = INPUT_READER; 3712 this.reader = new InputStreamReader (is, encoding); 3713 is = null; 3714 3715 tryEncodingDecl (true); 3716 3717 } catch (IOException e) { 3718 error ("unsupported text encoding", 3719 encoding, 3720 null); 3721 } 3722 } 3723 } 3724 3725 3726 3740 private String tryEncodingDecl (boolean ignoreEncoding) 3741 throws SAXException, IOException 3742 { 3743 if (tryRead ("<?xml")) { 3745 dataBufferFlush (); 3746 if (tryWhitespace ()) { 3747 if (inputStack.size () > 0) { 3748 return parseTextDecl (ignoreEncoding); 3749 } else { 3750 return parseXMLDecl (ignoreEncoding); 3751 } 3752 } else { 3753 unread ("xml".toCharArray (), 3); 3754 parsePI (); 3755 } 3756 } 3757 return null; 3758 } 3759 3760 3761 3783 private void detectEncoding () 3784 throws SAXException, IOException 3785 { 3786 byte signature[] = new byte [4]; 3787 3788 is.mark (4); 3791 is.read (signature); 3792 is.reset (); 3793 3794 if (tryEncoding (signature, (byte) 0x00, (byte) 0x00, 3798 (byte) 0x00, (byte) 0x3c)) { 3799 encoding = ENCODING_UCS_4_1234; 3802 3803 } else if (tryEncoding (signature, (byte) 0x3c, (byte) 0x00, 3804 (byte) 0x00, (byte) 0x00)) { 3805 encoding = ENCODING_UCS_4_4321; 3807 3808 } else if (tryEncoding (signature, (byte) 0x00, (byte) 0x00, 3809 (byte) 0x3c, (byte) 0x00)) 
{ 3810 encoding = ENCODING_UCS_4_2143; 3812 3813 } else if (tryEncoding (signature, (byte) 0x00, (byte) 0x3c, 3814 (byte) 0x00, (byte) 0x00)) { 3815 encoding = ENCODING_UCS_4_3412; 3817 3818 } 3821 3822 3825 else if (tryEncoding (signature, (byte)0xef, (byte)0xbb, (byte)0xbf)) { 3826 encoding = ENCODING_UTF_8; 3827 is.read(); is.read(); is.read(); 3828 } 3829 3830 else if (tryEncoding (signature, (byte) 0xfe, (byte) 0xff)) { 3837 encoding = ENCODING_UCS_2_12; 3840 is.read (); is.read (); 3841 3842 } else if (tryEncoding (signature, (byte) 0xff, (byte) 0xfe)) { 3843 encoding = ENCODING_UCS_2_21; 3846 is.read (); is.read (); 3847 3848 } else if (tryEncoding (signature, (byte) 0x00, (byte) 0x3c, 3849 (byte) 0x00, (byte) 0x3f)) { 3850 encoding = ENCODING_UCS_2_12; 3853 error ("no byte-order mark for UCS-2 entity"); 3854 3855 } else if (tryEncoding (signature, (byte) 0x3c, (byte) 0x00, 3856 (byte) 0x3f, (byte) 0x00)) { 3857 encoding = ENCODING_UCS_2_21; 3860 error ("no byte-order mark for UCS-2 entity"); 3861 } 3862 3863 else if (tryEncoding (signature, (byte) 0x3c, (byte) 0x3f, 3867 (byte) 0x78, (byte) 0x6d)) { 3868 encoding = ENCODING_UTF_8; 3871 read8bitEncodingDeclaration (); 3872 3873 } else { 3874 3877 encoding = ENCODING_UTF_8; 3879 } 3880 } 3881 3882 3883 3894 private static boolean tryEncoding ( 3895 byte sig[], byte b1, byte b2, byte b3, byte b4) 3896 { 3897 return (sig [0] == b1 && sig [1] == b2 3898 && sig [2] == b3 && sig [3] == b4); 3899 } 3900 3901 3902 3911 private static boolean tryEncoding (byte sig[], byte b1, byte b2) 3912 { 3913 return ((sig [0] == b1) && (sig [1] == b2)); 3914 } 3915 3916 3926 private static boolean tryEncoding (byte sig[], byte b1, byte b2, byte b3) 3927 { 3928 return ((sig [0] == b1) && (sig [1] == b2) && (sig [2] == b3)); 3929 } 3930 3931 3939 private void pushString (String ename, String s) 3940 throws SAXException 3941 { 3942 char ch[] = s.toCharArray (); 3943 pushCharArray (ename, ch, 0, ch.length); 3944 } 3945 3946 3947 3960 
private void pushCharArray (String ename, char ch[], int start, int length) 3961 throws SAXException 3962 { 3963 pushInput (ename); 3965 sourceType = INPUT_INTERNAL; 3966 readBuffer = ch; 3967 readBufferPos = start; 3968 readBufferLength = length; 3969 readBufferOverflow = -1; 3970 } 3971 3972 3973 3999 private void pushInput (String ename) 4000 throws SAXException 4001 { 4002 Object input[] = new Object [12]; 4003 4004 if (ename != null) { 4006 Enumeration entities = entityStack.elements (); 4007 while (entities.hasMoreElements ()) { 4008 String e = (String) entities.nextElement (); 4009 if (e == ename) { 4010 error ("recursive reference to entity", ename, null); 4011 } 4012 } 4013 } 4014 entityStack.push (ename); 4015 4016 if (sourceType == INPUT_NONE) { 4018 return; 4019 } 4020 4021 input [0] = new Integer (sourceType); 4024 input [1] = externalEntity; 4025 input [2] = readBuffer; 4026 input [3] = new Integer (readBufferPos); 4027 input [4] = new Integer (readBufferLength); 4028 input [5] = new Integer (line); 4029 input [6] = new Integer (encoding); 4030 input [7] = new Integer (readBufferOverflow); 4031 input [8] = is; 4032 input [9] = new Integer (currentByteCount); 4033 input [10] = new Integer (column); 4034 input [11] = reader; 4035 4036 inputStack.push (input); 4038 } 4039 4040 4041 4056 private void popInput () 4057 throws SAXException, IOException 4058 { 4059 String uri; 4060 4061 if (externalEntity != null) 4062 uri = externalEntity.getURL ().toString (); 4063 else 4064 uri = baseURI; 4065 4066 switch (sourceType) { 4067 case INPUT_STREAM: 4068 if (is!=null) { 4069 if (uri != null) { 4070 handler.endExternalEntity (baseURI); 4071 } 4072 is.close (); 4073 } 4074 break; 4075 case INPUT_READER: 4076 if (reader != null) { 4077 if (uri != null) { 4078 handler.endExternalEntity (baseURI); 4079 } 4080 reader.close (); 4081 } 4082 break; 4083 } 4084 4085 if (inputStack.isEmpty ()) { 4088 throw new EOFException ("no more input"); 4089 } 4090 4091 Object[] input 
= (Object[]) inputStack.pop (); 4092 entityStack.pop (); 4093 4094 sourceType = ((Integer) input [0]).intValue (); 4095 externalEntity = (URLConnection) input [1]; 4096 readBuffer = (char[]) input [2]; 4097 readBufferPos = ((Integer) input [3]).intValue (); 4098 readBufferLength = ((Integer) input [4]).intValue (); 4099 line = ((Integer) input [5]).intValue (); 4100 encoding = ((Integer) input [6]).intValue (); 4101 readBufferOverflow = ((Integer) input [7]).intValue (); 4102 is = (InputStream) input [8]; 4103 currentByteCount = ((Integer) input [9]).intValue (); 4104 column = ((Integer) input [10]).intValue (); 4105 reader = (Reader) input [11]; 4106 } 4107 4108 4109 4120 private boolean tryRead (char delim) 4121 throws SAXException, IOException 4122 { 4123 char c; 4124 4125 c = readCh (); 4127 4128 if (c == delim) { 4131 return true; 4132 } else { 4133 unread (c); 4134 return false; 4135 } 4136 } 4137 4138 4139 4154 private boolean tryRead (String delim) 4155 throws SAXException, IOException 4156 { 4157 char ch[] = delim.toCharArray (); 4158 char c; 4159 4160 4163 for (int i = 0; i < ch.length; i++) { 4164 c = readCh (); 4165 if (c != ch [i]) { 4166 unread (c); 4167 if (i != 0) { 4168 unread (ch, i); 4169 } 4170 return false; 4171 } 4172 } 4173 return true; 4174 } 4175 4176 4177 4178 4185 private boolean tryWhitespace () 4186 throws SAXException, IOException 4187 { 4188 char c; 4189 c = readCh (); 4190 if (isWhitespace (c)) { 4191 skipWhitespace (); 4192 return true; 4193 } else { 4194 unread (c); 4195 return false; 4196 } 4197 } 4198 4199 4200 4209 private void parseUntil (String delim) 4210 throws SAXException, IOException 4211 { 4212 char c; 4213 int startLine = line; 4214 4215 try { 4216 while (!tryRead (delim)) { 4217 c = readCh (); 4218 dataBufferAppend (c); 4219 } 4220 } catch (EOFException e) { 4221 error ("end of input while looking for delimiter " 4222 + "(started on line " + startLine 4223 + ')', null, delim); 4224 } 4225 } 4226 4227 4228 4239 private 
void read8bitEncodingDeclaration () 4240 throws SAXException, IOException 4241 { 4242 int ch; 4243 readBufferPos = readBufferLength = 0; 4244 4245 while (true) { 4246 ch = is.read (); 4247 readBuffer [readBufferLength++] = (char) ch; 4248 switch (ch) { 4249 case (int) '>': 4250 return; 4251 case - 1: 4252 error ("end of file before end of XML or encoding declaration.", 4253 null, "?>"); 4254 } 4255 if (readBuffer.length == readBufferLength) 4256 error ("unfinished XML or encoding declaration"); 4257 } 4258 } 4259 4260 4261 4265 4266 4279 private void readDataChunk () 4280 throws SAXException, IOException 4281 { 4282 int count; 4283 4284 if (readBufferOverflow > -1) { 4286 readBuffer [0] = (char) readBufferOverflow; 4287 readBufferOverflow = -1; 4288 readBufferPos = 1; 4289 sawCR = true; 4290 } else { 4291 readBufferPos = 0; 4292 sawCR = false; 4293 } 4294 4295 if (sourceType == INPUT_READER) { 4297 count = reader.read (readBuffer, 4298 readBufferPos, READ_BUFFER_MAX - readBufferPos); 4299 if (count < 0) 4300 readBufferLength = readBufferPos; 4301 else 4302 readBufferLength = readBufferPos + count; 4303 if (readBufferLength > 0) 4304 filterCR (count >= 0); 4305 sawCR = false; 4306 return; 4307 } 4308 4309 count = is.read (rawReadBuffer, 0, READ_BUFFER_MAX); 4311 4312 if (count > 0) { 4316 switch (encoding) { 4317 case ENCODING_ASCII: 4319 copyIso8859_1ReadBuffer (count, (char) 0x0080); 4320 break; 4321 case ENCODING_UTF_8: 4322 copyUtf8ReadBuffer (count); 4323 break; 4324 case ENCODING_ISO_8859_1: 4325 copyIso8859_1ReadBuffer (count, (char) 0); 4326 break; 4327 4328 case ENCODING_UCS_2_12: 4330 copyUcs2ReadBuffer (count, 8, 0); 4331 break; 4332 case ENCODING_UCS_2_21: 4333 copyUcs2ReadBuffer (count, 0, 8); 4334 break; 4335 4336 case ENCODING_UCS_4_1234: 4338 copyUcs4ReadBuffer (count, 24, 16, 8, 0); 4339 break; 4340 case ENCODING_UCS_4_4321: 4341 copyUcs4ReadBuffer (count, 0, 8, 16, 24); 4342 break; 4343 case ENCODING_UCS_4_2143: 4344 copyUcs4ReadBuffer (count, 16, 
24, 0, 8); 4345 break; 4346 case ENCODING_UCS_4_3412: 4347 copyUcs4ReadBuffer (count, 8, 0, 24, 16); 4348 break; 4349 } 4350 } else 4351 readBufferLength = readBufferPos; 4352 4353 readBufferPos = 0; 4354 4355 if (sawCR) { 4358 filterCR (count >= 0); 4359 sawCR = false; 4360 4361 if (readBufferLength == 0 && count >= 0) 4363 readDataChunk (); 4364 } 4365 4366 if (count > 0) 4367 currentByteCount += count; 4368 } 4369 4370 4371 4379 private void filterCR (boolean moreData) 4380 { 4381 int i, j; 4382 4383 readBufferOverflow = -1; 4384 4385loop: 4386 for (i = j = readBufferPos; j < readBufferLength; i++, j++) { 4387 switch (readBuffer [j]) { 4388 case '\r': 4389 if (j == readBufferLength - 1) { 4390 if (moreData) { 4391 readBufferOverflow = '\r'; 4392 readBufferLength--; 4393 } else readBuffer [i++] = '\n'; 4395 break loop; 4396 } else if (readBuffer [j + 1] == '\n') { 4397 j++; 4398 } 4399 readBuffer [i] = '\n'; 4400 break; 4401 4402 case '\n': 4403 default: 4404 readBuffer [i] = readBuffer [j]; 4405 break; 4406 } 4407 } 4408 readBufferLength = i; 4409 } 4410 4411 4422 private void copyUtf8ReadBuffer (int count) 4423 throws SAXException, IOException 4424 { 4425 int i = 0; 4426 int j = readBufferPos; 4427 int b1; 4428 char c = 0; 4429 4430 4435 4436 while (i < count) { 4437 b1 = rawReadBuffer [i++]; 4438 4439 if (b1 < 0) { 4443 if ((b1 & 0xe0) == 0xc0) { 4444 c = (char) (((b1 & 0x1f) << 6) 4446 | getNextUtf8Byte (i++, count)); 4447 } else if ((b1 & 0xf0) == 0xe0) { 4448 c = (char) (((b1 & 0x0f) << 12) | 4452 (getNextUtf8Byte (i++, count) << 6) | 4453 getNextUtf8Byte (i++, count)); 4454 } else if ((b1 & 0xf8) == 0xf0) { 4455 int iso646 = b1 & 07; 4460 iso646 = (iso646 << 6) + getNextUtf8Byte (i++, count); 4461 iso646 = (iso646 << 6) + getNextUtf8Byte (i++, count); 4462 iso646 = (iso646 << 6) + getNextUtf8Byte (i++, count); 4463 4464 if (iso646 <= 0xffff) { 4465 c = (char) iso646; 4466 } else { 4467 if (iso646 > 0x0010ffff) 4468 encodingError ( 4469 "UTF-8 value out of 
range for Unicode", 4470 iso646, 0); 4471 iso646 -= 0x010000; 4472 readBuffer [j++] = (char) (0xd800 | (iso646 >> 10)); 4473 readBuffer [j++] = (char) (0xdc00 | (iso646 & 0x03ff)); 4474 continue; 4475 } 4476 } else { 4477 encodingError ( 4480 "invalid UTF-8 byte (check the XML declaration)", 4481 0xff & b1, i); 4482 c = 0; 4484 } 4485 } else { 4486 c = (char) b1; 4489 } 4490 readBuffer [j++] = c; 4491 if (c == '\r') 4492 sawCR = true; 4493 } 4494 readBufferLength = j; 4496 } 4497 4498 4499 4509 private int getNextUtf8Byte (int pos, int count) 4510 throws SAXException, IOException 4511 { 4512 int val; 4513 4514 if (pos < count) { 4517 val = rawReadBuffer [pos]; 4518 } else { 4519 val = is.read (); 4520 if (val == -1) { 4521 encodingError ("unfinished multi-byte UTF-8 sequence at EOF", 4522 -1, pos); 4523 } 4524 } 4525 4526 if ((val & 0xc0) != 0x80) { 4528 encodingError ("bad continuation of multi-byte UTF-8 sequence", 4529 val, pos + 1); 4530 } 4531 4532 return (val & 0x3f); 4534 } 4535 4536 4537 4551 private void copyIso8859_1ReadBuffer (int count, char mask) 4552 throws IOException 4553 { 4554 int i, j; 4555 for (i = 0, j = readBufferPos; i < count; i++, j++) { 4556 char c = (char) (rawReadBuffer [i] & 0xff); 4557 if ((c & mask) != 0) 4558 throw new CharConversionException ("non-ASCII character U+" 4559 + Integer.toHexString (c)); 4560 readBuffer [j] = c; 4561 if (c == '\r') { 4562 sawCR = true; 4563 } 4564 } 4565 readBufferLength = j; 4566 } 4567 4568 4569 4583 private void copyUcs2ReadBuffer (int count, int shift1, int shift2) 4584 throws SAXException 4585 { 4586 int j = readBufferPos; 4587 4588 if (count > 0 && (count % 2) != 0) { 4589 encodingError ("odd number of bytes in UCS-2 encoding", -1, count); 4590 } 4591 if (shift1 == 0) { for (int i = 0; i < count; i += 2) { 4594 char c = (char) (rawReadBuffer [i + 1] << 8); 4595 c |= 0xff & rawReadBuffer [i]; 4596 readBuffer [j++] = c; 4597 if (c == '\r') 4598 sawCR = true; 4599 } 4600 } else { for (int i = 0; i < 
count; i += 2) { 4602 char c = (char) (rawReadBuffer [i] << 8); 4603 c |= 0xff & rawReadBuffer [i + 1]; 4604 readBuffer [j++] = c; 4605 if (c == '\r') 4606 sawCR = true; 4607 } 4608 } 4609 readBufferLength = j; 4610 } 4611 4612 4613 4633 private void copyUcs4ReadBuffer (int count, int shift1, int shift2, 4634 int shift3, int shift4) 4635 throws SAXException 4636 { 4637 int j = readBufferPos; 4638 4639 if (count > 0 && (count % 4) != 0) { 4640 encodingError ( 4641 "number of bytes in UCS-4 encoding not divisible by 4", 4642 -1, count); 4643 } 4644 for (int i = 0; i < count; i += 4) { 4645 int value = (((rawReadBuffer [i] & 0xff) << shift1) | 4646 ((rawReadBuffer [i + 1] & 0xff) << shift2) | 4647 ((rawReadBuffer [i + 2] & 0xff) << shift3) | 4648 ((rawReadBuffer [i + 3] & 0xff) << shift4)); 4649 if (value < 0x0000ffff) { 4650 readBuffer [j++] = (char) value; 4651 if (value == (int) '\r') { 4652 sawCR = true; 4653 } 4654 } else if (value < 0x0010ffff) { 4655 value -= 0x010000; 4656 readBuffer [j++] = (char) (0xd8 | ((value >> 10) & 0x03ff)); 4657 readBuffer [j++] = (char) (0xdc | (value & 0x03ff)); 4658 } else { 4659 encodingError ("UCS-4 value out of range for Unicode", 4660 value, i); 4661 } 4662 } 4663 readBufferLength = j; 4664 } 4665 4666 4667 4670 private void encodingError (String message, int value, int offset) 4671 throws SAXException 4672 { 4673 String uri; 4674 4675 if (value != -1) { 4676 message = message + " (code: 0x" + 4677 Integer.toHexString (value) + ')'; 4678 } 4679 if (externalEntity != null) { 4680 uri = externalEntity.getURL ().toString (); 4681 } else { 4682 uri = baseURI; 4683 } 4684 handler.error (message, uri, -1, offset + currentByteCount); 4685 } 4686 4687 4688 4692 4695 private void initializeVariables () 4696 { 4697 line = 1; 4699 column = 0; 4700 4701 dataBufferPos = 0; 4703 dataBuffer = new char [DATA_BUFFER_INITIAL]; 4704 nameBufferPos = 0; 4705 nameBuffer = new char [NAME_BUFFER_INITIAL]; 4706 4707 elementInfo = new Hashtable (); 4709 
entityInfo = new Hashtable (); 4710 notationInfo = new Hashtable (); 4711 4712 currentElement = null; 4715 currentElementContent = CONTENT_UNDECLARED; 4716 4717 sourceType = INPUT_NONE; 4719 inputStack = new Stack (); 4720 entityStack = new Stack (); 4721 externalEntity = null; 4722 tagAttributePos = 0; 4723 tagAttributes = new String [100]; 4724 rawReadBuffer = new byte [READ_BUFFER_MAX]; 4725 readBufferOverflow = -1; 4726 4727 inLiteral = false; 4728 expandPE = false; 4729 peIsError = false; 4730 4731 inCDATA = false; 4732 4733 symbolTable = new Object [SYMBOL_TABLE_LENGTH][]; 4734 } 4735 4736 4737 4740 private void cleanupVariables () 4741 { 4742 dataBuffer = null; 4743 nameBuffer = null; 4744 4745 elementInfo = null; 4746 entityInfo = null; 4747 notationInfo = null; 4748 4749 currentElement = null; 4750 4751 inputStack = null; 4752 entityStack = null; 4753 externalEntity = null; 4754 4755 tagAttributes = null; 4756 rawReadBuffer = null; 4757 4758 symbolTable = null; 4759 } 4760 4761 4762 static class EncodingException extends IOException 4763 { 4764 EncodingException (String encoding) { super (encoding); } 4765 } 4766 4767 private SAXDriver handler; 4771 4772 private Reader reader; private InputStream is; private int line; private int column; private int sourceType; private Stack inputStack; private URLConnection externalEntity; private int encoding; private int currentByteCount; 4785 private char readBuffer []; 4789 private int readBufferPos; 4790 private int readBufferLength; 4791 private int readBufferOverflow; 4793 4794 private final static int READ_BUFFER_MAX = 16384; 4798 private byte rawReadBuffer []; 4799 4800 4801 private static int DATA_BUFFER_INITIAL = 4096; 4805 private char dataBuffer []; 4806 private int dataBufferPos; 4807 4808 private static int NAME_BUFFER_INITIAL = 1024; 4812 private char nameBuffer []; 4813 private int nameBufferPos; 4814 4815 4816 private Hashtable elementInfo; 4820 private Hashtable entityInfo; 4821 private Hashtable 
notationInfo; 4822 4823 4824 private String currentElement; 4828 private int currentElementContent; 4829 4830 private String basePublicId; 4834 private String baseURI; 4835 private int baseEncoding; 4836 private Reader baseReader; 4837 private InputStream baseInputStream; 4838 private char baseInputBuffer []; 4839 private int baseInputBufferStart; 4840 private int baseInputBufferLength; 4841 4842 private Stack entityStack; 4846 4847 private boolean inLiteral; 4852 private boolean expandPE; 4853 private boolean peIsError; 4854 4855 private final static int SYMBOL_TABLE_LENGTH = 1087; 4859 private Object symbolTable [][]; 4860 4861 private String tagAttributes []; 4865 private int tagAttributePos; 4866 4867 private boolean sawCR; 4873 4874 private boolean inCDATA; 4878} 4879 | Popular Tags |