KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > tomcat > util > buf > ByteChunk


/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

17
18 package org.apache.tomcat.util.buf;
19
import java.io.IOException;
import java.io.Serializable;
import java.io.UnsupportedEncodingException;
22
/*
 * In a server it is very important to be able to operate on
 * the original byte[] without converting everything to chars.
 * Some protocols are ASCII only, and some allow different
 * non-UNICODE encodings. The encoding is not known beforehand,
 * and can even change during the execution of the protocol
 * (for example, a multipart message may have parts with different
 * encodings).
 *
 * For HTTP it is not very clear how the encoding of RequestURI
 * and mime values can be determined, but it is a great advantage
 * to be able to parse the request without converting to string.
 */

36
// TODO: This class could either extend ByteBuffer or, better, wrap a ByteBuffer
// internally; that way it could provide the search/etc. operations on ByteBuffer
// as a helper.

40 /**
41  * This class is used to represent a chunk of bytes, and
42  * utilities to manipulate byte[].
43  *
44  * The buffer can be modified and used for both input and output.
45  *
46  * There are 2 modes: The chunk can be associated with a sink - ByteInputChannel or ByteOutputChannel,
47  * which will be used when the buffer is empty ( on input ) or filled ( on output ).
48  * For output, it can also grow. This operating mode is selected by calling setLimit() or
49  * allocate(initial, limit) with limit != -1.
50  *
51  * Various search and append method are defined - similar with String and StringBuffer, but
52  * operating on bytes.
53  *
54  * This is important because it allows processing the http headers directly on the received bytes,
55  * without converting to chars and Strings until the strings are needed. In addition, the charset
56  * is determined later, from headers or user code.
57  *
58  *
59  * @author dac@sun.com
60  * @author James Todd [gonzo@sun.com]
61  * @author Costin Manolache
62  * @author Remy Maucherat
63  */

64 public final class ByteChunk implements Cloneable JavaDoc, Serializable JavaDoc {
65
66     /** Input interface, used when the buffer is emptiy
67      *
68      * Same as java.nio.channel.ReadableByteChannel
69      */

70     public static interface ByteInputChannel {
71         /**
72          * Read new bytes ( usually the internal conversion buffer ).
73          * The implementation is allowed to ignore the parameters,
74          * and mutate the chunk if it wishes to implement its own buffering.
75          */

76         public int realReadBytes(byte cbuf[], int off, int len)
77             throws IOException JavaDoc;
78     }
79
80     /** Same as java.nio.channel.WrittableByteChannel.
81      */

82     public static interface ByteOutputChannel {
83         /**
84          * Send the bytes ( usually the internal conversion buffer ).
85          * Expect 8k output if the buffer is full.
86          */

87         public void realWriteBytes(byte cbuf[], int off, int len)
88             throws IOException JavaDoc;
89     }
90
91     // --------------------
92

93     /** Default encoding used to convert to strings. It should be UTF8,
94     as most standards seem to converge, but the servlet API requires
95     8859_1, and this object is used mostly for servlets.
96     */

97     public static final String JavaDoc DEFAULT_CHARACTER_ENCODING="ISO-8859-1";
98         
99     // byte[]
100
private byte[] buff;
101
102     private int start=0;
103     private int end;
104
105     private String JavaDoc enc;
106
107     private boolean isSet=false; // XXX
108

109     // How much can it grow, when data is added
110
private int limit=-1;
111
112     private ByteInputChannel in = null;
113     private ByteOutputChannel out = null;
114
115     private boolean isOutput=false;
116     private boolean optimizedWrite=true;
117     
118     /**
119      * Creates a new, uninitialized ByteChunk object.
120      */

121     public ByteChunk() {
122     }
123
124     public ByteChunk( int initial ) {
125     allocate( initial, -1 );
126     }
127
128     //--------------------
129
public ByteChunk getClone() {
130     try {
131         return (ByteChunk)this.clone();
132     } catch( Exception JavaDoc ex) {
133         return null;
134     }
135     }
136
137     public boolean isNull() {
138     return ! isSet; // buff==null;
139
}
140     
141     /**
142      * Resets the message buff to an uninitialized state.
143      */

144     public void recycle() {
145     // buff = null;
146
enc=null;
147     start=0;
148     end=0;
149     isSet=false;
150     }
151
152     public void reset() {
153     buff=null;
154     }
155
156     // -------------------- Setup --------------------
157

158     public void allocate( int initial, int limit ) {
159     isOutput=true;
160     if( buff==null || buff.length < initial ) {
161         buff=new byte[initial];
162     }
163     this.limit=limit;
164     start=0;
165     end=0;
166     isSet=true;
167     }
168
169     /**
170      * Sets the message bytes to the specified subarray of bytes.
171      *
172      * @param b the ascii bytes
173      * @param off the start offset of the bytes
174      * @param len the length of the bytes
175      */

176     public void setBytes(byte[] b, int off, int len) {
177         buff = b;
178         start = off;
179         end = start+ len;
180         isSet=true;
181     }
182
183     public void setOptimizedWrite(boolean optimizedWrite) {
184         this.optimizedWrite = optimizedWrite;
185     }
186
187     public void setEncoding( String JavaDoc enc ) {
188     this.enc=enc;
189     }
190     public String JavaDoc getEncoding() {
191         if (enc == null)
192             enc=DEFAULT_CHARACTER_ENCODING;
193         return enc;
194     }
195
196     /**
197      * Returns the message bytes.
198      */

199     public byte[] getBytes() {
200     return getBuffer();
201     }
202
203     /**
204      * Returns the message bytes.
205      */

206     public byte[] getBuffer() {
207     return buff;
208     }
209
210     /**
211      * Returns the start offset of the bytes.
212      * For output this is the end of the buffer.
213      */

214     public int getStart() {
215     return start;
216     }
217
218     public int getOffset() {
219     return start;
220     }
221
222     public void setOffset(int off) {
223         if (end < off ) end=off;
224     start=off;
225     }
226
227     /**
228      * Returns the length of the bytes.
229      * XXX need to clean this up
230      */

231     public int getLength() {
232     return end-start;
233     }
234
235     /** Maximum amount of data in this buffer.
236      *
237      * If -1 or not set, the buffer will grow undefinitely.
238      * Can be smaller than the current buffer size ( which will not shrink ).
239      * When the limit is reached, the buffer will be flushed ( if out is set )
240      * or throw exception.
241      */

242     public void setLimit(int limit) {
243     this.limit=limit;
244     }
245     
246     public int getLimit() {
247     return limit;
248     }
249
250     /**
251      * When the buffer is empty, read the data from the input channel.
252      */

253     public void setByteInputChannel(ByteInputChannel in) {
254         this.in = in;
255     }
256
257     /** When the buffer is full, write the data to the output channel.
258      * Also used when large amount of data is appended.
259      *
260      * If not set, the buffer will grow to the limit.
261      */

262     public void setByteOutputChannel(ByteOutputChannel out) {
263     this.out=out;
264     }
265
266     public int getEnd() {
267     return end;
268     }
269
270     public void setEnd( int i ) {
271     end=i;
272     }
273
274     // -------------------- Adding data to the buffer --------------------
275
/** Append a char, by casting it to byte. This IS NOT intended for unicode.
276      *
277      * @param c
278      * @throws IOException
279      */

280     public void append( char c )
281     throws IOException JavaDoc
282     {
283     append( (byte)c);
284     }
285
286     public void append( byte b )
287     throws IOException JavaDoc
288     {
289     makeSpace( 1 );
290
291     // couldn't make space
292
if( limit >0 && end >= limit ) {
293         flushBuffer();
294     }
295     buff[end++]=b;
296     }
297
298     public void append( ByteChunk src )
299     throws IOException JavaDoc
300     {
301     append( src.getBytes(), src.getStart(), src.getLength());
302     }
303
304     /** Add data to the buffer
305      */

306     public void append( byte src[], int off, int len )
307     throws IOException JavaDoc
308     {
309     // will grow, up to limit
310
makeSpace( len );
311
312     // if we don't have limit: makeSpace can grow as it wants
313
if( limit < 0 ) {
314         // assert: makeSpace made enough space
315
System.arraycopy( src, off, buff, end, len );
316         end+=len;
317         return;
318     }
319
320         // Optimize on a common case.
321
// If the buffer is empty and the source is going to fill up all the
322
// space in buffer, may as well write it directly to the output,
323
// and avoid an extra copy
324
if ( optimizedWrite && len == limit && end == start && out != null ) {
325             out.realWriteBytes( src, off, len );
326             return;
327         }
328     // if we have limit and we're below
329
if( len <= limit - end ) {
330         // makeSpace will grow the buffer to the limit,
331
// so we have space
332
System.arraycopy( src, off, buff, end, len );
333         end+=len;
334         return;
335     }
336
337     // need more space than we can afford, need to flush
338
// buffer
339

340     // the buffer is already at ( or bigger than ) limit
341

342         // We chunk the data into slices fitting in the buffer limit, although
343
// if the data is written directly if it doesn't fit
344

345         int avail=limit-end;
346         System.arraycopy(src, off, buff, end, avail);
347         end += avail;
348
349         flushBuffer();
350
351         int remain = len - avail;
352
353         while (remain > (limit - end)) {
354             out.realWriteBytes( src, (off + len) - remain, limit - end );
355             remain = remain - (limit - end);
356         }
357
358         System.arraycopy(src, (off + len) - remain, buff, end, remain);
359         end += remain;
360
361     }
362
363
364     // -------------------- Removing data from the buffer --------------------
365

366     public int substract()
367         throws IOException JavaDoc {
368
369         if ((end - start) == 0) {
370             if (in == null)
371                 return -1;
372             int n = in.realReadBytes( buff, 0, buff.length );
373             if (n < 0)
374                 return -1;
375         }
376
377         return (buff[start++] & 0xFF);
378
379     }
380
381     public int substract(ByteChunk src)
382         throws IOException JavaDoc {
383
384         if ((end - start) == 0) {
385             if (in == null)
386                 return -1;
387             int n = in.realReadBytes( buff, 0, buff.length );
388             if (n < 0)
389                 return -1;
390         }
391
392         int len = getLength();
393         src.append(buff, start, len);
394         start = end;
395         return len;
396
397     }
398
399     public int substract( byte src[], int off, int len )
400         throws IOException JavaDoc {
401
402         if ((end - start) == 0) {
403             if (in == null)
404                 return -1;
405             int n = in.realReadBytes( buff, 0, buff.length );
406             if (n < 0)
407                 return -1;
408         }
409
410         int n = len;
411         if (len > getLength()) {
412             n = getLength();
413         }
414         System.arraycopy(buff, start, src, off, n);
415         start += n;
416         return n;
417
418     }
419
420
421     /** Send the buffer to the sink. Called by append() when the limit is reached.
422      * You can also call it explicitely to force the data to be written.
423      *
424      * @throws IOException
425      */

426     public void flushBuffer()
427     throws IOException JavaDoc
428     {
429     //assert out!=null
430
if( out==null ) {
431         throw new IOException JavaDoc( "Buffer overflow, no sink " + limit + " " +
432                    buff.length );
433     }
434     out.realWriteBytes( buff, start, end-start );
435     end=start;
436     }
437
438     /** Make space for len chars. If len is small, allocate
439      * a reserve space too. Never grow bigger than limit.
440      */

441     private void makeSpace(int count)
442     {
443     byte[] tmp = null;
444
445     int newSize;
446     int desiredSize=end + count;
447
448     // Can't grow above the limit
449
if( limit > 0 &&
450         desiredSize > limit) {
451         desiredSize=limit;
452     }
453
454     if( buff==null ) {
455         if( desiredSize < 256 ) desiredSize=256; // take a minimum
456
buff=new byte[desiredSize];
457     }
458     
459     // limit < buf.length ( the buffer is already big )
460
// or we already have space XXX
461
if( desiredSize <= buff.length ) {
462         return;
463     }
464     // grow in larger chunks
465
if( desiredSize < 2 * buff.length ) {
466         newSize= buff.length * 2;
467         if( limit >0 &&
468         newSize > limit ) newSize=limit;
469         tmp=new byte[newSize];
470     } else {
471         newSize= buff.length * 2 + count ;
472         if( limit > 0 &&
473         newSize > limit ) newSize=limit;
474         tmp=new byte[newSize];
475     }
476     
477     System.arraycopy(buff, start, tmp, 0, end-start);
478     buff = tmp;
479     tmp = null;
480     end=end-start;
481     start=0;
482     }
483     
484     // -------------------- Conversion and getters --------------------
485

486     public String JavaDoc toString() {
487         if (null == buff) {
488             return null;
489         } else if (end-start == 0) {
490             return "";
491         }
492         return StringCache.toString(this);
493     }
494     
495     public String JavaDoc toStringInternal() {
496         String JavaDoc strValue=null;
497         try {
498             if( enc==null ) enc=DEFAULT_CHARACTER_ENCODING;
499             strValue = new String JavaDoc( buff, start, end-start, enc );
500             /*
501              Does not improve the speed too much on most systems,
502              it's safer to use the "clasical" new String().
503              
504              Most overhead is in creating char[] and copying,
505              the internal implementation of new String() is very close to
506              what we do. The decoder is nice for large buffers and if
507              we don't go to String ( so we can take advantage of reduced GC)
508              
509              // Method is commented out, in:
510               return B2CConverter.decodeString( enc );
511               */

512         } catch (java.io.UnsupportedEncodingException JavaDoc e) {
513             // Use the platform encoding in that case; the usage of a bad
514
// encoding will have been logged elsewhere already
515
strValue = new String JavaDoc(buff, start, end-start);
516         }
517         return strValue;
518     }
519
520     public int getInt()
521     {
522     return Ascii.parseInt(buff, start,end-start);
523     }
524
525     public long getLong() {
526         return Ascii.parseLong(buff, start,end-start);
527     }
528
529
530     // -------------------- equals --------------------
531

532     /**
533      * Compares the message bytes to the specified String object.
534      * @param s the String to compare
535      * @return true if the comparison succeeded, false otherwise
536      */

537     public boolean equals(String JavaDoc s) {
538     // XXX ENCODING - this only works if encoding is UTF8-compat
539
// ( ok for tomcat, where we compare ascii - header names, etc )!!!
540

541     byte[] b = buff;
542     int blen = end-start;
543     if (b == null || blen != s.length()) {
544         return false;
545     }
546     int boff = start;
547     for (int i = 0; i < blen; i++) {
548         if (b[boff++] != s.charAt(i)) {
549         return false;
550         }
551     }
552     return true;
553     }
554
555     /**
556      * Compares the message bytes to the specified String object.
557      * @param s the String to compare
558      * @return true if the comparison succeeded, false otherwise
559      */

560     public boolean equalsIgnoreCase(String JavaDoc s) {
561     byte[] b = buff;
562     int blen = end-start;
563     if (b == null || blen != s.length()) {
564         return false;
565     }
566     int boff = start;
567     for (int i = 0; i < blen; i++) {
568         if (Ascii.toLower(b[boff++]) != Ascii.toLower(s.charAt(i))) {
569         return false;
570         }
571     }
572     return true;
573     }
574
575     public boolean equals( ByteChunk bb ) {
576     return equals( bb.getBytes(), bb.getStart(), bb.getLength());
577     }
578     
579     public boolean equals( byte b2[], int off2, int len2) {
580     byte b1[]=buff;
581     if( b1==null && b2==null ) return true;
582
583     int len=end-start;
584     if ( len2 != len || b1==null || b2==null )
585         return false;
586         
587     int off1 = start;
588
589     while ( len-- > 0) {
590         if (b1[off1++] != b2[off2++]) {
591         return false;
592         }
593     }
594     return true;
595     }
596
597     public boolean equals( CharChunk cc ) {
598     return equals( cc.getChars(), cc.getStart(), cc.getLength());
599     }
600     
601     public boolean equals( char c2[], int off2, int len2) {
602     // XXX works only for enc compatible with ASCII/UTF !!!
603
byte b1[]=buff;
604     if( c2==null && b1==null ) return true;
605     
606     if (b1== null || c2==null || end-start != len2 ) {
607         return false;
608     }
609     int off1 = start;
610     int len=end-start;
611     
612     while ( len-- > 0) {
613         if ( (char)b1[off1++] != c2[off2++]) {
614         return false;
615         }
616     }
617     return true;
618     }
619
620     /**
621      * Returns true if the message bytes starts with the specified string.
622      * @param s the string
623      */

624     public boolean startsWith(String JavaDoc s) {
625     // Works only if enc==UTF
626
byte[] b = buff;
627     int blen = s.length();
628     if (b == null || blen > end-start) {
629         return false;
630     }
631     int boff = start;
632     for (int i = 0; i < blen; i++) {
633         if (b[boff++] != s.charAt(i)) {
634         return false;
635         }
636     }
637     return true;
638     }
639
640     /* Returns true if the message bytes start with the specified byte array */
641     public boolean startsWith(byte[] b2) {
642         byte[] b1 = buff;
643         if (b1 == null && b2 == null) {
644             return true;
645         }
646
647         int len = end - start;
648         if (b1 == null || b2 == null || b2.length > len) {
649             return false;
650         }
651         for (int i = start, j = 0; i < end && j < b2.length; ) {
652             if (b1[i++] != b2[j++])
653                 return false;
654         }
655         return true;
656     }
657
658     /**
659      * Returns true if the message bytes starts with the specified string.
660      * @param s the string
661      * @param pos The position
662      */

663     public boolean startsWithIgnoreCase(String JavaDoc s, int pos) {
664     byte[] b = buff;
665     int len = s.length();
666     if (b == null || len+pos > end-start) {
667         return false;
668     }
669     int off = start+pos;
670     for (int i = 0; i < len; i++) {
671         if (Ascii.toLower( b[off++] ) != Ascii.toLower( s.charAt(i))) {
672         return false;
673         }
674     }
675     return true;
676     }
677
678     public int indexOf( String JavaDoc src, int srcOff, int srcLen, int myOff ) {
679     char first=src.charAt( srcOff );
680
681     // Look for first char
682
int srcEnd = srcOff + srcLen;
683         
684     for( int i=myOff+start; i <= (end - srcLen); i++ ) {
685         if( buff[i] != first ) continue;
686         // found first char, now look for a match
687
int myPos=i+1;
688         for( int srcPos=srcOff + 1; srcPos< srcEnd; ) {
689                 if( buff[myPos++] != src.charAt( srcPos++ ))
690             break;
691                 if( srcPos==srcEnd ) return i-start; // found it
692
}
693     }
694     return -1;
695     }
696
697     // -------------------- Hash code --------------------
698

699     // normal hash.
700
public int hash() {
701     return hashBytes( buff, start, end-start);
702     }
703
704     // hash ignoring case
705
public int hashIgnoreCase() {
706     return hashBytesIC( buff, start, end-start );
707     }
708
709     private static int hashBytes( byte buff[], int start, int bytesLen ) {
710     int max=start+bytesLen;
711     byte bb[]=buff;
712     int code=0;
713     for (int i = start; i < max ; i++) {
714         code = code * 37 + bb[i];
715     }
716     return code;
717     }
718
719     private static int hashBytesIC( byte bytes[], int start,
720                     int bytesLen )
721     {
722     int max=start+bytesLen;
723     byte bb[]=bytes;
724     int code=0;
725     for (int i = start; i < max ; i++) {
726         code = code * 37 + Ascii.toLower(bb[i]);
727     }
728     return code;
729     }
730
731     /**
732      * Returns true if the message bytes starts with the specified string.
733      * @param c the character
734      * @param starting The start position
735      */

736     public int indexOf(char c, int starting) {
737     int ret = indexOf( buff, start+starting, end, c);
738     return (ret >= start) ? ret - start : -1;
739     }
740
741     public static int indexOf( byte bytes[], int off, int end, char qq )
742     {
743     // Works only for UTF
744
while( off < end ) {
745         byte b=bytes[off];
746         if( b==qq )
747         return off;
748         off++;
749     }
750     return -1;
751     }
752
753     /** Find a character, no side effects.
754      * @return index of char if found, -1 if not
755      */

756     public static int findChar( byte buf[], int start, int end, char c ) {
757     byte b=(byte)c;
758     int offset = start;
759     while (offset < end) {
760         if (buf[offset] == b) {
761         return offset;
762         }
763         offset++;
764     }
765     return -1;
766     }
767
768     /** Find a character, no side effects.
769      * @return index of char if found, -1 if not
770      */

771     public static int findChars( byte buf[], int start, int end, byte c[] ) {
772     int clen=c.length;
773     int offset = start;
774     while (offset < end) {
775         for( int i=0; i<clen; i++ )
776         if (buf[offset] == c[i]) {
777             return offset;
778         }
779         offset++;
780     }
781     return -1;
782     }
783
784     /** Find the first character != c
785      * @return index of char if found, -1 if not
786      */

787     public static int findNotChars( byte buf[], int start, int end, byte c[] )
788     {
789     int clen=c.length;
790     int offset = start;
791     boolean found;
792         
793     while (offset < end) {
794         found=true;
795         for( int i=0; i<clen; i++ ) {
796         if (buf[offset] == c[i]) {
797             found=false;
798             break;
799         }
800         }
801         if( found ) { // buf[offset] != c[0..len]
802
return offset;
803         }
804         offset++;
805     }
806     return -1;
807     }
808
809
810     /**
811      * Convert specified String to a byte array. This ONLY WORKS for ascii, UTF chars will be truncated.
812      *
813      * @param value to convert to byte array
814      * @return the byte array value
815      */

816     public static final byte[] convertToBytes(String JavaDoc value) {
817         byte[] result = new byte[value.length()];
818         for (int i = 0; i < value.length(); i++) {
819             result[i] = (byte) value.charAt(i);
820         }
821         return result;
822     }
823     
824     
825 }
826
Popular Tags