ByteChunk


1   /*
2    *  Licensed to the Apache Software Foundation (ASF) under one or more
3    *  contributor license agreements.  See the NOTICE file distributed with
4    *  this work for additional information regarding copyright ownership.
5    *  The ASF licenses this file to You under the Apache License, Version 2.0
6    *  (the "License"); you may not use this file except in compliance with
7    *  the License.  You may obtain a copy of the License at
8    *
9    *      http://www.apache.org/licenses/LICENSE-2.0
10   *
11   *  Unless required by applicable law or agreed to in writing, software
12   *  distributed under the License is distributed on an "AS IS" BASIS,
13   *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14   *  See the License for the specific language governing permissions and
15   *  limitations under the License.
16   */
17  
18  package org.apache.tomcat.util.buf;
19  
20  import java.io.IOException  ;
21  import java.io.Serializable  ;
22  
23  /*
24   * In a server it is very important to be able to operate on
25   * the original byte[] without converting everything to chars.
26   * Some protocols are ASCII only, and some allow different
27   * non-UNICODE encodings. The encoding is not known beforehand,
28   * and can even change during the execution of the protocol.
29   * ( for example a multipart message may have parts with different
30   *  encoding )
31   *
32   * For HTTP it is not very clear how the encoding of RequestURI
33   * and mime values can be determined, but it is a great advantage
34   * to be able to parse the request without converting to string.
35   */
36  
37  // TODO: This class could either extend ByteBuffer or, better, wrap a ByteBuffer;
38  // that way it could provide the search/etc. operations on a ByteBuffer, as a helper.
39  
40  /**
41   * This class is used to represent a chunk of bytes, and
42   * utilities to manipulate byte[].
43   *
44   * The buffer can be modified and used for both input and output.
45   *
46   * There are 2 modes: The chunk can be associated with a sink - ByteInputChannel or ByteOutputChannel,
47   * which will be used when the buffer is empty ( on input ) or filled ( on output ).
48   * For output, it can also grow. This operating mode is selected by calling setLimit() or
49   * allocate(initial, limit) with limit != -1.
50   *
51   * Various search and append method are defined - similar with String and StringBuffer, but
52   * operating on bytes.
53   *
54   * This is important because it allows processing the http headers directly on the received bytes,
55   * without converting to chars and Strings until the strings are needed. In addition, the charset
56   * is determined later, from headers or user code.
57   *
58   *
59   * @author dac@sun.com
60   * @author James Todd [gonzo@sun.com]
61   * @author Costin Manolache
62   * @author Remy Maucherat
63   */
64  public final class ByteChunk implements Cloneable  , Serializable   {
65  
66      /** Input interface, used when the buffer is emptiy
67       *
68       * Same as java.nio.channel.ReadableByteChannel
69       */
70      public static interface ByteInputChannel {
71          /** 
72           * Read new bytes ( usually the internal conversion buffer ).
73           * The implementation is allowed to ignore the parameters, 
74           * and mutate the chunk if it wishes to implement its own buffering.
75           */
76          public int realReadBytes(byte cbuf[], int off, int len)
77              throws IOException  ;
78      }
79  
80      /** Same as java.nio.channel.WrittableByteChannel.
81       */
82      public static interface ByteOutputChannel {
83          /** 
84           * Send the bytes ( usually the internal conversion buffer ).
85           * Expect 8k output if the buffer is full.
86           */
87          public void realWriteBytes(byte cbuf[], int off, int len)
88              throws IOException  ;
89      }
90  
91      // --------------------
92  
93      /** Default encoding used to convert to strings. It should be UTF8,
94      as most standards seem to converge, but the servlet API requires
95      8859_1, and this object is used mostly for servlets. 
96      */
97      public static final String   DEFAULT_CHARACTER_ENCODING="ISO-8859-1";
98          
99      // byte[]
100     private byte[] buff;
101 
102     private int start=0;
103     private int end;
104 
105     private String   enc;
106 
107     private boolean isSet=false; // XXX
108 
109     // How much can it grow, when data is added
110     private int limit=-1;
111 
112     private ByteInputChannel in = null;
113     private ByteOutputChannel out = null;
114 
115     private boolean isOutput=false;
116     private boolean optimizedWrite=true;
117     
118     /**
119      * Creates a new, uninitialized ByteChunk object.
120      */
121     public ByteChunk() {
122     }
123 
124     public ByteChunk( int initial ) {
125     allocate( initial, -1 );
126     }
127 
128     //--------------------
129     public ByteChunk getClone() {
130     try {
131         return (ByteChunk)this.clone();
132     } catch( Exception   ex) {
133         return null;
134     }
135     }
136 
137     public boolean isNull() {
138     return ! isSet; // buff==null;
139     }
140     
141     /**
142      * Resets the message buff to an uninitialized state.
143      */
144     public void recycle() {
145     //  buff = null;
146     enc=null;
147     start=0;
148     end=0;
149     isSet=false;
150     }
151 
152     public void reset() {
153     buff=null;
154     }
155 
156     // -------------------- Setup --------------------
157 
158     public void allocate( int initial, int limit  ) {
159     isOutput=true;
160     if( buff==null || buff.length < initial ) {
161         buff=new byte[initial];
162     }    
163     this.limit=limit;
164     start=0;
165     end=0;
166     isSet=true;
167     }
168 
169     /**
170      * Sets the message bytes to the specified subarray of bytes.
171      * 
172      * @param b the ascii bytes
173      * @param off the start offset of the bytes
174      * @param len the length of the bytes
175      */
176     public void setBytes(byte[] b, int off, int len) {
177         buff = b;
178         start = off;
179         end = start+ len;
180         isSet=true;
181     }
182 
183     public void setOptimizedWrite(boolean optimizedWrite) {
184         this.optimizedWrite = optimizedWrite;
185     }
186 
187     public void setEncoding( String   enc ) {
188     this.enc=enc;
189     }
190     public String   getEncoding() {
191         if (enc == null)
192             enc=DEFAULT_CHARACTER_ENCODING;
193         return enc;
194     }
195 
196     /**
197      * Returns the message bytes.
198      */
199     public byte[] getBytes() {
200     return getBuffer();
201     }
202 
203     /**
204      * Returns the message bytes.
205      */
206     public byte[] getBuffer() {
207     return buff;
208     }
209 
210     /**
211      * Returns the start offset of the bytes.
212      * For output this is the end of the buffer.
213      */
214     public int getStart() {
215     return start;
216     }
217 
218     public int getOffset() {
219     return start;
220     }
221 
222     public void setOffset(int off) {
223         if (end < off ) end=off;
224     start=off;
225     }
226 
227     /**
228      * Returns the length of the bytes.
229      * XXX need to clean this up
230      */
231     public int getLength() {
232     return end-start;
233     }
234 
235     /** Maximum amount of data in this buffer.
236      *
237      *  If -1 or not set, the buffer will grow undefinitely.
238      *  Can be smaller than the current buffer size ( which will not shrink ).
239      *  When the limit is reached, the buffer will be flushed ( if out is set )
240      *  or throw exception.
241      */
242     public void setLimit(int limit) {
243     this.limit=limit;
244     }
245     
246     public int getLimit() {
247     return limit;
248     }
249 
250     /**
251      * When the buffer is empty, read the data from the input channel.
252      */
253     public void setByteInputChannel(ByteInputChannel in) {
254         this.in = in;
255     }
256 
257     /** When the buffer is full, write the data to the output channel.
258      *  Also used when large amount of data is appended.
259      *
260      *  If not set, the buffer will grow to the limit.
261      */
262     public void setByteOutputChannel(ByteOutputChannel out) {
263     this.out=out;
264     }
265 
266     public int getEnd() {
267     return end;
268     }
269 
270     public void setEnd( int i ) {
271     end=i;
272     }
273 
274     // -------------------- Adding data to the buffer --------------------
275     /** Append a char, by casting it to byte. This IS NOT intended for unicode.
276      *
277      * @param c
278      * @throws IOException
279      */
280     public void append( char c )
281     throws IOException  
282     {
283     append( (byte)c);
284     }
285 
286     public void append( byte b )
287     throws IOException  
288     {
289     makeSpace( 1 );
290 
291     // couldn't make space
292     if( limit >0 && end >= limit ) {
293         flushBuffer();
294     }
295     buff[end++]=b;
296     }
297 
298     public void append( ByteChunk src )
299     throws IOException  
300     {
301     append( src.getBytes(), src.getStart(), src.getLength());
302     }
303 
304     /** Add data to the buffer
305      */
306     public void append( byte src[], int off, int len )
307     throws IOException  
308     {
309     // will grow, up to limit
310     makeSpace( len );
311 
312     // if we don't have limit: makeSpace can grow as it wants
313     if( limit < 0 ) {
314         // assert: makeSpace made enough space
315         System.arraycopy( src, off, buff, end, len );
316         end+=len;
317         return;
318     }
319 
320         // Optimize on a common case.
321         // If the buffer is empty and the source is going to fill up all the
322         // space in buffer, may as well write it directly to the output,
323         // and avoid an extra copy
324         if ( optimizedWrite && len == limit && end == start && out != null ) {
325             out.realWriteBytes( src, off, len );
326             return;
327         }
328     // if we have limit and we're below
329     if( len <= limit - end ) {
330         // makeSpace will grow the buffer to the limit,
331         // so we have space
332         System.arraycopy( src, off, buff, end, len );
333         end+=len;
334         return;
335     }
336 
337     // need more space than we can afford, need to flush
338     // buffer
339 
340     // the buffer is already at ( or bigger than ) limit
341 
342         // We chunk the data into slices fitting in the buffer limit, although
343         // if the data is written directly if it doesn't fit
344 
345         int avail=limit-end;
346         System.arraycopy(src, off, buff, end, avail);
347         end += avail;
348 
349         flushBuffer();
350 
351         int remain = len - avail;
352 
353         while (remain > (limit - end)) {
354             out.realWriteBytes( src, (off + len) - remain, limit - end );
355             remain = remain - (limit - end);
356         }
357 
358         System.arraycopy(src, (off + len) - remain, buff, end, remain);
359         end += remain;
360 
361     }
362 
363 
364     // -------------------- Removing data from the buffer --------------------
365 
366     public int substract()
367         throws IOException   {
368 
369         if ((end - start) == 0) {
370             if (in == null)
371                 return -1;
372             int n = in.realReadBytes( buff, 0, buff.length );
373             if (n < 0)
374                 return -1;
375         }
376 
377         return (buff[start++] & 0xFF);
378 
379     }
380 
381     public int substract(ByteChunk src)
382         throws IOException   {
383 
384         if ((end - start) == 0) {
385             if (in == null)
386                 return -1;
387             int n = in.realReadBytes( buff, 0, buff.length );
388             if (n < 0)
389                 return -1;
390         }
391 
392         int len = getLength();
393         src.append(buff, start, len);
394         start = end;
395         return len;
396 
397     }
398 
399     public int substract( byte src[], int off, int len )
400         throws IOException   {
401 
402         if ((end - start) == 0) {
403             if (in == null)
404                 return -1;
405             int n = in.realReadBytes( buff, 0, buff.length );
406             if (n < 0)
407                 return -1;
408         }
409 
410         int n = len;
411         if (len > getLength()) {
412             n = getLength();
413         }
414         System.arraycopy(buff, start, src, off, n);
415         start += n;
416         return n;
417 
418     }
419 
420 
421     /** Send the buffer to the sink. Called by append() when the limit is reached.
422      *  You can also call it explicitely to force the data to be written.
423      *
424      * @throws IOException
425      */
426     public void flushBuffer()
427     throws IOException  
428     {
429     //assert out!=null
430     if( out==null ) {
431         throw new IOException  ( "Buffer overflow, no sink " + limit + " " +
432                    buff.length  );
433     }
434     out.realWriteBytes( buff, start, end-start );
435     end=start;
436     }
437 
438     /** Make space for len chars. If len is small, allocate
439      *  a reserve space too. Never grow bigger than limit.
440      */
441     private void makeSpace(int count)
442     {
443     byte[] tmp = null;
444 
445     int newSize;
446     int desiredSize=end + count;
447 
448     // Can't grow above the limit
449     if( limit > 0 &&
450         desiredSize > limit) {
451         desiredSize=limit;
452     }
453 
454     if( buff==null ) {
455         if( desiredSize < 256 ) desiredSize=256; // take a minimum
456         buff=new byte[desiredSize];
457     }
458     
459     // limit < buf.length ( the buffer is already big )
460     // or we already have space XXX
461     if( desiredSize <= buff.length ) {
462         return;
463     }
464     // grow in larger chunks
465     if( desiredSize < 2 * buff.length ) {
466         newSize= buff.length * 2;
467         if( limit >0 &&
468         newSize > limit ) newSize=limit;
469         tmp=new byte[newSize];
470     } else {
471         newSize= buff.length * 2 + count ;
472         if( limit > 0 &&
473         newSize > limit ) newSize=limit;
474         tmp=new byte[newSize];
475     }
476     
477     System.arraycopy(buff, start, tmp, 0, end-start);
478     buff = tmp;
479     tmp = null;
480     end=end-start;
481     start=0;
482     }
483     
484     // -------------------- Conversion and getters --------------------
485 
486     public String   toString() {
487         if (null == buff) {
488             return null;
489         } else if (end-start == 0) {
490             return "";
491         }
492         return StringCache.toString(this);
493     }
494     
495     public String   toStringInternal() {
496         String   strValue=null;
497         try {
498             if( enc==null ) enc=DEFAULT_CHARACTER_ENCODING;
499             strValue = new String  ( buff, start, end-start, enc );
500             /*
501              Does not improve the speed too much on most systems,
502              it's safer to use the "clasical" new String().
503              
504              Most overhead is in creating char[] and copying,
505              the internal implementation of new String() is very close to
506              what we do. The decoder is nice for large buffers and if
507              we don't go to String ( so we can take advantage of reduced GC)
508              
509              // Method is commented out, in:
510               return B2CConverter.decodeString( enc );
511               */
512         } catch (java.io.UnsupportedEncodingException   e) {
513             // Use the platform encoding in that case; the usage of a bad
514             // encoding will have been logged elsewhere already
515             strValue = new String  (buff, start, end-start);
516         }
517         return strValue;
518     }
519 
520     public int getInt()
521     {
522     return Ascii.parseInt(buff, start,end-start);
523     }
524 
525     public long getLong() {
526         return Ascii.parseLong(buff, start,end-start);
527     }
528 
529 
530     // -------------------- equals --------------------
531 
532     /**
533      * Compares the message bytes to the specified String object.
534      * @param s the String to compare
535      * @return true if the comparison succeeded, false otherwise
536      */
537     public boolean equals(String   s) {
538     // XXX ENCODING - this only works if encoding is UTF8-compat
539     // ( ok for tomcat, where we compare ascii - header names, etc )!!!
540     
541     byte[] b = buff;
542     int blen = end-start;
543     if (b == null || blen != s.length()) {
544         return false;
545     }
546     int boff = start;
547     for (int i = 0; i < blen; i++) {
548         if (b[boff++] != s.charAt(i)) {
549         return false;
550         }
551     }
552     return true;
553     }
554 
555     /**
556      * Compares the message bytes to the specified String object.
557      * @param s the String to compare
558      * @return true if the comparison succeeded, false otherwise
559      */
560     public boolean equalsIgnoreCase(String   s) {
561     byte[] b = buff;
562     int blen = end-start;
563     if (b == null || blen != s.length()) {
564         return false;
565     }
566     int boff = start;
567     for (int i = 0; i < blen; i++) {
568         if (Ascii.toLower(b[boff++]) != Ascii.toLower(s.charAt(i))) {
569         return false;
570         }
571     }
572     return true;
573     }
574 
575     public boolean equals( ByteChunk bb ) {
576     return equals( bb.getBytes(), bb.getStart(), bb.getLength());
577     }
578     
579     public boolean equals( byte b2[], int off2, int len2) {
580     byte b1[]=buff;
581     if( b1==null && b2==null ) return true;
582 
583     int len=end-start;
584     if ( len2 != len || b1==null || b2==null ) 
585         return false;
586         
587     int off1 = start;
588 
589     while ( len-- > 0) {
590         if (b1[off1++] != b2[off2++]) {
591         return false;
592         }
593     }
594     return true;
595     }
596 
597     public boolean equals( CharChunk cc ) {
598     return equals( cc.getChars(), cc.getStart(), cc.getLength());
599     }
600     
601     public boolean equals( char c2[], int off2, int len2) {
602     // XXX works only for enc compatible with ASCII/UTF !!!
603     byte b1[]=buff;
604     if( c2==null && b1==null ) return true;
605     
606     if (b1== null || c2==null || end-start != len2 ) {
607         return false;
608     }
609     int off1 = start;
610     int len=end-start;
611     
612     while ( len-- > 0) {
613         if ( (char)b1[off1++] != c2[off2++]) {
614         return false;
615         }
616     }
617     return true;
618     }
619 
620     /**
621      * Returns true if the message bytes starts with the specified string.
622      * @param s the string
623      */
624     public boolean startsWith(String   s) {
625     // Works only if enc==UTF
626     byte[] b = buff;
627     int blen = s.length();
628     if (b == null || blen > end-start) {
629         return false;
630     }
631     int boff = start;
632     for (int i = 0; i < blen; i++) {
633         if (b[boff++] != s.charAt(i)) {
634         return false;
635         }
636     }
637     return true;
638     }
639 
640     /* Returns true if the message bytes start with the specified byte array */
641     public boolean startsWith(byte[] b2) {
642         byte[] b1 = buff;
643         if (b1 == null && b2 == null) {
644             return true;
645         }
646 
647         int len = end - start;
648         if (b1 == null || b2 == null || b2.length > len) {
649             return false;
650         }
651         for (int i = start, j = 0; i < end && j < b2.length; ) {
652             if (b1[i++] != b2[j++]) 
653                 return false;
654         }
655         return true;
656     }
657 
658     /**
659      * Returns true if the message bytes starts with the specified string.
660      * @param s the string
661      * @param pos The position
662      */
663     public boolean startsWithIgnoreCase(String   s, int pos) {
664     byte[] b = buff;
665     int len = s.length();
666     if (b == null || len+pos > end-start) {
667         return false;
668     }
669     int off = start+pos;
670     for (int i = 0; i < len; i++) {
671         if (Ascii.toLower( b[off++] ) != Ascii.toLower( s.charAt(i))) {
672         return false;
673         }
674     }
675     return true;
676     }
677 
678     public int indexOf( String   src, int srcOff, int srcLen, int myOff ) {
679     char first=src.charAt( srcOff );
680 
681     // Look for first char 
682     int srcEnd = srcOff + srcLen;
683         
684     for( int i=myOff+start; i <= (end - srcLen); i++ ) {
685         if( buff[i] != first ) continue;
686         // found first char, now look for a match
687             int myPos=i+1;
688         for( int srcPos=srcOff + 1; srcPos< srcEnd; ) {
689                 if( buff[myPos++] != src.charAt( srcPos++ ))
690             break;
691                 if( srcPos==srcEnd ) return i-start; // found it
692         }
693     }
694     return -1;
695     }
696 
697     // -------------------- Hash code  --------------------
698 
699     // normal hash. 
700     public int hash() {
701     return hashBytes( buff, start, end-start);
702     }
703 
704     // hash ignoring case
705     public int hashIgnoreCase() {
706     return hashBytesIC( buff, start, end-start );
707     }
708 
709     private static int hashBytes( byte buff[], int start, int bytesLen ) {
710     int max=start+bytesLen;
711     byte bb[]=buff;
712     int code=0;
713     for (int i = start; i < max ; i++) {
714         code = code * 37 + bb[i];
715     }
716     return code;
717     }
718 
719     private static int hashBytesIC( byte bytes[], int start,
720                     int bytesLen )
721     {
722     int max=start+bytesLen;
723     byte bb[]=bytes;
724     int code=0;
725     for (int i = start; i < max ; i++) {
726         code = code * 37 + Ascii.toLower(bb[i]);
727     }
728     return code;
729     }
730 
731     /**
732      * Returns true if the message bytes starts with the specified string.
733      * @param c the character
734      * @param starting The start position
735      */
736     public int indexOf(char c, int starting) {
737     int ret = indexOf( buff, start+starting, end, c);
738     return (ret >= start) ? ret - start : -1;
739     }
740 
741     public static int  indexOf( byte bytes[], int off, int end, char qq )
742     {
743     // Works only for UTF 
744     while( off < end ) {
745         byte b=bytes[off];
746         if( b==qq )
747         return off;
748         off++;
749     }
750     return -1;
751     }
752 
753     /** Find a character, no side effects.
754      *  @return index of char if found, -1 if not
755      */
756     public static int findChar( byte buf[], int start, int end, char c ) {
757     byte b=(byte)c;
758     int offset = start;
759     while (offset < end) {
760         if (buf[offset] == b) {
761         return offset;
762         }
763         offset++;
764     }
765     return -1;
766     }
767 
768     /** Find a character, no side effects.
769      *  @return index of char if found, -1 if not
770      */
771     public static int findChars( byte buf[], int start, int end, byte c[] ) {
772     int clen=c.length;
773     int offset = start;
774     while (offset < end) {
775         for( int i=0; i<clen; i++ ) 
776         if (buf[offset] == c[i]) {
777             return offset;
778         }
779         offset++;
780     }
781     return -1;
782     }
783 
784     /** Find the first character != c 
785      *  @return index of char if found, -1 if not
786      */
787     public static int findNotChars( byte buf[], int start, int end, byte c[] )
788     {
789     int clen=c.length;
790     int offset = start;
791     boolean found;
792         
793     while (offset < end) {
794         found=true;
795         for( int i=0; i<clen; i++ ) {
796         if (buf[offset] == c[i]) {
797             found=false;
798             break;
799         }
800         }
801         if( found ) { // buf[offset] != c[0..len]
802         return offset;
803         }
804         offset++;
805     }
806     return -1;
807     }
808 
809 
810     /**
811      * Convert specified String to a byte array. This ONLY WORKS for ascii, UTF chars will be truncated.
812      * 
813      * @param value to convert to byte array
814      * @return the byte array value
815      */
816     public static final byte[] convertToBytes(String   value) {
817         byte[] result = new byte[value.length()];
818         for (int i = 0; i < value.length(); i++) {
819             result[i] = (byte) value.charAt(i);
820         }
821         return result;
822     }
823     
824     
825 }
826
A to Z: JavaDoc & Examples Daily Java News & Articles Open Source Projects Open Source Codes Free Computer Books Remove Frame
Popular Tags