KickJava   Java API By Example, From Geeks To Geeks.

Java > Open Source Codes > org > apache > geronimo > interop > util > UTF8


1 /**
2  *
3  * Copyright 2004-2005 The Apache Software Foundation
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  *
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */

18 package org.apache.geronimo.interop.util;
19
20 public abstract class UTF8 {
21     public static byte[] fromString(String JavaDoc value) {
22         int n = value.length(), u = 0;
23         for (int i = 0; i < n; i++) {
24             int c = value.charAt(i);
25             if (c >= 0x0001 && c <= 0x007F) {
26                 u++;
27             } else if (c > 0x07FF) {
28                 u += 3;
29             } else {
30                 u += 2;
31             }
32         }
33         byte[] bytes = new byte[u];
34         for (int i = 0, j = 0; i < n; i++) {
35             int c = value.charAt(i);
36             if (c >= 0x0001 && c <= 0x007F) {
37                 bytes[j++] = (byte) c;
38             } else if (c > 0x07FF) {
39                 bytes[j++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
40                 bytes[j++] = (byte) (0x80 | ((c >> 6) & 0x3F));
41                 bytes[j++] = (byte) (0x80 | (c & 0x3F));
42             } else {
43                 bytes[j++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
44                 bytes[j++] = (byte) (0x80 | (c & 0x3F));
45             }
46         }
47         return bytes;
48     }
49
50     /**
51      * * If there is sufficient space in buffer from offset to convert value
52      * * without allocating a new byte array, do so now and return the number
53      * * of bytes written. Otherwise return -1. This method is intended for
54      * * use in optimized string marshalling.
55      */

56     public static int fromString(String JavaDoc value, byte[] buffer, int offset, int length) {
57         int n = value.length(), j = offset;
58         for (int i = 0; i < n; i++) {
59             if (j + 3 > length) {
60                 return -1;
61             }
62             int c = value.charAt(i);
63             if (c >= 0x0001 && c <= 0x007F) {
64                 buffer[j++] = (byte) c;
65             } else if (c > 0x07FF) {
66                 buffer[j++] = (byte) (0xE0 | ((c >> 12) & 0x0F));
67                 buffer[j++] = (byte) (0x80 | ((c >> 6) & 0x3F));
68                 buffer[j++] = (byte) (0x80 | (c & 0x3F));
69             } else {
70                 buffer[j++] = (byte) (0xC0 | ((c >> 6) & 0x1F));
71                 buffer[j++] = (byte) (0x80 | (c & 0x3F));
72             }
73         }
74         return j - offset;
75     }
76
77     public static String JavaDoc toString(byte[] value) {
78         return toString(value, 0, value.length);
79     }
80
81     public static String JavaDoc toString(byte[] value, int offset, int length) {
82         int n = offset + length, j = 0;
83         char[] chars = new char[length]; // May be more than we need, but not less
84
for (int i = offset; i < n; i++) {
85             int c = (value[i] + 256) & 255; // byte is signed, we need unsigned
86
int c2, c3;
87
88             switch (c >> 4) {
89                 case 0:
90                 case 1:
91                 case 2:
92                 case 3:
93                 case 4:
94                 case 5:
95                 case 6:
96                 case 7:
97                     // 0xxx xxxx
98
chars[j++] = (char) c;
99                     break;
100
101                 case 12:
102                 case 13:
103                     // 110x xxxx 10xx xxxx
104
if (i + 1 >= n) {
105                         badUtf8Data();
106                     }
107                     c2 = (value[++i] + 256) & 255; // byte is signed, we need unsigned
108
if ((c2 & 0xC0) != 0x80) {
109                         badUtf8Data();
110                     }
111                     chars[j++] = (char) (((c & 0x1F) << 6) | (c2 & 0x3F));
112                     break;
113
114                 case 14:
115                     // 1110 xxxx 10xx xxxx 10xx xxxx
116
if (i + 2 >= n) {
117                         badUtf8Data();
118                     }
119                     c2 = (value[++i] + 256) & 255; // byte is signed, we need unsigned
120
c3 = (value[++i] + 256) & 255; // byte is signed, we need unsigned
121
if ((c2 & 0xC0) != 0x80 || (c3 & 0xC0) != 0x80) {
122                         badUtf8Data();
123                     }
124                     chars[j++] = (char) (((c & 0x0F) << 12)
125                                          | ((c2 & 0x3F) << 6)
126                                          | (c3 & 0x3F));
127                     break;
128
129                 default:
130                     badUtf8Data();
131             }
132         }
133         return new String JavaDoc(chars, 0, j);
134     }
135
136     private static void badUtf8Data() {
137         throw new org.omg.CORBA.MARSHAL JavaDoc("bad UTF-8 data");
138     }
139 }
140
Popular Tags