1   
2   
3   
4   
5   
6   
7   
8   
9   
10  
11  
12  
13  
14  
15  
16  
17  
18  
19  
20  
21  
22  
23  
24  
25  
26  
27  
28  
29  
30  package org.apache.commons.httpclient.util;
31  
32  import java.io.UnsupportedEncodingException;
33  
34  import org.apache.commons.codec.net.URLCodec;
35  import org.apache.commons.httpclient.HttpClientError;
36  import org.apache.commons.httpclient.NameValuePair;
37  import org.apache.commons.logging.Log;
38  import org.apache.commons.logging.LogFactory;
39  
40  /**
41   * The home for utility methods that handle various encoding tasks.
42   * 
43   * @author Michael Becke
44   * @author <a href="mailto:oleg@ural.ru">Oleg Kalnichevski</a>
45   * 
46   * @since 2.0 final
47   */
48  public class EncodingUtil {
49  
50      /** Default content encoding chatset */
51      private static final String DEFAULT_CHARSET = "ISO-8859-1";
52  
53      /** Log object for this class. */
54      private static final Log LOG = LogFactory.getLog(EncodingUtil.class);
55  
56      /**
57       * Form-urlencoding routine.
58       *
59       * The default encoding for all forms is `application/x-www-form-urlencoded'. 
60       * A form data set is represented in this media type as follows:
61       *
62       * The form field names and values are escaped: space characters are replaced 
63       * by `+', and then reserved characters are escaped as per [URL]; that is, 
64       * non-alphanumeric characters are replaced by `%HH', a percent sign and two 
65       * hexadecimal digits representing the ASCII code of the character. Line breaks, 
66       * as in multi-line text field values, are represented as CR LF pairs, i.e. `%0D%0A'.
67       * 
68       * <p>
69       * if the given charset is not supported, ISO-8859-1 is used instead.
70       * </p>
71       * 
72       * @param pairs the values to be encoded
73       * @param charset the character set of pairs to be encoded
74       * 
75       * @return the urlencoded pairs
76       * 
77       * @since 2.0 final
78       */
79       public static String formUrlEncode(NameValuePair[] pairs, String charset) {
80          try {
81              return doFormUrlEncode(pairs, charset);
82          } catch (UnsupportedEncodingException e) {
83              LOG.error("Encoding not supported: " + charset);
84              try {
85                  return doFormUrlEncode(pairs, DEFAULT_CHARSET);
86              } catch (UnsupportedEncodingException fatal) {
87                  
88                  throw new HttpClientError("Encoding not supported: " + 
89                      DEFAULT_CHARSET);
90              }
91          }
92      }
93  
94      /**
95       * Form-urlencoding routine.
96       *
97       * The default encoding for all forms is `application/x-www-form-urlencoded'. 
98       * A form data set is represented in this media type as follows:
99       *
100      * The form field names and values are escaped: space characters are replaced 
101      * by `+', and then reserved characters are escaped as per [URL]; that is, 
102      * non-alphanumeric characters are replaced by `%HH', a percent sign and two 
103      * hexadecimal digits representing the ASCII code of the character. Line breaks, 
104      * as in multi-line text field values, are represented as CR LF pairs, i.e. `%0D%0A'.
105      * 
106      * @param pairs the values to be encoded
107      * @param charset the character set of pairs to be encoded
108      * 
109      * @return the urlencoded pairs
110      * @throws UnsupportedEncodingException if charset is not supported
111      * 
112      * @since 2.0 final
113      */
114      private static String doFormUrlEncode(NameValuePair[] pairs, String charset)
115         throws UnsupportedEncodingException 
116      {
117         StringBuffer buf = new StringBuffer();
118         for (int i = 0; i < pairs.length; i++) {
119             URLCodec codec = new URLCodec();
120             NameValuePair pair = pairs[i];
121             if (pair.getName() != null) {
122                 if (i > 0) {
123                     buf.append("&");
124                 }
125                 buf.append(codec.encode(pair.getName(), charset));
126                 buf.append("=");
127                 if (pair.getValue() != null) {
128                     buf.append(codec.encode(pair.getValue(), charset));
129                 }
130             }
131         }
132         return buf.toString();
133     }
134     
135     /**
136      * Converts the byte array of HTTP content characters to a string. If
137      * the specified charset is not supported, default system encoding
138      * is used.
139      *
140      * @param data the byte array to be encoded
141      * @param offset the index of the first byte to encode
142      * @param length the number of bytes to encode 
143      * @param charset the desired character encoding
144      * @return The result of the conversion.
145      * 
146      * @since 3.0
147      */
148     public static String getString(
149         final byte[] data, 
150         int offset, 
151         int length, 
152         String charset
153     ) {
154 
155         if (data == null) {
156             throw new IllegalArgumentException("Parameter may not be null");
157         }
158 
159         if (charset == null || charset.length() == 0) {
160             throw new IllegalArgumentException("charset may not be null or empty");
161         }
162 
163         try {
164             return new String(data, offset, length, charset);
165         } catch (UnsupportedEncodingException e) {
166 
167             if (LOG.isWarnEnabled()) {
168                 LOG.warn("Unsupported encoding: " + charset + ". System encoding used");
169             }
170             return new String(data, offset, length);
171         }
172     }
173 
174 
175     /**
176      * Converts the byte array of HTTP content characters to a string. If
177      * the specified charset is not supported, default system encoding
178      * is used.
179      *
180      * @param data the byte array to be encoded
181      * @param charset the desired character encoding
182      * @return The result of the conversion.
183      * 
184      * @since 3.0
185      */
186     public static String getString(final byte[] data, String charset) {
187         return getString(data, 0, data.length, charset);
188     }
189 
190     /**
191      * Converts the specified string to a byte array.  If the charset is not supported the
192      * default system charset is used.
193      *
194      * @param data the string to be encoded
195      * @param charset the desired character encoding
196      * @return The resulting byte array.
197      * 
198      * @since 3.0
199      */
200     public static byte[] getBytes(final String data, String charset) {
201 
202         if (data == null) {
203             throw new IllegalArgumentException("data may not be null");
204         }
205 
206         if (charset == null || charset.length() == 0) {
207             throw new IllegalArgumentException("charset may not be null or empty");
208         }
209 
210         try {
211             return data.getBytes(charset);
212         } catch (UnsupportedEncodingException e) {
213 
214             if (LOG.isWarnEnabled()) {
215                 LOG.warn("Unsupported encoding: " + charset + ". System encoding used.");
216             }
217             
218             return data.getBytes();
219         }
220     }    
221     
222     /**
223      * Converts the specified string to byte array of ASCII characters.
224      *
225      * @param data the string to be encoded
226      * @return The string as a byte array.
227      * 
228      * @since 3.0
229      */
230     public static byte[] getAsciiBytes(final String data) {
231 
232         if (data == null) {
233             throw new IllegalArgumentException("Parameter may not be null");
234         }
235 
236         try {
237             return data.getBytes("US-ASCII");
238         } catch (UnsupportedEncodingException e) {
239             throw new HttpClientError("HttpClient requires ASCII support");
240         }
241     }
242 
243     /**
244      * Converts the byte array of ASCII characters to a string. This method is
245      * to be used when decoding content of HTTP elements (such as response
246      * headers)
247      *
248      * @param data the byte array to be encoded
249      * @param offset the index of the first byte to encode
250      * @param length the number of bytes to encode 
251      * @return The string representation of the byte array
252      * 
253      * @since 3.0
254      */
255     public static String getAsciiString(final byte[] data, int offset, int length) {
256 
257         if (data == null) {
258             throw new IllegalArgumentException("Parameter may not be null");
259         }
260 
261         try {
262             return new String(data, offset, length, "US-ASCII");
263         } catch (UnsupportedEncodingException e) {
264             throw new HttpClientError("HttpClient requires ASCII support");
265         }
266     }
267 
268     /**
269      * Converts the byte array of ASCII characters to a string. This method is
270      * to be used when decoding content of HTTP elements (such as response
271      * headers)
272      *
273      * @param data the byte array to be encoded
274      * @return The string representation of the byte array
275      * 
276      * @since 3.0
277      */
278     public static String getAsciiString(final byte[] data) {
279         return getAsciiString(data, 0, data.length);
280     }
281 
282     /**
283      * This class should not be instantiated.
284      */
285     private EncodingUtil() {
286     }
287 
288 }