View Javadoc

1   // ========================================================================
2   // Copyright 2004-2005 Mort Bay Consulting Pty. Ltd.
3   // ------------------------------------------------------------------------
4   // Licensed under the Apache License, Version 2.0 (the "License");
5   // you may not use this file except in compliance with the License.
6   // You may obtain a copy of the License at 
7   // http://www.apache.org/licenses/LICENSE-2.0
8   // Unless required by applicable law or agreed to in writing, software
9   // distributed under the License is distributed on an "AS IS" BASIS,
10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11  // See the License for the specific language governing permissions and
12  // limitations under the License.
13  // ========================================================================
14  
15  package org.mortbay.util;
16  
17  import java.io.IOException;
18  import java.io.InputStream;
19  import java.io.InputStreamReader;
20  import java.io.StringWriter;
21  import java.io.UnsupportedEncodingException;
22  import java.util.Iterator;
23  import java.util.Map;
24  
25  import org.mortbay.log.Log;
26  
27  
28  /* ------------------------------------------------------------ */
29  /** Handles coding of MIME  "x-www-form-urlencoded".
30   * This class handles the encoding and decoding for either
31   * the query string of a URL or the _content of a POST HTTP request.
32   *
33   * <p><h4>Notes</h4>
34   * The hashtable either contains String single values, vectors
35   * of String or arrays of Strings.
36   *
37   * This class is only partially synchronised.  In particular, simple
38   * get operations are not protected from concurrent updates.
39   *
40   * @see java.net.URLEncoder
41   * @author Greg Wilkins (gregw)
42   */
43  public class UrlEncoded extends MultiMap
44  {
45  
46      /* ----------------------------------------------------------------- */
    /**
     * Creates an instance from an existing one by handing it to the
     * MultiMap constructor that accepts a map. This is the constructor
     * used by {@link #clone()}.
     * @param url the instance passed to the superclass constructor
     */
    public UrlEncoded(UrlEncoded url)
    {
        super(url);
    }
51      
52      /* ----------------------------------------------------------------- */
    /**
     * Creates an instance with no parameters decoded into it.
     */
    public UrlEncoded()
    {
        // NOTE(review): 6 is presumably the initial capacity of the MultiMap - confirm.
        super(6);
    }
57      
58      /* ----------------------------------------------------------------- */
    /**
     * Creates an instance and decodes the given encoded string into it
     * using the StringUtil.__UTF8 charset.
     * @param s the URL-encoded parameters to decode
     */
    public UrlEncoded(String s)
    {
        super(6);
        // Calls the overridable two-argument decode; a subclass override runs here.
        decode(s,StringUtil.__UTF8);
    }
64      
65      /* ----------------------------------------------------------------- */
    /**
     * Creates an instance and decodes the given encoded string into it.
     * @param s the URL-encoded parameters to decode
     * @param charset the charset handed to {@link #decode(String, String)}
     */
    public UrlEncoded(String s, String charset)
    {
        super(6);
        // Calls the overridable two-argument decode; a subclass override runs here.
        decode(s,charset);
    }
71      
72      /* ----------------------------------------------------------------- */
    /**
     * Decodes URL-encoded parameters into this map using the
     * StringUtil.__UTF8 charset.
     * @param query the encoded parameters
     */
    public void decode(String query)
    {
        decodeTo(query,this,StringUtil.__UTF8);
    }
77      
78      /* ----------------------------------------------------------------- */
    /**
     * Decodes URL-encoded parameters into this map.
     * @param query the encoded parameters
     * @param charset the charset passed to
     * {@link #decodeTo(String, MultiMap, String)}, which substitutes
     * StringUtil.__UTF8 when it is null
     */
    public void decode(String query,String charset)
    {
        decodeTo(query,this,charset);
    }
83      
84      /* -------------------------------------------------------------- */
    /** Encode the parameters of this map with % encoding.
     * Uses the StringUtil.__UTF8 charset and does not force an '=' after
     * parameters that have no value.
     * @return the encoded parameters
     */
    public String encode()
    {
        return encode(StringUtil.__UTF8,false);
    }
91      
92      /* -------------------------------------------------------------- */
    /** Encode the parameters of this map with % encoding.
     * Does not force an '=' after parameters that have no value.
     * @param charset the charset used to encode names and values
     * @return the encoded parameters
     */
    public String encode(String charset)
    {
        return encode(charset,false);
    }
99      
100     /* -------------------------------------------------------------- */
    /** Encode the parameters of this map with % encoding.
     * The call is synchronized on this instance and delegates to the
     * static {@link #encode(MultiMap, String, boolean)}.
     * @param charset the charset used to encode names and values
     * @param equalsForNullValue if True, then an '=' is always used, even
     * for parameters without a value. e.g. "blah?a=&amp;b=&amp;c=".
     * @return the encoded parameters
     */
    public synchronized String encode(String charset, boolean equalsForNullValue)
    {
        return encode(this,charset,equalsForNullValue);
    }
109     
110     /* -------------------------------------------------------------- */
111     /** Encode Hashtable with % encoding.
112      * @param equalsForNullValue if True, then an '=' is always used, even
113      * for parameters without a value. e.g. "blah?a=&b=&c=".
114      */
115     public static String encode(MultiMap map, String charset, boolean equalsForNullValue)
116     {
117         if (charset==null)
118             charset=StringUtil.__UTF8;
119         
120         StringBuffer result = new StringBuffer(128);
121         synchronized(result)
122         {
123             Iterator iter = map.entrySet().iterator();
124             while(iter.hasNext())
125             {
126                 Map.Entry entry = (Map.Entry)iter.next();
127                 
128                 String key = entry.getKey().toString();
129                 Object list = entry.getValue();
130                 int s=LazyList.size(list);
131                 
132                 if (s==0)
133                 {
134                     result.append(encodeString(key,charset));
135                     if(equalsForNullValue)
136                         result.append('=');
137                 }
138                 else
139                 {
140                     for (int i=0;i<s;i++)
141                     {
142                         if (i>0)
143                             result.append('&');
144                         Object val=LazyList.get(list,i);
145                         result.append(encodeString(key,charset));
146 
147                         if (val!=null)
148                         {
149                             String str=val.toString();
150                             if (str.length()>0)
151                             {
152                                 result.append('=');
153                                 result.append(encodeString(str,charset));
154                             }
155                             else if (equalsForNullValue)
156                                 result.append('=');
157                         }
158                         else if (equalsForNullValue)
159                             result.append('=');
160                     }
161                 }
162                 if (iter.hasNext())
163                     result.append('&');
164             }
165             return result.toString();
166         }
167     }
168 
169 
170     /* -------------------------------------------------------------- */
171     /** Decoded parameters to Map.
172      * @param content the string containing the encoded parameters
173      */
174     public static void decodeTo(String content, MultiMap map, String charset)
175     {
176         if (charset==null)
177             charset=StringUtil.__UTF8;
178 
179         synchronized(map)
180         {
181             String key = null;
182             String value = null;
183             int mark=-1;
184             boolean encoded=false;
185             for (int i=0;i<content.length();i++)
186             {
187                 char c = content.charAt(i);
188                 switch (c)
189                 {
190                   case '&':
191                       int l=i-mark-1;
192                       value = l==0?"":
193                           (encoded?decodeString(content,mark+1,l,charset):content.substring(mark+1,i));
194                       mark=i;
195                       encoded=false;
196                       if (key != null)
197                       {
198                           map.add(key,value);
199                       }
200                       else if (value!=null&&value.length()>0)
201                       {
202                           map.add(value,"");
203                       }
204                       key = null;
205                       value=null;
206                       break;
207                   case '=':
208                       if (key!=null)
209                           break;
210                       key = encoded?decodeString(content,mark+1,i-mark-1,charset):content.substring(mark+1,i);
211                       mark=i;
212                       encoded=false;
213                       break;
214                   case '+':
215                       encoded=true;
216                       break;
217                   case '%':
218                       encoded=true;
219                       break;
220                 }                
221             }
222             
223             if (key != null)
224             {
225                 int l=content.length()-mark-1;
226                 value = l==0?"":(encoded?decodeString(content,mark+1,l,charset):content.substring(mark+1));
227                 map.add(key,value);
228             }
229             else if (mark<content.length())
230             {
231                 key = encoded
232                     ?decodeString(content,mark+1,content.length()-mark-1,charset)
233                     :content.substring(mark+1);
234                 map.add(key,"");
235             }
236         }
237     }
238 
239     /* -------------------------------------------------------------- */
    /** Decode URL-encoded parameters from a byte range into a map.
     * Delegates to the five-argument overload with a newly created
     * Utf8StringBuffer.
     * @param raw the byte[] containing the encoded parameters
     * @param offset index of the first byte to decode
     * @param length number of bytes to decode
     * @param map the MultiMap the decoded parameters are added to
     */
    public static void decodeUtf8To(byte[] raw,int offset, int length, MultiMap map)
    {
        decodeUtf8To(raw,offset,length,map,new Utf8StringBuffer());
    }
247 
248     /* -------------------------------------------------------------- */
249     /** Decoded parameters to Map.
250      * @param data the byte[] containing the encoded parameters
251      */
252     public static void decodeUtf8To(byte[] raw,int offset, int length, MultiMap map,Utf8StringBuffer buffer)
253     {
254         synchronized(map)
255         {
256             String key = null;
257             String value = null;
258             
259             // TODO cache of parameter names ???
260             int end=offset+length;
261             for (int i=offset;i<end;i++)
262             {
263                 byte b=raw[i];
264                 switch ((char)(0xff&b))
265                 {
266                     case '&':
267                         value = buffer.length()==0?"":buffer.toString();
268                         buffer.reset();
269                         if (key != null)
270                         {
271                             map.add(key,value);
272                         }
273                         else if (value!=null&&value.length()>0)
274                         {
275                             map.add(value,"");
276                         }
277                         key = null;
278                         value=null;
279                         break;
280                         
281                     case '=':
282                         if (key!=null)
283                         {
284                             buffer.append(b);
285                             break;
286                         }
287                         key = buffer.toString();
288                         buffer.reset();
289                         break;
290                         
291                     case '+':
292                         buffer.append((byte)' ');
293                         break;
294                         
295                     case '%':
296                         if (i+2<end)
297                             buffer.append((byte)((TypeUtil.convertHexDigit(raw[++i])<<4) + TypeUtil.convertHexDigit(raw[++i])));
298                         break;
299                     default:
300                         buffer.append(b);
301                     break;
302                 }
303             }
304             
305             if (key != null)
306             {
307                 value = buffer.length()==0?"":buffer.toString();
308                 buffer.reset();
309                 map.add(key,value);
310             }
311             else if (buffer.length()>0)
312             {
313                 map.add(buffer.toString(),"");
314             }
315         }
316     }
317 
318     /* -------------------------------------------------------------- */
319     /** Decoded parameters to Map.
320      * @param in InputSteam to read
321      * @param map MultiMap to add parameters to
322      * @param maxLength maximum length of content to read 0r -1 for no limit
323      */
324     public static void decode88591To(InputStream in, MultiMap map, int maxLength)
325     throws IOException
326     {
327         synchronized(map)
328         {
329             StringBuffer buffer = new StringBuffer();
330             String key = null;
331             String value = null;
332             
333             int b;
334 
335             // TODO cache of parameter names ???
336             int totalLength=0;
337             while ((b=in.read())>=0)
338             {
339                 switch ((char) b)
340                 {
341                     case '&':
342                         value = buffer.length()==0?"":buffer.toString();
343                         buffer.setLength(0);
344                         if (key != null)
345                         {
346                             map.add(key,value);
347                         }
348                         else if (value!=null&&value.length()>0)
349                         {
350                             map.add(value,"");
351                         }
352                         key = null;
353                         value=null;
354                         break;
355                         
356                     case '=':
357                         if (key!=null)
358                         {
359                             buffer.append((char)b);
360                             break;
361                         }
362                         key = buffer.toString();
363                         buffer.setLength(0);
364                         break;
365                         
366                     case '+':
367                         buffer.append((char)' ');
368                         break;
369                         
370                     case '%':
371                         int dh=in.read();
372                         int dl=in.read();
373                         if (dh<0||dl<0)
374                             break;
375                         buffer.append((char)((TypeUtil.convertHexDigit((byte)dh)<<4) + TypeUtil.convertHexDigit((byte)dl)));
376                         break;
377                     default:
378                         buffer.append((char)b);
379                     break;
380                 }
381                 if (maxLength>=0 && (++totalLength > maxLength))
382                     throw new IllegalStateException("Form too large");
383             }
384             
385             if (key != null)
386             {
387                 value = buffer.length()==0?"":buffer.toString();
388                 buffer.setLength(0);
389                 map.add(key,value);
390             }
391             else if (buffer.length()>0)
392             {
393                 map.add(buffer.toString(), "");
394             }
395         }
396     }
397     
398     /* -------------------------------------------------------------- */
399     /** Decoded parameters to Map.
400      * @param in InputSteam to read
401      * @param map MultiMap to add parameters to
402      * @param maxLength maximum length of conent to read 0r -1 for no limit
403      */
404     public static void decodeUtf8To(InputStream in, MultiMap map, int maxLength)
405     throws IOException
406     {
407         synchronized(map)
408         {
409             Utf8StringBuffer buffer = new Utf8StringBuffer();
410             String key = null;
411             String value = null;
412             
413             int b;
414             
415             // TODO cache of parameter names ???
416             int totalLength=0;
417             while ((b=in.read())>=0)
418             {
419                 switch ((char) b)
420                 {
421                     case '&':
422                         value = buffer.length()==0?"":buffer.toString();
423                         buffer.reset();
424                         if (key != null)
425                         {
426                             map.add(key,value);
427                         }
428                         else if (value!=null&&value.length()>0)
429                         {
430                             map.add(value,"");
431                         }
432                         key = null;
433                         value=null;
434                         break;
435                         
436                     case '=':
437                         if (key!=null)
438                         {
439                             buffer.append((byte)b);
440                             break;
441                         }
442                         key = buffer.toString();
443                         buffer.reset();
444                         break;
445                         
446                     case '+':
447                         buffer.append((byte)' ');
448                         break;
449                         
450                     case '%':
451                         int dh=in.read();
452                         int dl=in.read();
453                         if (dh<0||dl<0)
454                             break;
455                         buffer.append((byte)((TypeUtil.convertHexDigit((byte)dh)<<4) + TypeUtil.convertHexDigit((byte)dl)));
456                         break;
457                     default:
458                         buffer.append((byte)b);
459                     break;
460                 }
461                 if (maxLength>=0 && (++totalLength > maxLength))
462                     throw new IllegalStateException("Form too large");
463             }
464             
465             if (key != null)
466             {
467                 value = buffer.length()==0?"":buffer.toString();
468                 buffer.reset();
469                 map.add(key,value);
470             }
471             else if (buffer.length()>0)
472             {
473                 map.add(buffer.toString(), "");
474             }
475         }
476     }
477     
478     /* -------------------------------------------------------------- */
479     public static void decodeUtf16To(InputStream in, MultiMap map, int maxLength) throws IOException
480     {
481         InputStreamReader input = new InputStreamReader(in,StringUtil.__UTF16);
482         StringBuffer buf = new StringBuffer();
483 
484         int c;
485         int length=0;
486         if (maxLength<0)
487             maxLength=Integer.MAX_VALUE;
488         while ((c=input.read())>0 && length++<maxLength)
489             buf.append((char)c);
490         decodeTo(buf.toString(),map,StringUtil.__UTF8);
491     }
492     
493     /* -------------------------------------------------------------- */
494     /** Decoded parameters to Map.
495      * @param in the stream containing the encoded parameters
496      */
497     public static void decodeTo(InputStream in, MultiMap map, String charset, int maxLength)
498     throws IOException
499     {
500         if (charset==null || StringUtil.__ISO_8859_1.equals(charset))
501         {
502             decode88591To(in,map,maxLength);
503             return;
504         }
505 
506         if (StringUtil.__UTF8.equalsIgnoreCase(charset))
507         {
508             decodeUtf8To(in,map,maxLength);
509             return;
510         }
511 
512         if (StringUtil.__UTF16.equalsIgnoreCase(charset)) // Should be all 2 byte encodings
513         {
514             decodeUtf16To(in,map,maxLength);
515             return;
516         }
517         
518 
519         synchronized(map)
520         {
521             String key = null;
522             String value = null;
523             
524             int c;
525             int digit=0;
526             int digits=0;
527             
528             int totalLength = 0;
529             ByteArrayOutputStream2 output = new ByteArrayOutputStream2();
530             
531             int size=0;
532             
533             while ((c=in.read())>0)
534             {
535                 switch ((char) c)
536                 {
537                     case '&':
538                         size=output.size();
539                         value = size==0?"":output.toString(charset);
540                         output.setCount(0);
541                         if (key != null)
542                         {
543                             map.add(key,value);
544                         }
545                         else if (value!=null&&value.length()>0)
546                         {
547                             map.add(value,"");
548                         }
549                         key = null;
550                         value=null;
551                         break;
552                     case '=':
553                         if (key!=null)
554                         {
555                             output.write(c);
556                             break;
557                         }
558                         size=output.size();
559                         key = size==0?"":output.toString(charset);
560                         output.setCount(0);
561                         break;
562                     case '+':
563                         output.write(' ');
564                         break;
565                     case '%':
566                         digits=2;
567                         break;
568                     default:
569                         if (digits==2)
570                         {
571                             digit=TypeUtil.convertHexDigit((byte)c);
572                             digits=1;
573                         }
574                         else if (digits==1)
575                         {
576                             output.write((digit<<4) + TypeUtil.convertHexDigit((byte)c));
577                             digits=0;
578                         }
579                         else
580                             output.write(c);
581                     break;
582                 }
583                 
584                 totalLength++;
585                 if (maxLength>=0 && totalLength > maxLength)
586                     throw new IllegalStateException("Form too large");
587             }
588 
589             size=output.size();
590             if (key != null)
591             {
592                 value = size==0?"":output.toString(charset);
593                 output.setCount(0);
594                 map.add(key,value);
595             }
596             else if (size>0)
597                 map.add(output.toString(charset),"");
598         }
599     }
600     
601     /* -------------------------------------------------------------- */
602     /** Decode String with % encoding.
603      * This method makes the assumption that the majority of calls
604      * will need no decoding.
605      */
606     public static String decodeString(String encoded,int offset,int length,String charset)
607     {
608         if (charset==null || StringUtil.isUTF8(charset))
609         {
610             Utf8StringBuffer buffer=null;
611 
612             for (int i=0;i<length;i++)
613             {
614                 char c = encoded.charAt(offset+i);
615                 if (c<0||c>0xff)
616                 {
617                     if (buffer==null)
618                     {
619                         buffer=new Utf8StringBuffer(length);
620                         buffer.getStringBuffer().append(encoded.substring(offset,offset+i+1));
621                     }
622                     else
623                         buffer.getStringBuffer().append(c);
624                 }
625                 else if (c=='+')
626                 {
627                     if (buffer==null)
628                     {
629                         buffer=new Utf8StringBuffer(length);
630                         buffer.getStringBuffer().append(encoded.substring(offset,offset+i));
631                     }
632                     
633                     buffer.getStringBuffer().append(' ');
634                 }
635                 else if (c=='%' && (i+2)<length)
636                 {
637                     if (buffer==null)
638                     {
639                         buffer=new Utf8StringBuffer(length);
640                         buffer.getStringBuffer().append(encoded.substring(offset,offset+i));
641                     }
642 
643                     while(c=='%' && (i+2)<length)
644                     {
645                         try
646                         {
647                             byte b=(byte)TypeUtil.parseInt(encoded,offset+i+1,2,16);
648                             buffer.append(b);
649                             i+=3;
650                         }
651                         catch(NumberFormatException nfe)
652                         {
653                             buffer.getStringBuffer().append('%');
654                             for(char next; ((next=encoded.charAt(++i+offset))!='%');)
655                                 buffer.getStringBuffer().append((next=='+' ? ' ' : next));
656                         }
657 
658                         if (i<length)
659                             c = encoded.charAt(offset+i);
660                     }
661                     i--;
662                 }
663                 else if (buffer!=null)
664                     buffer.getStringBuffer().append(c);
665             }
666 
667             if (buffer==null)
668             {
669                 if (offset==0 && encoded.length()==length)
670                     return encoded;
671                 return encoded.substring(offset,offset+length);
672             }
673 
674             return buffer.toString();
675         }
676         else
677         {
678             StringBuffer buffer=null;
679 
680             try
681             {
682                 for (int i=0;i<length;i++)
683                 {
684                     char c = encoded.charAt(offset+i);
685                     if (c<0||c>0xff)
686                     {
687                         if (buffer==null)
688                         {
689                             buffer=new StringBuffer(length);
690                             buffer.append(encoded.substring(offset,offset+i+1));
691                         }
692                         else
693                             buffer.append(c);
694                     }
695                     else if (c=='+')
696                     {
697                         if (buffer==null)
698                         {
699                             buffer=new StringBuffer(length);
700                             buffer.append(encoded.substring(offset,offset+i));
701                         }
702                         
703                         buffer.append(' ');
704                     }
705                     else if (c=='%' && (i+2)<length)
706                     {
707                         if (buffer==null)
708                         {
709                             buffer=new StringBuffer(length);
710                             buffer.append(encoded.substring(offset,offset+i));
711                         }
712 
713                         byte[] ba=new byte[length];
714                         int n=0;
715                         while(c>=0 && c<=0xff)
716                         {
717                             if (c=='%')
718                             {   
719                                 if(i+2<length)
720                                 {
721                                     try
722                                     {
723                                         ba[n++]=(byte)TypeUtil.parseInt(encoded,offset+i+1,2,16);
724                                         i+=3;
725                                     }
726                                     catch(NumberFormatException nfe)
727                                     {                                        
728                                         ba[n-1] = (byte)'%';                                    
729                                         for(char next; ((next=encoded.charAt(++i+offset))!='%');)
730                                             ba[n++] = (byte)(next=='+' ? ' ' : next);
731                                     }
732                                 }
733                                 else
734                                 {
735                                     ba[n++] = (byte)'%';
736                                     i++;
737                                 }
738                             }
739                             else if (c=='+')
740                             {
741                                 ba[n++]=(byte)' ';
742                                 i++;
743                             }
744                             else
745                             {
746                                 ba[n++]=(byte)c;
747                                 i++;
748                             }
749                             
750                             if (i>=length)
751                                 break;
752                             c = encoded.charAt(offset+i);
753                         }
754 
755                         i--;
756                         buffer.append(new String(ba,0,n,charset));
757 
758                     }
759                     else if (buffer!=null)
760                         buffer.append(c);
761                 }
762 
763                 if (buffer==null)
764                 {
765                     if (offset==0 && encoded.length()==length)
766                         return encoded;
767                     return encoded.substring(offset,offset+length);
768                 }
769 
770                 return buffer.toString();
771             }
772             catch (UnsupportedEncodingException e)
773             {
774                 throw new RuntimeException(e);
775             }
776         }
777         
778     }
779     
780     /* ------------------------------------------------------------ */
    /** Perform URL encoding.
     * Uses the StringUtil.__UTF8 charset.
     * @param string the text to encode
     * @return encoded string.
     */
    public static String encodeString(String string)
    {
        return encodeString(string,StringUtil.__UTF8);
    }
790     
791     /* ------------------------------------------------------------ */
792     /** Perform URL encoding.
793      * @param string 
794      * @return encoded string.
795      */
796     public static String encodeString(String string,String charset)
797     {
798         if (charset==null)
799             charset=StringUtil.__UTF8;
800         byte[] bytes=null;
801         try
802         {
803             bytes=string.getBytes(charset);
804         }
805         catch(UnsupportedEncodingException e)
806         {
807             // Log.warn(LogSupport.EXCEPTION,e);
808             bytes=string.getBytes();
809         }
810         
811         int len=bytes.length;
812         byte[] encoded= new byte[bytes.length*3];
813         int n=0;
814         boolean noEncode=true;
815         
816         for (int i=0;i<len;i++)
817         {
818             byte b = bytes[i];
819             
820             if (b==' ')
821             {
822                 noEncode=false;
823                 encoded[n++]=(byte)'+';
824             }
825             else if (b>='a' && b<='z' ||
826                      b>='A' && b<='Z' ||
827                      b>='0' && b<='9')
828             {
829                 encoded[n++]=b;
830             }
831             else
832             {
833                 noEncode=false;
834                 encoded[n++]=(byte)'%';
835                 byte nibble= (byte) ((b&0xf0)>>4);
836                 if (nibble>=10)
837                     encoded[n++]=(byte)('A'+nibble-10);
838                 else
839                     encoded[n++]=(byte)('0'+nibble);
840                 nibble= (byte) (b&0xf);
841                 if (nibble>=10)
842                     encoded[n++]=(byte)('A'+nibble-10);
843                 else
844                     encoded[n++]=(byte)('0'+nibble);
845             }
846         }
847 
848         if (noEncode)
849             return string;
850         
851         try
852         {    
853             return new String(encoded,0,n,charset);
854         }
855         catch(UnsupportedEncodingException e)
856         {
857             // Log.warn(LogSupport.EXCEPTION,e);
858             return new String(encoded,0,n);
859         }
860     }
861 
862 
863     /* ------------------------------------------------------------ */
    /** Create a new UrlEncoded by passing this instance to the
     * {@link #UrlEncoded(UrlEncoded)} constructor.
     * @return the new UrlEncoded instance
     */
    public Object clone()
    {
        return new UrlEncoded(this);
    }
870 }