View Javadoc

1   // ========================================================================
2   // Copyright 2004-2005 Mort Bay Consulting Pty. Ltd.
3   // ------------------------------------------------------------------------
4   // Licensed under the Apache License, Version 2.0 (the "License");
5   // you may not use this file except in compliance with the License.
6   // You may obtain a copy of the License at 
7   // http://www.apache.org/licenses/LICENSE-2.0
8   // Unless required by applicable law or agreed to in writing, software
9   // distributed under the License is distributed on an "AS IS" BASIS,
10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11  // See the License for the specific language governing permissions and
12  // limitations under the License.
13  // ========================================================================
14  
15  package org.mortbay.util;
16  
17  import java.io.IOException;
18  import java.io.InputStream;
19  import java.io.InputStreamReader;
20  import java.io.StringWriter;
21  import java.io.UnsupportedEncodingException;
22  import java.util.Iterator;
23  import java.util.Map;
24  
25  import org.mortbay.log.Log;
26  
27  
28  /* ------------------------------------------------------------ */
29  /** Handles coding of MIME  "x-www-form-urlencoded".
30   * This class handles the encoding and decoding for either
31   * the query string of a URL or the _content of a POST HTTP request.
32   *
33   * <p><h4>Notes</h4>
34   * The hashtable either contains String single values, vectors
35   * of String or arrays of Strings.
36   *
37   * This class is only partially synchronised.  In particular, simple
38   * get operations are not protected from concurrent updates.
39   *
40   * @see java.net.URLEncoder
41   * @author Greg Wilkins (gregw)
42   */
43  public class UrlEncoded extends MultiMap
44  {
45  
46      /* ----------------------------------------------------------------- */
47      public UrlEncoded(UrlEncoded url)
48      {
49          super(url);
50      }
51      
52      /* ----------------------------------------------------------------- */
53      public UrlEncoded()
54      {
55          super(6);
56      }
57      
58      /* ----------------------------------------------------------------- */
59      public UrlEncoded(String s)
60      {
61          super(6);
62          decode(s,StringUtil.__UTF8);
63      }
64      
65      /* ----------------------------------------------------------------- */
66      public UrlEncoded(String s, String charset)
67      {
68          super(6);
69          decode(s,charset);
70      }
71      
72      /* ----------------------------------------------------------------- */
73      public void decode(String query)
74      {
75          decodeTo(query,this,StringUtil.__UTF8);
76      }
77      
78      /* ----------------------------------------------------------------- */
79      public void decode(String query,String charset)
80      {
81          decodeTo(query,this,charset);
82      }
83      
84      /* -------------------------------------------------------------- */
85      /** Encode Hashtable with % encoding.
86       */
87      public String encode()
88      {
89          return encode(StringUtil.__UTF8,false);
90      }
91      
92      /* -------------------------------------------------------------- */
93      /** Encode Hashtable with % encoding.
94       */
95      public String encode(String charset)
96      {
97          return encode(charset,false);
98      }
99      
100     /* -------------------------------------------------------------- */
101     /** Encode Hashtable with % encoding.
102      * @param equalsForNullValue if True, then an '=' is always used, even
103      * for parameters without a value. e.g. "blah?a=&b=&c=".
104      */
105     public synchronized String encode(String charset, boolean equalsForNullValue)
106     {
107         return encode(this,charset,equalsForNullValue);
108     }
109     
110     /* -------------------------------------------------------------- */
111     /** Encode Hashtable with % encoding.
112      * @param equalsForNullValue if True, then an '=' is always used, even
113      * for parameters without a value. e.g. "blah?a=&b=&c=".
114      */
115     public static String encode(MultiMap map, String charset, boolean equalsForNullValue)
116     {
117         if (charset==null)
118             charset=StringUtil.__UTF8;
119         
120         StringBuffer result = new StringBuffer(128);
121         synchronized(result)
122         {
123             Iterator iter = map.entrySet().iterator();
124             while(iter.hasNext())
125             {
126                 Map.Entry entry = (Map.Entry)iter.next();
127                 
128                 String key = entry.getKey().toString();
129                 Object list = entry.getValue();
130                 int s=LazyList.size(list);
131                 
132                 if (s==0)
133                 {
134                     result.append(encodeString(key,charset));
135                     if(equalsForNullValue)
136                         result.append('=');
137                 }
138                 else
139                 {
140                     for (int i=0;i<s;i++)
141                     {
142                         if (i>0)
143                             result.append('&');
144                         Object val=LazyList.get(list,i);
145                         result.append(encodeString(key,charset));
146 
147                         if (val!=null)
148                         {
149                             String str=val.toString();
150                             if (str.length()>0)
151                             {
152                                 result.append('=');
153                                 result.append(encodeString(str,charset));
154                             }
155                             else if (equalsForNullValue)
156                                 result.append('=');
157                         }
158                         else if (equalsForNullValue)
159                             result.append('=');
160                     }
161                 }
162                 if (iter.hasNext())
163                     result.append('&');
164             }
165             return result.toString();
166         }
167     }
168 
169 
170     /* -------------------------------------------------------------- */
171     /** Decoded parameters to Map.
172      * @param content the string containing the encoded parameters
173      */
174     public static void decodeTo(String content, MultiMap map, String charset)
175     {
176         if (charset==null)
177             charset=StringUtil.__UTF8;
178 
179         synchronized(map)
180         {
181             String key = null;
182             String value = null;
183             int mark=-1;
184             boolean encoded=false;
185             for (int i=0;i<content.length();i++)
186             {
187                 char c = content.charAt(i);
188                 switch (c)
189                 {
190                   case '&':
191                       int l=i-mark-1;
192                       value = l==0?"":
193                           (encoded?decodeString(content,mark+1,l,charset):content.substring(mark+1,i));
194                       mark=i;
195                       encoded=false;
196                       if (key != null)
197                       {
198                           map.add(key,value);
199                       }
200                       else if (value!=null&&value.length()>0)
201                       {
202                           map.add(value,"");
203                       }
204                       key = null;
205                       value=null;
206                       break;
207                   case '=':
208                       if (key!=null)
209                           break;
210                       key = encoded?decodeString(content,mark+1,i-mark-1,charset):content.substring(mark+1,i);
211                       mark=i;
212                       encoded=false;
213                       break;
214                   case '+':
215                       encoded=true;
216                       break;
217                   case '%':
218                       encoded=true;
219                       break;
220                 }                
221             }
222             
223             if (key != null)
224             {
225                 int l=content.length()-mark-1;
226                 value = l==0?"":(encoded?decodeString(content,mark+1,l,charset):content.substring(mark+1));
227                 map.add(key,value);
228             }
229             else if (mark<content.length())
230             {
231                 key = encoded
232                     ?decodeString(content,mark+1,content.length()-mark-1,charset)
233                     :content.substring(mark+1);
234                 map.add(key,"");
235             }
236         }
237     }
238 
239     /* -------------------------------------------------------------- */
240     /** Decoded parameters to Map.
241      * @param data the byte[] containing the encoded parameters
242      */
243     public static void decodeUtf8To(byte[] raw,int offset, int length, MultiMap map)
244     {
245         decodeUtf8To(raw,offset,length,map,new Utf8StringBuffer());
246     }
247 
248     /* -------------------------------------------------------------- */
249     /** Decoded parameters to Map.
250      * @param data the byte[] containing the encoded parameters
251      */
252     public static void decodeUtf8To(byte[] raw,int offset, int length, MultiMap map,Utf8StringBuffer buffer)
253     {
254         synchronized(map)
255         {
256             String key = null;
257             String value = null;
258             
259             // TODO cache of parameter names ???
260             int end=offset+length;
261             for (int i=offset;i<end;i++)
262             {
263                 byte b=raw[i];
264                 switch ((char)(0xff&b))
265                 {
266                     case '&':
267                         value = buffer.length()==0?"":buffer.toString();
268                         buffer.reset();
269                         if (key != null)
270                         {
271                             map.add(key,value);
272                         }
273                         else if (value!=null&&value.length()>0)
274                         {
275                             map.add(value,"");
276                         }
277                         key = null;
278                         value=null;
279                         break;
280                         
281                     case '=':
282                         if (key!=null)
283                         {
284                             buffer.append(b);
285                             break;
286                         }
287                         key = buffer.toString();
288                         buffer.reset();
289                         break;
290                         
291                     case '+':
292                         buffer.append((byte)' ');
293                         break;
294                         
295                     case '%':
296                         if (i+2<end)
297                             buffer.append((byte)((TypeUtil.convertHexDigit(raw[++i])<<4) + TypeUtil.convertHexDigit(raw[++i])));
298                         break;
299                     default:
300                         buffer.append(b);
301                     break;
302                 }
303             }
304             
305             if (key != null)
306             {
307                 value = buffer.length()==0?"":buffer.toString();
308                 buffer.reset();
309                 map.add(key,value);
310             }
311             else if (buffer.length()>0)
312             {
313                 map.add(buffer.toString(),"");
314             }
315         }
316     }
317 
318     /* -------------------------------------------------------------- */
319     /** Decoded parameters to Map.
320      * @param in InputSteam to read
321      * @param map MultiMap to add parameters to
322      * @param maxLength maximum length of content to read 0r -1 for no limit
323      */
324     public static void decode88591To(InputStream in, MultiMap map, int maxLength)
325     throws IOException
326     {
327         synchronized(map)
328         {
329             StringBuffer buffer = new StringBuffer();
330             String key = null;
331             String value = null;
332             
333             int b;
334 
335             // TODO cache of parameter names ???
336             int totalLength=0;
337             while ((b=in.read())>=0)
338             {
339                 switch ((char) b)
340                 {
341                     case '&':
342                         value = buffer.length()==0?"":buffer.toString();
343                         buffer.setLength(0);
344                         if (key != null)
345                         {
346                             map.add(key,value);
347                         }
348                         else if (value!=null&&value.length()>0)
349                         {
350                             map.add(value,"");
351                         }
352                         key = null;
353                         value=null;
354                         break;
355                         
356                     case '=':
357                         if (key!=null)
358                         {
359                             buffer.append((char)b);
360                             break;
361                         }
362                         key = buffer.toString();
363                         buffer.setLength(0);
364                         break;
365                         
366                     case '+':
367                         buffer.append((char)' ');
368                         break;
369                         
370                     case '%':
371                         int dh=in.read();
372                         int dl=in.read();
373                         if (dh<0||dl<0)
374                             break;
375                         buffer.append((char)((TypeUtil.convertHexDigit((byte)dh)<<4) + TypeUtil.convertHexDigit((byte)dl)));
376                         break;
377                     default:
378                         buffer.append((char)b);
379                     break;
380                 }
381                 if (maxLength>=0 && (++totalLength > maxLength))
382                     throw new IllegalStateException("Form too large");
383             }
384             
385             if (key != null)
386             {
387                 value = buffer.length()==0?"":buffer.toString();
388                 buffer.setLength(0);
389                 map.add(key,value);
390             }
391             else if (buffer.length()>0)
392             {
393                 map.add(buffer.toString(), "");
394             }
395         }
396     }
397     
398     /* -------------------------------------------------------------- */
399     /** Decoded parameters to Map.
400      * @param in InputSteam to read
401      * @param map MultiMap to add parameters to
402      * @param maxLength maximum length of conent to read 0r -1 for no limit
403      */
404     public static void decodeUtf8To(InputStream in, MultiMap map, int maxLength)
405     throws IOException
406     {
407         synchronized(map)
408         {
409             Utf8StringBuffer buffer = new Utf8StringBuffer();
410             String key = null;
411             String value = null;
412             
413             int b;
414             
415             // TODO cache of parameter names ???
416             int totalLength=0;
417             while ((b=in.read())>=0)
418             {
419                 switch ((char) b)
420                 {
421                     case '&':
422                         value = buffer.length()==0?"":buffer.toString();
423                         buffer.reset();
424                         if (key != null)
425                         {
426                             map.add(key,value);
427                         }
428                         else if (value!=null&&value.length()>0)
429                         {
430                             map.add(value,"");
431                         }
432                         key = null;
433                         value=null;
434                         break;
435                         
436                     case '=':
437                         if (key!=null)
438                         {
439                             buffer.append((byte)b);
440                             break;
441                         }
442                         key = buffer.toString();
443                         buffer.reset();
444                         break;
445                         
446                     case '+':
447                         buffer.append((byte)' ');
448                         break;
449                         
450                     case '%':
451                         int dh=in.read();
452                         int dl=in.read();
453                         if (dh<0||dl<0)
454                             break;
455                         buffer.append((byte)((TypeUtil.convertHexDigit((byte)dh)<<4) + TypeUtil.convertHexDigit((byte)dl)));
456                         break;
457                     default:
458                         buffer.append((byte)b);
459                     break;
460                 }
461                 if (maxLength>=0 && (++totalLength > maxLength))
462                     throw new IllegalStateException("Form too large");
463             }
464             
465             if (key != null)
466             {
467                 value = buffer.length()==0?"":buffer.toString();
468                 buffer.reset();
469                 map.add(key,value);
470             }
471             else if (buffer.length()>0)
472             {
473                 map.add(buffer.toString(), "");
474             }
475         }
476     }
477     
478     /* -------------------------------------------------------------- */
479     public static void decodeUtf16To(InputStream in, MultiMap map, int maxLength) throws IOException
480     {
481         InputStreamReader input = new InputStreamReader(in,StringUtil.__UTF16);
482         StringBuffer buf = new StringBuffer();
483 
484         int c;
485         int length=0;
486         if (maxLength<0)
487             maxLength=Integer.MAX_VALUE;
488         while ((c=input.read())>0 && length++<maxLength)
489             buf.append((char)c);
490         decodeTo(buf.toString(),map,StringUtil.__UTF8);
491     }
492     
493     /* -------------------------------------------------------------- */
494     /** Decoded parameters to Map.
495      * @param in the stream containing the encoded parameters
496      */
497     public static void decodeTo(InputStream in, MultiMap map, String charset, int maxLength)
498     throws IOException
499     {
500 
501         if (charset==null || StringUtil.__UTF8.equalsIgnoreCase(charset))
502         {
503             decodeUtf8To(in,map,maxLength);
504             return;
505         }
506         
507         if (StringUtil.__ISO_8859_1.equals(charset))
508         {
509             decode88591To(in,map,maxLength);
510             return;
511         }
512 
513         if (StringUtil.__UTF16.equalsIgnoreCase(charset)) // Should be all 2 byte encodings
514         {
515             decodeUtf16To(in,map,maxLength);
516             return;
517         }
518         
519 
520         synchronized(map)
521         {
522             String key = null;
523             String value = null;
524             
525             int c;
526             int digit=0;
527             int digits=0;
528             
529             int totalLength = 0;
530             ByteArrayOutputStream2 output = new ByteArrayOutputStream2();
531             
532             int size=0;
533             
534             while ((c=in.read())>0)
535             {
536                 switch ((char) c)
537                 {
538                     case '&':
539                         size=output.size();
540                         value = size==0?"":output.toString(charset);
541                         output.setCount(0);
542                         if (key != null)
543                         {
544                             map.add(key,value);
545                         }
546                         else if (value!=null&&value.length()>0)
547                         {
548                             map.add(value,"");
549                         }
550                         key = null;
551                         value=null;
552                         break;
553                     case '=':
554                         if (key!=null)
555                         {
556                             output.write(c);
557                             break;
558                         }
559                         size=output.size();
560                         key = size==0?"":output.toString(charset);
561                         output.setCount(0);
562                         break;
563                     case '+':
564                         output.write(' ');
565                         break;
566                     case '%':
567                         digits=2;
568                         break;
569                     default:
570                         if (digits==2)
571                         {
572                             digit=TypeUtil.convertHexDigit((byte)c);
573                             digits=1;
574                         }
575                         else if (digits==1)
576                         {
577                             output.write((digit<<4) + TypeUtil.convertHexDigit((byte)c));
578                             digits=0;
579                         }
580                         else
581                             output.write(c);
582                     break;
583                 }
584                 
585                 totalLength++;
586                 if (maxLength>=0 && totalLength > maxLength)
587                     throw new IllegalStateException("Form too large");
588             }
589 
590             size=output.size();
591             if (key != null)
592             {
593                 value = size==0?"":output.toString(charset);
594                 output.setCount(0);
595                 map.add(key,value);
596             }
597             else if (size>0)
598                 map.add(output.toString(charset),"");
599         }
600     }
601     
602     /* -------------------------------------------------------------- */
603     /** Decode String with % encoding.
604      * This method makes the assumption that the majority of calls
605      * will need no decoding.
606      */
607     public static String decodeString(String encoded,int offset,int length,String charset)
608     {
609         if (charset==null || StringUtil.isUTF8(charset))
610         {
611             Utf8StringBuffer buffer=null;
612 
613             for (int i=0;i<length;i++)
614             {
615                 char c = encoded.charAt(offset+i);
616                 if (c<0||c>0xff)
617                 {
618                     if (buffer==null)
619                     {
620                         buffer=new Utf8StringBuffer(length);
621                         buffer.getStringBuffer().append(encoded.substring(offset,offset+i+1));
622                     }
623                     else
624                         buffer.getStringBuffer().append(c);
625                 }
626                 else if (c=='+')
627                 {
628                     if (buffer==null)
629                     {
630                         buffer=new Utf8StringBuffer(length);
631                         buffer.getStringBuffer().append(encoded.substring(offset,offset+i));
632                     }
633                     
634                     buffer.getStringBuffer().append(' ');
635                 }
636                 else if (c=='%' && (i+2)<length)
637                 {
638                     if (buffer==null)
639                     {
640                         buffer=new Utf8StringBuffer(length);
641                         buffer.getStringBuffer().append(encoded.substring(offset,offset+i));
642                     }
643 
644                     while(c=='%' && (i+2)<length)
645                     {
646                         try
647                         {
648                             byte b=(byte)TypeUtil.parseInt(encoded,offset+i+1,2,16);
649                             buffer.append(b);
650                             i+=3;
651                         }
652                         catch(NumberFormatException nfe)
653                         {
654                             buffer.getStringBuffer().append('%');
655                             for(char next; ((next=encoded.charAt(++i+offset))!='%');)
656                                 buffer.getStringBuffer().append((next=='+' ? ' ' : next));
657                         }
658 
659                         if (i<length)
660                             c = encoded.charAt(offset+i);
661                     }
662                     i--;
663                 }
664                 else if (buffer!=null)
665                     buffer.getStringBuffer().append(c);
666             }
667 
668             if (buffer==null)
669             {
670                 if (offset==0 && encoded.length()==length)
671                     return encoded;
672                 return encoded.substring(offset,offset+length);
673             }
674 
675             return buffer.toString();
676         }
677         else
678         {
679             StringBuffer buffer=null;
680 
681             try
682             {
683                 for (int i=0;i<length;i++)
684                 {
685                     char c = encoded.charAt(offset+i);
686                     if (c<0||c>0xff)
687                     {
688                         if (buffer==null)
689                         {
690                             buffer=new StringBuffer(length);
691                             buffer.append(encoded.substring(offset,offset+i+1));
692                         }
693                         else
694                             buffer.append(c);
695                     }
696                     else if (c=='+')
697                     {
698                         if (buffer==null)
699                         {
700                             buffer=new StringBuffer(length);
701                             buffer.append(encoded.substring(offset,offset+i));
702                         }
703                         
704                         buffer.append(' ');
705                     }
706                     else if (c=='%' && (i+2)<length)
707                     {
708                         if (buffer==null)
709                         {
710                             buffer=new StringBuffer(length);
711                             buffer.append(encoded.substring(offset,offset+i));
712                         }
713 
714                         byte[] ba=new byte[length];
715                         int n=0;
716                         while(c>=0 && c<=0xff)
717                         {
718                             if (c=='%')
719                             {   
720                                 if(i+2<length)
721                                 {
722                                     try
723                                     {
724                                         ba[n++]=(byte)TypeUtil.parseInt(encoded,offset+i+1,2,16);
725                                         i+=3;
726                                     }
727                                     catch(NumberFormatException nfe)
728                                     {                                        
729                                         ba[n-1] = (byte)'%';                                    
730                                         for(char next; ((next=encoded.charAt(++i+offset))!='%');)
731                                             ba[n++] = (byte)(next=='+' ? ' ' : next);
732                                     }
733                                 }
734                                 else
735                                 {
736                                     ba[n++] = (byte)'%';
737                                     i++;
738                                 }
739                             }
740                             else if (c=='+')
741                             {
742                                 ba[n++]=(byte)' ';
743                                 i++;
744                             }
745                             else
746                             {
747                                 ba[n++]=(byte)c;
748                                 i++;
749                             }
750                             
751                             if (i>=length)
752                                 break;
753                             c = encoded.charAt(offset+i);
754                         }
755 
756                         i--;
757                         buffer.append(new String(ba,0,n,charset));
758 
759                     }
760                     else if (buffer!=null)
761                         buffer.append(c);
762                 }
763 
764                 if (buffer==null)
765                 {
766                     if (offset==0 && encoded.length()==length)
767                         return encoded;
768                     return encoded.substring(offset,offset+length);
769                 }
770 
771                 return buffer.toString();
772             }
773             catch (UnsupportedEncodingException e)
774             {
775                 throw new RuntimeException(e);
776             }
777         }
778         
779     }
780     
781     /* ------------------------------------------------------------ */
782     /** Perform URL encoding.
783      * Assumes 8859 charset
784      * @param string 
785      * @return encoded string.
786      */
787     public static String encodeString(String string)
788     {
789         return encodeString(string,StringUtil.__UTF8);
790     }
791     
792     /* ------------------------------------------------------------ */
793     /** Perform URL encoding.
794      * @param string 
795      * @return encoded string.
796      */
797     public static String encodeString(String string,String charset)
798     {
799         if (charset==null)
800             charset=StringUtil.__UTF8;
801         byte[] bytes=null;
802         try
803         {
804             bytes=string.getBytes(charset);
805         }
806         catch(UnsupportedEncodingException e)
807         {
808             // Log.warn(LogSupport.EXCEPTION,e);
809             bytes=string.getBytes();
810         }
811         
812         int len=bytes.length;
813         byte[] encoded= new byte[bytes.length*3];
814         int n=0;
815         boolean noEncode=true;
816         
817         for (int i=0;i<len;i++)
818         {
819             byte b = bytes[i];
820             
821             if (b==' ')
822             {
823                 noEncode=false;
824                 encoded[n++]=(byte)'+';
825             }
826             else if (b>='a' && b<='z' ||
827                      b>='A' && b<='Z' ||
828                      b>='0' && b<='9')
829             {
830                 encoded[n++]=b;
831             }
832             else
833             {
834                 noEncode=false;
835                 encoded[n++]=(byte)'%';
836                 byte nibble= (byte) ((b&0xf0)>>4);
837                 if (nibble>=10)
838                     encoded[n++]=(byte)('A'+nibble-10);
839                 else
840                     encoded[n++]=(byte)('0'+nibble);
841                 nibble= (byte) (b&0xf);
842                 if (nibble>=10)
843                     encoded[n++]=(byte)('A'+nibble-10);
844                 else
845                     encoded[n++]=(byte)('0'+nibble);
846             }
847         }
848 
849         if (noEncode)
850             return string;
851         
852         try
853         {    
854             return new String(encoded,0,n,charset);
855         }
856         catch(UnsupportedEncodingException e)
857         {
858             // Log.warn(LogSupport.EXCEPTION,e);
859             return new String(encoded,0,n);
860         }
861     }
862 
863 
864     /* ------------------------------------------------------------ */
865     /** 
866      */
867     public Object clone()
868     {
869         return new UrlEncoded(this);
870     }
871 }