001    /* Charset.java -- 
002       Copyright (C) 2002, 2004, 2005, 2007  Free Software Foundation, Inc.
003    
004    This file is part of GNU Classpath.
005    
006    GNU Classpath is free software; you can redistribute it and/or modify
007    it under the terms of the GNU General Public License as published by
008    the Free Software Foundation; either version 2, or (at your option)
009    any later version.
010    
011    GNU Classpath is distributed in the hope that it will be useful, but
012    WITHOUT ANY WARRANTY; without even the implied warranty of
013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014    General Public License for more details.
015    
016    You should have received a copy of the GNU General Public License
017    along with GNU Classpath; see the file COPYING.  If not, write to the
018    Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
019    02110-1301 USA.
020    
021    Linking this library statically or dynamically with other modules is
022    making a combined work based on this library.  Thus, the terms and
023    conditions of the GNU General Public License cover the whole
024    combination.
025    
026    As a special exception, the copyright holders of this library give you
027    permission to link this library with independent modules to produce an
028    executable, regardless of the license terms of these independent
029    modules, and to copy and distribute the resulting executable under
030    terms of your choice, provided that you also meet, for each linked
031    independent module, the terms and conditions of the license of that
032    module.  An independent module is a module which is not derived from
033    or based on this library.  If you modify this library, you may extend
034    this exception to your version of the library, but you are not
035    obligated to do so.  If you do not wish to do so, delete this
036    exception statement from your version. */
037    
038    
039    package java.nio.charset;
040    
041    import gnu.classpath.ServiceFactory;
042    import gnu.classpath.SystemProperties;
043    import gnu.java.nio.charset.Provider;
044    
045    import java.io.BufferedReader;
046    import java.io.InputStreamReader;
047    import java.net.URL;
048    import java.nio.ByteBuffer;
049    import java.nio.CharBuffer;
050    import java.nio.charset.spi.CharsetProvider;
051    import java.util.Collections;
052    import java.util.Enumeration;
053    import java.util.HashSet;
054    import java.util.Iterator;
055    import java.util.LinkedHashSet;
056    import java.util.Locale;
057    import java.util.Set;
058    import java.util.SortedMap;
059    import java.util.TreeMap;
060    
061    /**
062     * @author Jesse Rosenstock
063     * @since 1.4
064     * @status updated to 1.5
065     */
066    public abstract class Charset implements Comparable<Charset>
067    {
068      private CharsetEncoder cachedEncoder;
069      private CharsetDecoder cachedDecoder;
070     
071      /**
072       * Extra Charset providers.
073       */
074      private static CharsetProvider[] providers;
075      
076      private final String canonicalName;
077      private final String[] aliases;
078      
079      protected Charset (String canonicalName, String[] aliases)
080      {
081        checkName (canonicalName);
082        if (aliases != null)
083          {
084            int n = aliases.length;
085            for (int i = 0; i < n; ++i)
086                checkName (aliases[i]);
087          }
088    
089        cachedEncoder = null;
090        cachedDecoder = null;
091        this.canonicalName = canonicalName;
092        this.aliases = aliases;
093      }
094    
095      /**
096       * @throws IllegalCharsetNameException  if the name is illegal
097       */
098      private static void checkName (String name)
099      {
100        int n = name.length ();
101    
102        if (n == 0)
103          throw new IllegalCharsetNameException (name);
104    
105        char ch = name.charAt (0);
106        if (!(('A' <= ch && ch <= 'Z')
107              || ('a' <= ch && ch <= 'z')
108              || ('0' <= ch && ch <= '9')))
109          throw new IllegalCharsetNameException (name);
110    
111        for (int i = 1; i < n; ++i)
112          {
113            ch = name.charAt (i);
114            if (!(('A' <= ch && ch <= 'Z')
115                  || ('a' <= ch && ch <= 'z')
116                  || ('0' <= ch && ch <= '9')
117                  || ch == '-' || ch == '.' || ch == ':' || ch == '_'))
118              throw new IllegalCharsetNameException (name);
119          }
120      }
121    
122      /**
123       * Returns the system default charset.
124       *
125       * This may be set by the user or VM with the file.encoding
126       * property.
127       *
128       * @since 1.5
129       */
130      public static Charset defaultCharset()
131      {
132        String encoding;
133        
134        try 
135          {
136            encoding = SystemProperties.getProperty("file.encoding");
137          }
138        catch(SecurityException e)
139          {
140            // Use fallback.
141            encoding = "ISO-8859-1";
142          }
143        catch(IllegalArgumentException e)
144          {
145            // Use fallback.
146            encoding = "ISO-8859-1";
147          }
148    
149        try
150          {
151            return forName(encoding);
152          }
153        catch(UnsupportedCharsetException e)
154          {
155            // Ignore.
156          }
157        catch(IllegalCharsetNameException e)
158          {
159            // Ignore.
160          }
161        catch(IllegalArgumentException e)
162          {
163            // Ignore.
164          }
165        
166        throw new IllegalStateException("Can't get default charset!");
167      }
168    
169      public static boolean isSupported (String charsetName)
170      {
171        return charsetForName (charsetName) != null;
172      }
173    
174      /**
175       * Returns the Charset instance for the charset of the given name.
176       * 
177       * @param charsetName
178       * @return the Charset instance for the indicated charset
179       * @throws UnsupportedCharsetException if this VM does not support
180       * the charset of the given name.
181       * @throws IllegalCharsetNameException if the given charset name is
182       * legal.
183       * @throws IllegalArgumentException if <code>charsetName</code> is null.
184       */
185      public static Charset forName (String charsetName)
186      {
187        // Throws IllegalArgumentException as the JDK does.
188        if(charsetName == null)
189            throw new IllegalArgumentException("Charset name must not be null.");
190        
191        Charset cs = charsetForName (charsetName);
192        if (cs == null)
193          throw new UnsupportedCharsetException (charsetName);
194        return cs;
195      }
196    
197      /**
198       * Retrieves a charset for the given charset name.
199       *
200       * @return A charset object for the charset with the specified name, or
201       * <code>null</code> if no such charset exists.
202       *
203       * @throws IllegalCharsetNameException  if the name is illegal
204       */
205      private static Charset charsetForName(String charsetName)
206      {
207        checkName (charsetName);
208        // Try the default provider first
209        // (so we don't need to load external providers unless really necessary)
210        // if it is an exotic charset try loading the external providers.
211        Charset cs = provider().charsetForName(charsetName);
212        if (cs == null)
213          {
214            CharsetProvider[] providers = providers2();
215            for (int i = 0; i < providers.length; i++)
216              {
217                cs = providers[i].charsetForName(charsetName);
218                if (cs != null)
219                  break;
220              }
221          }
222        return cs;
223      }
224    
225      public static SortedMap<String, Charset> availableCharsets()
226      {
227        TreeMap<String, Charset> charsets
228          = new TreeMap(String.CASE_INSENSITIVE_ORDER);
229        for (Iterator<Charset> i = provider().charsets(); i.hasNext(); )
230          {
231            Charset cs = i.next();
232            charsets.put(cs.name(), cs);
233          }
234    
235        CharsetProvider[] providers = providers2();
236        for (int j = 0; j < providers.length; j++)
237          {
238            for (Iterator<Charset> i = providers[j].charsets(); i.hasNext(); )
239              {
240                Charset cs = (Charset) i.next();
241                charsets.put(cs.name(), cs);
242              }
243          }
244    
245        return Collections.unmodifiableSortedMap(charsets);
246      }
247    
248      private static CharsetProvider provider()
249      {
250        try
251          {
252            String s = System.getProperty("charset.provider");
253            if (s != null)
254              {
255                CharsetProvider p =
256                  (CharsetProvider) ((Class.forName(s)).newInstance());
257                return p;
258              }
259          }
260        catch (Exception e)
261          {
262            // Ignore.
263          }
264        
265        return Provider.provider();
266      }
267    
268      /**
269       * We need to support multiple providers, reading them from
270       * java.nio.charset.spi.CharsetProvider in the resource directory
271       * META-INF/services. This returns the "extra" charset providers.
272       */
273      private static CharsetProvider[] providers2()
274      {
275        if (providers == null)
276          {
277            try
278              {
279                Iterator i = ServiceFactory.lookupProviders(CharsetProvider.class);
280                LinkedHashSet set = new LinkedHashSet();
281                while (i.hasNext())
282                  set.add(i.next());
283    
284                providers = new CharsetProvider[set.size()];
285                set.toArray(providers);
286              }
287            catch (Exception e)
288              {
289                throw new RuntimeException(e);
290              }
291          }
292        return providers;
293      }
294    
295      public final String name ()
296      {
297        return canonicalName;
298      }
299    
300      public final Set<String> aliases ()
301      {
302        if (aliases == null)
303          return Collections.<String>emptySet();
304    
305        // should we cache the aliasSet instead?
306        int n = aliases.length;
307        HashSet<String> aliasSet = new HashSet<String> (n);
308        for (int i = 0; i < n; ++i)
309            aliasSet.add (aliases[i]);
310        return Collections.unmodifiableSet (aliasSet);
311      }
312    
313      public String displayName ()
314      {
315        return canonicalName;
316      }
317    
318      public String displayName (Locale locale)
319      {
320        return canonicalName;
321      }
322    
323      public final boolean isRegistered ()
324      {
325        return (!canonicalName.startsWith ("x-")
326                && !canonicalName.startsWith ("X-"));
327      }
328    
329      public abstract boolean contains (Charset cs);
330    
331      public abstract CharsetDecoder newDecoder ();
332    
333      public abstract CharsetEncoder newEncoder ();
334    
335      public boolean canEncode ()
336      {
337        return true;
338      }
339    
340      // NB: This implementation serializes different threads calling
341      // Charset.encode(), a potential performance problem.  It might
342      // be better to remove the cache, or use ThreadLocal to cache on
343      // a per-thread basis.
344      public final synchronized ByteBuffer encode (CharBuffer cb)
345      {
346        try
347          {
348            if (cachedEncoder == null)
349              {
350                cachedEncoder = newEncoder ()
351                  .onMalformedInput (CodingErrorAction.REPLACE)
352                  .onUnmappableCharacter (CodingErrorAction.REPLACE);
353              } else
354              cachedEncoder.reset();
355            return cachedEncoder.encode (cb);
356          }
357        catch (CharacterCodingException e)
358          {
359            throw new AssertionError (e);
360          }
361      }
362      
363      public final ByteBuffer encode (String str)
364      {
365        return encode (CharBuffer.wrap (str));
366      }
367    
368      // NB: This implementation serializes different threads calling
369      // Charset.decode(), a potential performance problem.  It might
370      // be better to remove the cache, or use ThreadLocal to cache on
371      // a per-thread basis.
372      public final synchronized CharBuffer decode (ByteBuffer bb)
373      {
374        try
375          {
376            if (cachedDecoder == null)
377              {
378                cachedDecoder = newDecoder ()
379                  .onMalformedInput (CodingErrorAction.REPLACE)
380                  .onUnmappableCharacter (CodingErrorAction.REPLACE);
381              } else
382              cachedDecoder.reset();
383    
384            return cachedDecoder.decode (bb);
385          }
386        catch (CharacterCodingException e)
387          {
388            throw new AssertionError (e);
389          }
390      }
391    
392      public final int compareTo (Charset other)
393      {
394        return canonicalName.compareToIgnoreCase (other.canonicalName);
395      }
396    
397      public final int hashCode ()
398      {
399        return canonicalName.hashCode ();
400      }
401    
402      public final boolean equals (Object ob)
403      {
404        if (ob instanceof Charset)
405          return canonicalName.equalsIgnoreCase (((Charset) ob).canonicalName);
406        else
407          return false;
408      }
409    
410      public final String toString ()
411      {
412        return canonicalName;
413      }
414    }