001/* Charset.java -- 
002   Copyright (C) 2002, 2004, 2005, 2007  Free Software Foundation, Inc.
003
004This file is part of GNU Classpath.
005
006GNU Classpath is free software; you can redistribute it and/or modify
007it under the terms of the GNU General Public License as published by
008the Free Software Foundation; either version 2, or (at your option)
009any later version.
010
011GNU Classpath is distributed in the hope that it will be useful, but
012WITHOUT ANY WARRANTY; without even the implied warranty of
013MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
014General Public License for more details.
015
016You should have received a copy of the GNU General Public License
017along with GNU Classpath; see the file COPYING.  If not, write to the
018Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
01902110-1301 USA.
020
021Linking this library statically or dynamically with other modules is
022making a combined work based on this library.  Thus, the terms and
023conditions of the GNU General Public License cover the whole
024combination.
025
026As a special exception, the copyright holders of this library give you
027permission to link this library with independent modules to produce an
028executable, regardless of the license terms of these independent
029modules, and to copy and distribute the resulting executable under
030terms of your choice, provided that you also meet, for each linked
031independent module, the terms and conditions of the license of that
032module.  An independent module is a module which is not derived from
033or based on this library.  If you modify this library, you may extend
034this exception to your version of the library, but you are not
035obligated to do so.  If you do not wish to do so, delete this
036exception statement from your version. */
037
038
039package java.nio.charset;
040
041import gnu.classpath.ServiceFactory;
042import gnu.classpath.SystemProperties;
043import gnu.java.nio.charset.Provider;
044
045import java.io.BufferedReader;
046import java.io.InputStreamReader;
047import java.net.URL;
048import java.nio.ByteBuffer;
049import java.nio.CharBuffer;
050import java.nio.charset.spi.CharsetProvider;
051import java.util.Collections;
052import java.util.Enumeration;
053import java.util.HashSet;
054import java.util.Iterator;
055import java.util.LinkedHashSet;
056import java.util.Locale;
057import java.util.Set;
058import java.util.SortedMap;
059import java.util.TreeMap;
060
061/**
062 * @author Jesse Rosenstock
063 * @since 1.4
064 * @status updated to 1.5
065 */
066public abstract class Charset implements Comparable<Charset>
067{
068  private CharsetEncoder cachedEncoder;
069  private CharsetDecoder cachedDecoder;
070 
071  /**
072   * Extra Charset providers.
073   */
074  private static CharsetProvider[] providers;
075  
076  private final String canonicalName;
077  private final String[] aliases;
078  
079  protected Charset (String canonicalName, String[] aliases)
080  {
081    checkName (canonicalName);
082    if (aliases != null)
083      {
084        int n = aliases.length;
085        for (int i = 0; i < n; ++i)
086            checkName (aliases[i]);
087      }
088
089    cachedEncoder = null;
090    cachedDecoder = null;
091    this.canonicalName = canonicalName;
092    this.aliases = aliases;
093  }
094
095  /**
096   * @throws IllegalCharsetNameException  if the name is illegal
097   */
098  private static void checkName (String name)
099  {
100    int n = name.length ();
101
102    if (n == 0)
103      throw new IllegalCharsetNameException (name);
104
105    char ch = name.charAt (0);
106    if (!(('A' <= ch && ch <= 'Z')
107          || ('a' <= ch && ch <= 'z')
108          || ('0' <= ch && ch <= '9')))
109      throw new IllegalCharsetNameException (name);
110
111    for (int i = 1; i < n; ++i)
112      {
113        ch = name.charAt (i);
114        if (!(('A' <= ch && ch <= 'Z')
115              || ('a' <= ch && ch <= 'z')
116              || ('0' <= ch && ch <= '9')
117              || ch == '-' || ch == '.' || ch == ':' || ch == '_'))
118          throw new IllegalCharsetNameException (name);
119      }
120  }
121
122  /**
123   * Returns the system default charset.
124   *
125   * This may be set by the user or VM with the file.encoding
126   * property.
127   *
128   * @since 1.5
129   */
130  public static Charset defaultCharset()
131  {
132    String encoding;
133    
134    try 
135      {
136        encoding = SystemProperties.getProperty("file.encoding");
137      }
138    catch(SecurityException e)
139      {
140        // Use fallback.
141        encoding = "ISO-8859-1";
142      }
143    catch(IllegalArgumentException e)
144      {
145        // Use fallback.
146        encoding = "ISO-8859-1";
147      }
148
149    try
150      {
151        return forName(encoding);
152      }
153    catch(UnsupportedCharsetException e)
154      {
155        // Ignore.
156      }
157    catch(IllegalCharsetNameException e)
158      {
159        // Ignore.
160      }
161    catch(IllegalArgumentException e)
162      {
163        // Ignore.
164      }
165    
166    throw new IllegalStateException("Can't get default charset!");
167  }
168
169  public static boolean isSupported (String charsetName)
170  {
171    return charsetForName (charsetName) != null;
172  }
173
174  /**
175   * Returns the Charset instance for the charset of the given name.
176   * 
177   * @param charsetName
178   * @return the Charset instance for the indicated charset
179   * @throws UnsupportedCharsetException if this VM does not support
180   * the charset of the given name.
181   * @throws IllegalCharsetNameException if the given charset name is
182   * legal.
183   * @throws IllegalArgumentException if <code>charsetName</code> is null.
184   */
185  public static Charset forName (String charsetName)
186  {
187    // Throws IllegalArgumentException as the JDK does.
188    if(charsetName == null)
189        throw new IllegalArgumentException("Charset name must not be null.");
190    
191    Charset cs = charsetForName (charsetName);
192    if (cs == null)
193      throw new UnsupportedCharsetException (charsetName);
194    return cs;
195  }
196
197  /**
198   * Retrieves a charset for the given charset name.
199   *
200   * @return A charset object for the charset with the specified name, or
201   * <code>null</code> if no such charset exists.
202   *
203   * @throws IllegalCharsetNameException  if the name is illegal
204   */
205  private static Charset charsetForName(String charsetName)
206  {
207    checkName (charsetName);
208    // Try the default provider first
209    // (so we don't need to load external providers unless really necessary)
210    // if it is an exotic charset try loading the external providers.
211    Charset cs = provider().charsetForName(charsetName);
212    if (cs == null)
213      {
214        CharsetProvider[] providers = providers2();
215        for (int i = 0; i < providers.length; i++)
216          {
217            cs = providers[i].charsetForName(charsetName);
218            if (cs != null)
219              break;
220          }
221      }
222    return cs;
223  }
224
225  public static SortedMap<String, Charset> availableCharsets()
226  {
227    TreeMap<String, Charset> charsets
228      = new TreeMap(String.CASE_INSENSITIVE_ORDER);
229    for (Iterator<Charset> i = provider().charsets(); i.hasNext(); )
230      {
231        Charset cs = i.next();
232        charsets.put(cs.name(), cs);
233      }
234
235    CharsetProvider[] providers = providers2();
236    for (int j = 0; j < providers.length; j++)
237      {
238        for (Iterator<Charset> i = providers[j].charsets(); i.hasNext(); )
239          {
240            Charset cs = (Charset) i.next();
241            charsets.put(cs.name(), cs);
242          }
243      }
244
245    return Collections.unmodifiableSortedMap(charsets);
246  }
247
248  private static CharsetProvider provider()
249  {
250    try
251      {
252        String s = System.getProperty("charset.provider");
253        if (s != null)
254          {
255            CharsetProvider p =
256              (CharsetProvider) ((Class.forName(s)).newInstance());
257            return p;
258          }
259      }
260    catch (Exception e)
261      {
262        // Ignore.
263      }
264    
265    return Provider.provider();
266  }
267
268  /**
269   * We need to support multiple providers, reading them from
270   * java.nio.charset.spi.CharsetProvider in the resource directory
271   * META-INF/services. This returns the "extra" charset providers.
272   */
273  private static CharsetProvider[] providers2()
274  {
275    if (providers == null)
276      {
277        try
278          {
279            Iterator i = ServiceFactory.lookupProviders(CharsetProvider.class);
280            LinkedHashSet set = new LinkedHashSet();
281            while (i.hasNext())
282              set.add(i.next());
283
284            providers = new CharsetProvider[set.size()];
285            set.toArray(providers);
286          }
287        catch (Exception e)
288          {
289            throw new RuntimeException(e);
290          }
291      }
292    return providers;
293  }
294
295  public final String name ()
296  {
297    return canonicalName;
298  }
299
300  public final Set<String> aliases ()
301  {
302    if (aliases == null)
303      return Collections.<String>emptySet();
304
305    // should we cache the aliasSet instead?
306    int n = aliases.length;
307    HashSet<String> aliasSet = new HashSet<String> (n);
308    for (int i = 0; i < n; ++i)
309        aliasSet.add (aliases[i]);
310    return Collections.unmodifiableSet (aliasSet);
311  }
312
313  public String displayName ()
314  {
315    return canonicalName;
316  }
317
318  public String displayName (Locale locale)
319  {
320    return canonicalName;
321  }
322
323  public final boolean isRegistered ()
324  {
325    return (!canonicalName.startsWith ("x-")
326            && !canonicalName.startsWith ("X-"));
327  }
328
329  public abstract boolean contains (Charset cs);
330
331  public abstract CharsetDecoder newDecoder ();
332
333  public abstract CharsetEncoder newEncoder ();
334
335  public boolean canEncode ()
336  {
337    return true;
338  }
339
340  // NB: This implementation serializes different threads calling
341  // Charset.encode(), a potential performance problem.  It might
342  // be better to remove the cache, or use ThreadLocal to cache on
343  // a per-thread basis.
344  public final synchronized ByteBuffer encode (CharBuffer cb)
345  {
346    try
347      {
348        if (cachedEncoder == null)
349          {
350            cachedEncoder = newEncoder ()
351              .onMalformedInput (CodingErrorAction.REPLACE)
352              .onUnmappableCharacter (CodingErrorAction.REPLACE);
353          } else
354          cachedEncoder.reset();
355        return cachedEncoder.encode (cb);
356      }
357    catch (CharacterCodingException e)
358      {
359        throw new AssertionError (e);
360      }
361  }
362  
363  public final ByteBuffer encode (String str)
364  {
365    return encode (CharBuffer.wrap (str));
366  }
367
368  // NB: This implementation serializes different threads calling
369  // Charset.decode(), a potential performance problem.  It might
370  // be better to remove the cache, or use ThreadLocal to cache on
371  // a per-thread basis.
372  public final synchronized CharBuffer decode (ByteBuffer bb)
373  {
374    try
375      {
376        if (cachedDecoder == null)
377          {
378            cachedDecoder = newDecoder ()
379              .onMalformedInput (CodingErrorAction.REPLACE)
380              .onUnmappableCharacter (CodingErrorAction.REPLACE);
381          } else
382          cachedDecoder.reset();
383
384        return cachedDecoder.decode (bb);
385      }
386    catch (CharacterCodingException e)
387      {
388        throw new AssertionError (e);
389      }
390  }
391
392  public final int compareTo (Charset other)
393  {
394    return canonicalName.compareToIgnoreCase (other.canonicalName);
395  }
396
397  public final int hashCode ()
398  {
399    return canonicalName.hashCode ();
400  }
401
402  public final boolean equals (Object ob)
403  {
404    if (ob instanceof Charset)
405      return canonicalName.equalsIgnoreCase (((Charset) ob).canonicalName);
406    else
407      return false;
408  }
409
410  public final String toString ()
411  {
412    return canonicalName;
413  }
414}