001/* InputStreamReader.java -- Reader than transforms bytes to chars 002 Copyright (C) 1998, 1999, 2001, 2003, 2004, 2005 Free Software Foundation, Inc. 003 004This file is part of GNU Classpath. 005 006GNU Classpath is free software; you can redistribute it and/or modify 007it under the terms of the GNU General Public License as published by 008the Free Software Foundation; either version 2, or (at your option) 009any later version. 010 011GNU Classpath is distributed in the hope that it will be useful, but 012WITHOUT ANY WARRANTY; without even the implied warranty of 013MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 014General Public License for more details. 015 016You should have received a copy of the GNU General Public License 017along with GNU Classpath; see the file COPYING. If not, write to the 018Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 01902110-1301 USA. 020 021Linking this library statically or dynamically with other modules is 022making a combined work based on this library. Thus, the terms and 023conditions of the GNU General Public License cover the whole 024combination. 025 026As a special exception, the copyright holders of this library give you 027permission to link this library with independent modules to produce an 028executable, regardless of the license terms of these independent 029modules, and to copy and distribute the resulting executable under 030terms of your choice, provided that you also meet, for each linked 031independent module, the terms and conditions of the license of that 032module. An independent module is a module which is not derived from 033or based on this library. If you modify this library, you may extend 034this exception to your version of the library, but you are not 035obligated to do so. If you do not wish to do so, delete this 036exception statement from your version. */ 037 038 039package java.io; 040 041import gnu.gcj.convert.*; 042import java.nio.charset.Charset; 043import java.nio.charset.CharsetDecoder; 044 045/** 046 * This class reads characters from a byte input stream. The characters 047 * read are converted from bytes in the underlying stream by a 048 * decoding layer. The decoding layer transforms bytes to chars according 049 * to an encoding standard. There are many available encodings to choose 050 * from. The desired encoding can either be specified by name, or if no 051 * encoding is selected, the system default encoding will be used. The 052 * system default encoding name is determined from the system property 053 * <code>file.encoding</code>. The only encodings that are guaranteed to 054 * be availalbe are "8859_1" (the Latin-1 character set) and "UTF8". 055 * Unforunately, Java does not provide a mechanism for listing the 056 * ecodings that are supported in a given implementation. 057 * <p> 058 * Here is a list of standard encoding names that may be available: 059 * <p> 060 * <ul> 061 * <li>8859_1 (ISO-8859-1/Latin-1)</li> 062 * <li>8859_2 (ISO-8859-2/Latin-2)</li> 063 * <li>8859_3 (ISO-8859-3/Latin-3)</li> 064 * <li>8859_4 (ISO-8859-4/Latin-4)</li> 065 * <li>8859_5 (ISO-8859-5/Latin-5)</li> 066 * <li>8859_6 (ISO-8859-6/Latin-6)</li> 067 * <li>8859_7 (ISO-8859-7/Latin-7)</li> 068 * <li>8859_8 (ISO-8859-8/Latin-8)</li> 069 * <li>8859_9 (ISO-8859-9/Latin-9)</li> 070 * <li>ASCII (7-bit ASCII)</li> 071 * <li>UTF8 (UCS Transformation Format-8)</li> 072 * <li>More later</li> 073 * </ul> 074 * <p> 075 * It is recommended that applications do not use 076 * <code>InputStreamReader</code>'s 077 * directly. Rather, for efficiency purposes, an object of this class 078 * should be wrapped by a <code>BufferedReader</code>. 079 * <p> 080 * Due to a deficiency the Java class library design, there is no standard 081 * way for an application to install its own byte-character encoding. 082 * 083 * @see BufferedReader 084 * @see InputStream 085 * 086 * @author Aaron M. Renn (arenn@urbanophile.com) 087 * @author Per Bothner (bothner@cygnus.com) 088 * @date April 22, 1998. 089 */ 090public class InputStreamReader extends Reader 091{ 092 BufferedInputStream in; 093 094 // Buffer of chars read from in and converted but not consumed. 095 char[] work; 096 // Next available character (in work buffer) to read. 097 int wpos; 098 // Last available character (in work buffer) to read. 099 int wcount; 100 101 /* 102 * This is the byte-character decoder class that does the reading and 103 * translation of bytes from the underlying stream. 104 */ 105 BytesToUnicode converter; 106 107 /** 108 * This method initializes a new instance of <code>InputStreamReader</code> 109 * to read from the specified stream using the default encoding. 110 * 111 * @param in The <code>InputStream</code> to read from 112 */ 113 public InputStreamReader(InputStream in) 114 { 115 this(in, BytesToUnicode.getDefaultDecoder()); 116 } 117 118 /** 119 * This method initializes a new instance of <code>InputStreamReader</code> 120 * to read from the specified stream using a caller supplied character 121 * encoding scheme. Note that due to a deficiency in the Java language 122 * design, there is no way to determine which encodings are supported. 123 * 124 * @param in The <code>InputStream</code> to read from 125 * @param encoding_name The name of the encoding scheme to use 126 * 127 * @exception UnsupportedEncodingException If the encoding scheme 128 * requested is not available. 129 */ 130 public InputStreamReader(InputStream in, String encoding_name) 131 throws UnsupportedEncodingException 132 { 133 this(in, BytesToUnicode.getDecoder(encoding_name)); 134 } 135 136 /** 137 * Creates an InputStreamReader that uses a decoder of the given 138 * charset to decode the bytes in the InputStream into 139 * characters. 140 */ 141 public InputStreamReader(InputStream in, Charset charset) 142 { 143 this(in, new BytesToCharsetAdaptor(charset)); 144 } 145 146 /** 147 * Creates an InputStreamReader that uses the given charset decoder 148 * to decode the bytes in the InputStream into characters. 149 */ 150 public InputStreamReader(InputStream in, CharsetDecoder decoder) 151 { 152 this(in, new BytesToCharsetAdaptor(decoder)); 153 } 154 155 private InputStreamReader(InputStream in, BytesToUnicode decoder) 156 { 157 // FIXME: someone could pass in a BufferedInputStream whose buffer 158 // is smaller than the longest encoded character for this 159 // encoding. We will probably go into an infinite loop in this 160 // case. We probably ought to just have our own byte buffering 161 // here. 162 this.in = in instanceof BufferedInputStream 163 ? (BufferedInputStream) in 164 : new BufferedInputStream(in); 165 /* Don't need to call super(in) here as long as the lock gets set. */ 166 this.lock = in; 167 converter = decoder; 168 converter.setInput(this.in.buf, 0, 0); 169 } 170 171 /** 172 * This method closes this stream, as well as the underlying 173 * <code>InputStream</code>. 174 * 175 * @exception IOException If an error occurs 176 */ 177 public void close() throws IOException 178 { 179 synchronized (lock) 180 { 181 if (in != null) 182 in.close(); 183 in = null; 184 work = null; 185 wpos = wcount = 0; 186 } 187 } 188 189 /** 190 * This method returns the name of the encoding that is currently in use 191 * by this object. If the stream has been closed, this method is allowed 192 * to return <code>null</code>. 193 * 194 * @return The current encoding name 195 */ 196 public String getEncoding() 197 { 198 return in != null ? converter.getName() : null; 199 } 200 201 /** 202 * This method checks to see if the stream is read to be read. It 203 * will return <code>true</code> if is, or <code>false</code> if it is not. 204 * If the stream is not ready to be read, it could (although is not required 205 * to) block on the next read attempt. 206 * 207 * @return <code>true</code> if the stream is ready to be read, 208 * <code>false</code> otherwise 209 * 210 * @exception IOException If an error occurs 211 */ 212 public boolean ready() throws IOException 213 { 214 synchronized (lock) 215 { 216 if (in == null) 217 throw new IOException("Stream closed"); 218 219 if (wpos < wcount) 220 return true; 221 222 // According to the spec, an InputStreamReader is ready if its 223 // input buffer is not empty (above), or if bytes are 224 // available on the underlying byte stream. 225 return in.available () > 0; 226 } 227 } 228 229 /** 230 * This method reads up to <code>length</code> characters from the stream into 231 * the specified array starting at index <code>offset</code> into the 232 * array. 233 * 234 * @param buf The character array to recieve the data read 235 * @param offset The offset into the array to start storing characters 236 * @param length The requested number of characters to read. 237 * 238 * @return The actual number of characters read, or -1 if end of stream. 239 * 240 * @exception IOException If an error occurs 241 */ 242 public int read (char[] buf, int offset, int length) throws IOException 243 { 244 synchronized (lock) 245 { 246 if (in == null) 247 throw new IOException("Stream closed"); 248 249 if (length == 0) 250 return 0; 251 252 int wavail = wcount - wpos; 253 if (wavail <= 0) 254 { 255 // Nothing waiting, so refill their buffer. 256 return refill(buf, offset, length); 257 } 258 259 if (length > wavail) 260 length = wavail; 261 System.arraycopy(work, wpos, buf, offset, length); 262 wpos += length; 263 return length; 264 } 265 } 266 267 /** 268 * This method reads a single character of data from the stream. 269 * 270 * @return The char read, as an int, or -1 if end of stream. 271 * 272 * @exception IOException If an error occurs 273 */ 274 public int read() throws IOException 275 { 276 synchronized (lock) 277 { 278 if (in == null) 279 throw new IOException("Stream closed"); 280 281 int wavail = wcount - wpos; 282 if (wavail <= 0) 283 { 284 // Nothing waiting, so refill our internal buffer. 285 wpos = wcount = 0; 286 if (work == null) 287 work = new char[100]; 288 int count = refill(work, 0, work.length); 289 if (count == -1) 290 return -1; 291 wcount += count; 292 } 293 294 return work[wpos++]; 295 } 296 } 297 298 // Read more bytes and convert them into the specified buffer. 299 // Returns the number of converted characters or -1 on EOF. 300 private int refill(char[] buf, int offset, int length) throws IOException 301 { 302 for (;;) 303 { 304 // We have knowledge of the internals of BufferedInputStream 305 // here. Eww. 306 // BufferedInputStream.refill() can only be called when 307 // `pos>=count'. 308 boolean r = in.pos < in.count || in.refill (); 309 if (! r) 310 return -1; 311 converter.setInput(in.buf, in.pos, in.count); 312 int count = converter.read(buf, offset, length); 313 314 // We might have bytes but not have made any progress. In 315 // this case we try to refill. If refilling fails, we assume 316 // we have a malformed character at the end of the stream. 317 if (count == 0 && converter.inpos == in.pos) 318 { 319 in.mark(in.count); 320 if (! in.refill ()) 321 throw new CharConversionException (); 322 in.reset(); 323 } 324 else 325 { 326 in.skip(converter.inpos - in.pos); 327 if (count > 0) 328 return count; 329 } 330 } 331 } 332}