/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.io.output;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;

import org.apache.commons.io.Charsets;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.build.AbstractStreamBuilder;
import org.apache.commons.io.charset.CharsetDecoders;

/**
 * {@link OutputStream} implementation that transforms a byte stream to a character stream using a specified charset encoding and writes the resulting stream to
 * a {@link Writer}. The stream is transformed using a {@link CharsetDecoder} object, guaranteeing that all charset encodings supported by the JRE are handled
 * correctly.
 * <p>
 * The output of the {@link CharsetDecoder} is buffered using a fixed size buffer. This implies that the data is written to the underlying {@link Writer} in
 * chunks that are no larger than the size of this buffer. By default, the buffer is flushed only when it overflows or when {@link #flush()} or {@link #close()}
 * is called. In general there is therefore no need to wrap the underlying {@link Writer} in a {@link BufferedWriter}. {@link WriterOutputStream} can
 * also be instructed to flush the buffer after each write operation. In this case, all available data is written immediately to the underlying {@link Writer},
 * implying that the current position of the {@link Writer} is correlated to the current position of the {@link WriterOutputStream}.
 * </p>
 * <p>
 * {@link WriterOutputStream} implements the inverse transformation of {@link OutputStreamWriter}; in the following example, writing to {@code out2}
 * would have the same result as writing to {@code out} directly (provided that the byte sequence is legal with respect to the charset encoding):
 * </p>
 * <pre>
 * OutputStream out = ...
 * Charset cs = ...
 * OutputStreamWriter writer = new OutputStreamWriter(out, cs);
 * WriterOutputStream out2 = WriterOutputStream.builder()
 *   .setWriter(writer)
 *   .setCharset(cs)
 *   .get();
 * </pre>
 * <p>
 * To build an instance, use {@link Builder}.
 * </p>
 * <p>
 * {@link WriterOutputStream} implements the same transformation as {@link InputStreamReader}, except that the control flow is reversed: both classes
 * transform a byte stream into a character stream, but {@link InputStreamReader} pulls data from the underlying stream, while
 * {@link WriterOutputStream} pushes it to the underlying stream.
 * </p>
 * <p>
 * Note that while there are use cases where there is no alternative to using this class, very often the need to use this class is an indication of a flaw in
 * the design of the code. This class is typically used in situations where an existing API only accepts an {@link OutputStream} object, but where the stream is
 * known to represent character data that must be decoded for further use.
 * </p>
 * <p>
 * Instances of {@link WriterOutputStream} are not thread safe.
 * </p>
 *
 * @see Builder
 * @see org.apache.commons.io.input.ReaderInputStream
 * @since 2.0
 */
public class WriterOutputStream extends OutputStream {

    // @formatter:off
    /**
     * Builds a new {@link WriterOutputStream}.
     *
     * <p>
     * For example:
     * </p>
     * <pre>{@code
     * WriterOutputStream s = WriterOutputStream.builder()
     *   .setPath(path)
     *   .setBufferSize(8192)
     *   .setCharset(StandardCharsets.UTF_8)
     *   .setWriteImmediately(false)
     *   .get();}
     * </pre>
     *
     * @see #get()
     * @since 2.12.0
     */
    // @formatter:on
    public static class Builder extends AbstractStreamBuilder<WriterOutputStream, Builder> {

        private CharsetDecoder charsetDecoder;
        private boolean writeImmediately;

        /**
         * Constructs a new builder of {@link WriterOutputStream}.
         * <p>
         * The initial decoder is created for {@link #getCharset()} and, like the decoder installed by {@link #setCharset(Charset)}, replaces malformed input
         * and unmappable characters with {@code "?"}.
         * </p>
         */
        public Builder() {
            // Use the same REPLACE-configured decoder as setCharset(...) so that a default-built stream never reports coding errors.
            this.charsetDecoder = newDecoder(getCharset());
        }

        /**
         * Builds a new {@link WriterOutputStream}.
         * <p>
         * You must set an aspect that supports {@link #getWriter()} on this builder, otherwise, this method throws an exception.
         * </p>
         * <p>
         * This builder uses the following aspects:
         * </p>
         * <ul>
         * <li>{@link #getWriter()}</li>
         * <li>{@link #getBufferSize()}</li>
         * <li>charsetDecoder</li>
         * <li>writeImmediately</li>
         * </ul>
         *
         * @return a new instance.
         * @throws UnsupportedOperationException if the origin cannot provide a {@link Writer}.
         * @throws IOException                   if an I/O error occurs converting to an {@link Writer} using {@link #getWriter()}.
         * @see #getWriter()
         * @see #getUnchecked()
         */
        @Override
        public WriterOutputStream get() throws IOException {
            return new WriterOutputStream(this);
        }

        /**
         * Sets the charset and replaces the current decoder with a new one for that charset. The new decoder replaces malformed input and unmappable
         * characters with {@code "?"}.
         *
         * @param charset the charset.
         * @return {@code this} instance.
         */
        @Override
        public Builder setCharset(final Charset charset) {
            super.setCharset(charset);
            this.charsetDecoder = newDecoder(getCharset());
            return this;
        }

        /**
         * Sets the charset by name and replaces the current decoder with a new one for that charset. The new decoder replaces malformed input and
         * unmappable characters with {@code "?"}.
         *
         * @param charset the charset name.
         * @return {@code this} instance.
         */
        @Override
        public Builder setCharset(final String charset) {
            super.setCharset(charset);
            this.charsetDecoder = newDecoder(getCharset());
            return this;
        }

        /**
         * Sets the charset decoder. The builder's charset is updated to the decoder's charset.
         * <p>
         * A non-null decoder is used as given, including its configured error actions. A {@code null} decoder selects a new decoder for the default charset
         * that replaces malformed input and unmappable characters with {@code "?"}.
         * </p>
         *
         * @param charsetDecoder the charset decoder, {@code null} selects a decoder for the default charset.
         * @return {@code this} instance.
         */
        public Builder setCharsetDecoder(final CharsetDecoder charsetDecoder) {
            // The null fallback goes through newDecoder(...) so it behaves like every other default decoder in this class.
            this.charsetDecoder = charsetDecoder != null ? charsetDecoder : newDecoder(getCharsetDefault());
            super.setCharset(this.charsetDecoder.charset());
            return this;
        }

        /**
         * Sets whether the output buffer will be flushed after each write operation ({@code true}), meaning all available data will be written to the
         * underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when {@link #flush()} or
         * {@link #close()} is called.
         *
         * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to
         *                         the underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
         *                         {@link #flush()} or {@link #close()} is called.
         * @return {@code this} instance.
         */
        public Builder setWriteImmediately(final boolean writeImmediately) {
            this.writeImmediately = writeImmediately;
            return this;
        }

    }

    private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;

    /**
     * Message used when the running JDK fails the UTF-16 decoding self-test.
     */
    private static final String BROKEN_UTF16_MESSAGE = "UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
            + "Please find a JDK that supports UTF-16 if you intend to use UTF-16 with WriterOutputStream";

    /**
     * Constructs a new {@link Builder}.
     *
     * @return a new {@link Builder}.
     * @since 2.12.0
     */
    public static Builder builder() {
        return new Builder();
    }

    /**
     * Checks if the JDK in use properly supports the given charset.
     * <p>
     * Only UTF-16 is tested: a short sample string is encoded, then decoded one byte at a time, and the result must equal the sample.
     * </p>
     *
     * @param charset the charset to check the support for.
     * @throws UnsupportedOperationException if the charset is UTF-16 and the incremental decoding self-test fails.
     */
    private static void checkIbmJdkWithBrokenUTF16(final Charset charset) {
        if (!StandardCharsets.UTF_16.name().equals(charset.name())) {
            return;
        }
        final String testString = "v\u00e9s";
        final byte[] bytes = testString.getBytes(charset);

        final CharsetDecoder charsetDecoder2 = charset.newDecoder();
        final ByteBuffer bb2 = ByteBuffer.allocate(16);
        final CharBuffer cb2 = CharBuffer.allocate(testString.length());
        final int len = bytes.length;
        // Feed the decoder a single byte at a time, mirroring how this stream may receive its input.
        for (int i = 0; i < len; i++) {
            bb2.put(bytes[i]);
            bb2.flip();
            try {
                charsetDecoder2.decode(bb2, cb2, i == len - 1);
            } catch (final IllegalArgumentException e) {
                // Keep the original failure as the cause so the underlying decoder problem stays diagnosable.
                throw new UnsupportedOperationException(BROKEN_UTF16_MESSAGE, e);
            }
            bb2.compact();
        }
        cb2.rewind();
        if (!testString.equals(cb2.toString())) {
            throw new UnsupportedOperationException(BROKEN_UTF16_MESSAGE);
        }

    }

    /**
     * Creates a decoder for the given charset that replaces malformed input and unmappable characters with {@code "?"} instead of reporting them.
     *
     * @param charset the charset, resolved through {@link Charsets#toCharset(Charset)}.
     * @return a new decoder.
     */
    private static CharsetDecoder newDecoder(final Charset charset) {
        // @formatter:off
        return Charsets.toCharset(charset).newDecoder()
                .onMalformedInput(CodingErrorAction.REPLACE)
                .onUnmappableCharacter(CodingErrorAction.REPLACE)
                .replaceWith("?");
        // @formatter:on
    }

    private final Writer writer;

    private final CharsetDecoder decoder;

    private final boolean writeImmediately;

    /**
     * ByteBuffer used as input for the decoder. This buffer can be small as it is used only to transfer the received data to the decoder.
     */
    private final ByteBuffer decoderIn = ByteBuffer.allocate(128);

    /**
     * CharBuffer used as output for the decoder. It should be somewhat larger as we write from this buffer to the underlying Writer.
     */
    private final CharBuffer decoderOut;

    /**
     * Constructs a new instance from the aspects of the given builder.
     *
     * @param builder the builder supplying the writer, decoder, buffer size and write-immediately flag.
     * @throws IOException if an I/O error occurs obtaining the {@link Writer} from the builder.
     */
    @SuppressWarnings("resource") // caller closes.
    private WriterOutputStream(final Builder builder) throws IOException {
        this(builder.getWriter(), builder.charsetDecoder, builder.getBufferSize(), builder.writeImmediately);
    }

    /**
     * Constructs a new {@link WriterOutputStream} that uses the virtual machine's {@linkplain Charset#defaultCharset() default charset} and with a default
     * output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed when it overflows or when {@link #flush()} or
     * {@link #close()} is called.
     *
     * @param writer the target {@link Writer}.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}.
     */
    @Deprecated
    public WriterOutputStream(final Writer writer) {
        this(writer, Charset.defaultCharset(), BUFFER_SIZE, false);
    }

    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
     * when it overflows or when {@link #flush()} or {@link #close()} is called.
     *
     * @param writer  the target {@link Writer}.
     * @param charset the charset encoding.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}.
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final Charset charset) {
        this(writer, charset, BUFFER_SIZE, false);
    }

    /**
     * Constructs a new {@link WriterOutputStream}.
     *
     * @param writer           the target {@link Writer}.
     * @param charset          the charset encoding.
     * @param bufferSize       the size of the output buffer in number of characters, must be positive.
     * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
     *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
     *                         {@link #flush()} or {@link #close()} is called.
     * @throws IllegalArgumentException if {@code bufferSize} is not positive.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}.
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final Charset charset, final int bufferSize, final boolean writeImmediately) {
        this(writer, newDecoder(charset), bufferSize, writeImmediately);
    }

    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
     * when it overflows or when {@link #flush()} or {@link #close()} is called.
     *
     * @param writer  the target {@link Writer}.
     * @param decoder the charset decoder.
     * @since 2.1
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}.
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final CharsetDecoder decoder) {
        this(writer, decoder, BUFFER_SIZE, false);
    }

    /**
     * Constructs a new {@link WriterOutputStream}.
     *
     * @param writer           the target {@link Writer}.
     * @param decoder          the charset decoder.
     * @param bufferSize       the size of the output buffer in number of characters, must be positive.
     * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
     *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
     *                         {@link #flush()} or {@link #close()} is called.
     * @throws IllegalArgumentException      if {@code bufferSize} is not positive.
     * @throws UnsupportedOperationException if the decoder's charset is UTF-16 and the running JDK fails the UTF-16 decoding self-test.
     * @since 2.1
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}.
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final CharsetDecoder decoder, final int bufferSize, final boolean writeImmediately) {
        // A zero-capacity output buffer can never accept a character: decoding would report overflow forever while
        // flushOutput() has nothing to drain, so the first write would spin. Reject it up front.
        if (bufferSize <= 0) {
            throw new IllegalArgumentException("bufferSize must be positive: " + bufferSize);
        }
        // Resolve the decoder once so the instance that is checked is the instance that is used.
        final CharsetDecoder charsetDecoder = CharsetDecoders.toCharsetDecoder(decoder);
        checkIbmJdkWithBrokenUTF16(charsetDecoder.charset());
        this.writer = writer;
        this.decoder = charsetDecoder;
        this.writeImmediately = writeImmediately;
        this.decoderOut = CharBuffer.allocate(bufferSize);
    }

    /**
     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
     * when it overflows or when {@link #flush()} or {@link #close()} is called.
     *
     * @param writer      the target {@link Writer}.
     * @param charsetName the name of the charset encoding.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}.
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final String charsetName) {
        this(writer, charsetName, BUFFER_SIZE, false);
    }

    /**
     * Constructs a new {@link WriterOutputStream}.
     *
     * @param writer           the target {@link Writer}.
     * @param charsetName      the name of the charset encoding.
     * @param bufferSize       the size of the output buffer in number of characters, must be positive.
     * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
     *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
     *                         {@link #flush()} or {@link #close()} is called.
     * @throws IllegalArgumentException if {@code bufferSize} is not positive.
     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}.
     */
    @Deprecated
    public WriterOutputStream(final Writer writer, final String charsetName, final int bufferSize, final boolean writeImmediately) {
        this(writer, Charsets.toCharset(charsetName), bufferSize, writeImmediately);
    }

    /**
     * Closes the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
     * {@link Writer#close()} will be called, even if decoding or writing the remaining content fails.
     *
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void close() throws IOException {
        // try-with-resources closes the writer on every path; a failure from close() is attached as suppressed
        // when the final decode or flush has already failed.
        try (Writer ignored = writer) {
            processInput(true);
            flushOutput();
        }
    }

    /**
     * Flushes the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
     * {@link Writer#flush()} will be called.
     *
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void flush() throws IOException {
        flushOutput();
        writer.flush();
    }

    /**
     * Writes the decoded characters accumulated in the output buffer to the underlying {@link Writer} and resets the buffer position. Does nothing when the
     * buffer is empty.
     *
     * @throws IOException if an I/O error occurs.
     */
    private void flushOutput() throws IOException {
        if (decoderOut.position() > 0) {
            writer.write(decoderOut.array(), 0, decoderOut.position());
            decoderOut.rewind();
        }
    }

    /**
     * Decodes the contents of the input ByteBuffer into the output CharBuffer, flushing the output buffer to the {@link Writer} whenever it overflows.
     *
     * @param endOfInput indicates end of input.
     * @throws IOException if an I/O error occurs, or if the decoder reports a malformed-input or unmappable-character result.
     */
    private void processInput(final boolean endOfInput) throws IOException {
        // Prepare decoderIn for reading
        decoderIn.flip();
        CoderResult coderResult;
        while (true) {
            coderResult = decoder.decode(decoderIn, decoderOut, endOfInput);
            if (coderResult.isOverflow()) {
                // Output buffer is full: drain it and decode again.
                flushOutput();
            } else if (coderResult.isUnderflow()) {
                // All currently decodable input has been consumed.
                break;
            } else {
                // The decoder is configured to replace malformed input and unmappable characters,
                // so we should not get here.
                throw new IOException("Unexpected coder result");
            }
        }
        // Discard the bytes that have been read
        decoderIn.compact();
    }

    /**
     * Writes bytes from the specified byte array to the stream.
     *
     * @param b the byte array containing the bytes to write.
     * @throws NullPointerException if the byte array is {@code null}.
     * @throws IOException          if an I/O error occurs.
     */
    @Override
    public void write(final byte[] b) throws IOException {
        write(b, 0, b.length);
    }

    /**
     * Writes bytes from the specified byte array to the stream.
     *
     * @param b   the byte array containing the bytes to write.
     * @param off the start offset in the byte array.
     * @param len the number of bytes to write.
     * @throws NullPointerException      if the byte array is {@code null}.
     * @throws IndexOutOfBoundsException if {@code off} or {@code len} are negative, or if {@code off + len} is greater than {@code b.length}.
     * @throws IOException               if an I/O error occurs.
     */
    @Override
    public void write(final byte[] b, int off, int len) throws IOException {
        IOUtils.checkFromIndexSize(b, off, len);
        while (len > 0) {
            // Transfer at most as many bytes as the small input buffer can hold, then decode them.
            final int c = Math.min(len, decoderIn.remaining());
            decoderIn.put(b, off, c);
            processInput(false);
            len -= c;
            off += c;
        }
        if (writeImmediately) {
            flushOutput();
        }
    }

    /**
     * Writes a single byte to the stream.
     *
     * @param b the byte to write.
     * @throws IOException if an I/O error occurs.
     */
    @Override
    public void write(final int b) throws IOException {
        write(new byte[] { (byte) b }, 0, 1);
    }
}