001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      https://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.io.output;
018
019import java.io.BufferedWriter;
020import java.io.IOException;
021import java.io.InputStreamReader;
022import java.io.OutputStream;
023import java.io.OutputStreamWriter;
024import java.io.Writer;
025import java.nio.ByteBuffer;
026import java.nio.CharBuffer;
027import java.nio.charset.Charset;
028import java.nio.charset.CharsetDecoder;
029import java.nio.charset.CoderResult;
030import java.nio.charset.CodingErrorAction;
031import java.nio.charset.StandardCharsets;
032
033import org.apache.commons.io.Charsets;
034import org.apache.commons.io.IOUtils;
035import org.apache.commons.io.build.AbstractStreamBuilder;
036import org.apache.commons.io.charset.CharsetDecoders;
037
038/**
039 * {@link OutputStream} implementation that transforms a byte stream to a character stream using a specified charset encoding and writes the resulting stream to
040 * a {@link Writer}. The stream is transformed using a {@link CharsetDecoder} object, guaranteeing that all charset encodings supported by the JRE are handled
041 * correctly.
042 * <p>
043 * The output of the {@link CharsetDecoder} is buffered using a fixed size buffer. This implies that the data is written to the underlying {@link Writer} in
044 * chunks that are no larger than the size of this buffer. By default, the buffer is flushed only when it overflows or when {@link #flush()} or {@link #close()}
045 * is called. In general there is therefore no need to wrap the underlying {@link Writer} in a {@link BufferedWriter}. {@link WriterOutputStream} can
046 * also be instructed to flush the buffer after each write operation. In this case, all available data is written immediately to the underlying {@link Writer},
047 * implying that the current position of the {@link Writer} is correlated to the current position of the {@link WriterOutputStream}.
048 * </p>
049 * <p>
050 * {@link WriterOutputStream} implements the inverse transformation of {@link OutputStreamWriter}; in the following example, writing to {@code out2}
051 * would have the same result as writing to {@code out} directly (provided that the byte sequence is legal with respect to the charset encoding):
052 * </p>
053 * <p>
054 * To build an instance, use {@link Builder}.
055 * </p>
056 * <pre>
057 * OutputStream out = ...
058 * Charset cs = ...
059 * OutputStreamWriter writer = new OutputStreamWriter(out, cs);
060 * WriterOutputStream out2 = WriterOutputStream.builder()
061 *   .setWriter(writer)
062 *   .setCharset(cs)
063 *   .get();
064 * </pre>
065 * <p>
066 * {@link WriterOutputStream} implements the same transformation as {@link InputStreamReader}, except that the control flow is reversed: both classes
067 * transform a byte stream into a character stream, but {@link InputStreamReader} pulls data from the underlying stream, while
068 * {@link WriterOutputStream} pushes it to the underlying stream.
069 * </p>
070 * <p>
071 * Note that while there are use cases where there is no alternative to using this class, very often the need to use this class is an indication of a flaw in
072 * the design of the code. This class is typically used in situations where an existing API only accepts an {@link OutputStream} object, but where the stream is
073 * known to represent character data that must be decoded for further use.
074 * </p>
075 * <p>
076 * Instances of {@link WriterOutputStream} are not thread safe.
077 * </p>
078 *
079 * @see Builder
080 * @see org.apache.commons.io.input.ReaderInputStream
081 * @since 2.0
082 */
083public class WriterOutputStream extends OutputStream {
084
085    // @formatter:off
086    /**
087     * Builds a new {@link WriterOutputStream}.
088     *
089     * <p>
090     * For example:
091     * </p>
092     * <pre>{@code
093     * WriterOutputStream s = WriterOutputStream.builder()
094     *   .setPath(path)
095     *   .setBufferSize(8192)
096     *   .setCharset(StandardCharsets.UTF_8)
097     *   .setWriteImmediately(false)
098     *   .get();}
099     * </pre>
100     *
101     * @see #get()
102     * @since 2.12.0
103     */
104    // @formatter:on
105    public static class Builder extends AbstractStreamBuilder<WriterOutputStream, Builder> {
106
107        private CharsetDecoder charsetDecoder;
108        private boolean writeImmediately;
109
110        /**
111         * Constructs a new builder of {@link WriterOutputStream}.
112         */
113        public Builder() {
114            this.charsetDecoder = getCharset().newDecoder();
115        }
116
117        /**
118         * Builds a new {@link WriterOutputStream}.
119         * <p>
120         * You must set an aspect that supports {@link #getWriter()} on this builder, otherwise, this method throws an exception.
121         * </p>
122         * <p>
123         * This builder uses the following aspects:
124         * </p>
125         * <ul>
126         * <li>{@link #getWriter()}</li>
127         * <li>{@link #getBufferSize()}</li>
128         * <li>charsetDecoder</li>
129         * <li>writeImmediately</li>
130         * </ul>
131         *
132         * @return a new instance.
133         * @throws UnsupportedOperationException if the origin cannot provide a {@link Writer}.
134         * @throws IOException                   if an I/O error occurs converting to an {@link Writer} using {@link #getWriter()}.
135         * @see #getWriter()
136         * @see #getUnchecked()
137         */
138        @Override
139        public WriterOutputStream get() throws IOException {
140            return new WriterOutputStream(this);
141        }
142
143        @Override
144        public Builder setCharset(final Charset charset) {
145            super.setCharset(charset);
146            this.charsetDecoder = newDecoder(getCharset());
147            return this;
148        }
149
150        @Override
151        public Builder setCharset(final String charset) {
152            super.setCharset(charset);
153            this.charsetDecoder = newDecoder(getCharset());
154            return this;
155        }
156
157        /**
158         * Sets the charset decoder.
159         *
160         * @param charsetDecoder the charset decoder.
161         * @return {@code this} instance.
162         */
163        public Builder setCharsetDecoder(final CharsetDecoder charsetDecoder) {
164            this.charsetDecoder = charsetDecoder != null ? charsetDecoder : getCharsetDefault().newDecoder();
165            super.setCharset(this.charsetDecoder.charset());
166            return this;
167        }
168
169        /**
170         * Sets whether the output buffer will be flushed after each write operation ({@code true}), meaning all available data will be written to the
171         * underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when {@link #flush()} or
172         * {@link #close()} is called.
173         *
174         * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to
175         *                         the underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
176         *                         {@link #flush()} or {@link #close()} is called.
177         * @return {@code this} instance.
178         */
179        public Builder setWriteImmediately(final boolean writeImmediately) {
180            this.writeImmediately = writeImmediately;
181            return this;
182        }
183
184    }
185
186    private static final int BUFFER_SIZE = IOUtils.DEFAULT_BUFFER_SIZE;
187
188    /**
189     * Constructs a new {@link Builder}.
190     *
191     * @return a new {@link Builder}.
192     * @since 2.12.0
193     */
194    public static Builder builder() {
195        return new Builder();
196    }
197
198    /**
199     * Checks if the JDK in use properly supports the given charset.
200     *
201     * @param charset the charset to check the support for.
202     */
203    private static void checkIbmJdkWithBrokenUTF16(final Charset charset) {
204        if (!StandardCharsets.UTF_16.name().equals(charset.name())) {
205            return;
206        }
207        final String testString = "v\u00e9s";
208        final byte[] bytes = testString.getBytes(charset);
209
210        final CharsetDecoder charsetDecoder2 = charset.newDecoder();
211        final ByteBuffer bb2 = ByteBuffer.allocate(16);
212        final CharBuffer cb2 = CharBuffer.allocate(testString.length());
213        final int len = bytes.length;
214        for (int i = 0; i < len; i++) {
215            bb2.put(bytes[i]);
216            bb2.flip();
217            try {
218                charsetDecoder2.decode(bb2, cb2, i == len - 1);
219            } catch (final IllegalArgumentException e) {
220                throw new UnsupportedOperationException("UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
221                        + "Please find a JDK that supports UTF-16 if you intend to use UF-16 with WriterOutputStream");
222            }
223            bb2.compact();
224        }
225        cb2.rewind();
226        if (!testString.equals(cb2.toString())) {
227            throw new UnsupportedOperationException("UTF-16 requested when running on an IBM JDK with broken UTF-16 support. "
228                    + "Please find a JDK that supports UTF-16 if you intend to use UF-16 with WriterOutputStream");
229        }
230
231    }
232
233    private static CharsetDecoder newDecoder(final Charset charset) {
234        // @formatter:off
235        return Charsets.toCharset(charset).newDecoder()
236            .onMalformedInput(CodingErrorAction.REPLACE)
237            .onUnmappableCharacter(CodingErrorAction.REPLACE)
238            .replaceWith("?");
239        // @formatter:on
240    }
241
242    private final Writer writer;
243
244    private final CharsetDecoder decoder;
245
246    private final boolean writeImmediately;
247
248    /**
249     * ByteBuffer used as input for the decoder. This buffer can be small as it is used only to transfer the received data to the decoder.
250     */
251    private final ByteBuffer decoderIn = ByteBuffer.allocate(128);
252
253    /**
254     * CharBuffer used as output for the decoder. It should be somewhat larger as we write from this buffer to the underlying Writer.
255     */
256    private final CharBuffer decoderOut;
257
258    @SuppressWarnings("resource") // caller closes.
259    private WriterOutputStream(final Builder builder) throws IOException {
260        this(builder.getWriter(), builder.charsetDecoder, builder.getBufferSize(), builder.writeImmediately);
261    }
262
263    /**
264     * Constructs a new {@link WriterOutputStream} that uses the virtual machine's {@linkplain Charset#defaultCharset() default charset} and with a default
265     * output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed when it overflows or when {@link #flush()} or
266     * {@link #close()} is called.
267     *
268     * @param writer the target {@link Writer}.
269     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}.
270     */
271    @Deprecated
272    public WriterOutputStream(final Writer writer) {
273        this(writer, Charset.defaultCharset(), BUFFER_SIZE, false);
274    }
275
276    /**
277     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
278     * when it overflows or when {@link #flush()} or {@link #close()} is called.
279     *
280     * @param writer  the target {@link Writer}.
281     * @param charset the charset encoding.
282     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}.
283     */
284    @Deprecated
285    public WriterOutputStream(final Writer writer, final Charset charset) {
286        this(writer, charset, BUFFER_SIZE, false);
287    }
288
289    /**
290     * Constructs a new {@link WriterOutputStream}.
291     *
292     * @param writer           the target {@link Writer}.
293     * @param charset          the charset encoding.
294     * @param bufferSize       the size of the output buffer in number of characters.
295     * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
296     *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
297     *                         {@link #flush()} or {@link #close()} is called.
298     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}.
299     */
300    @Deprecated
301    public WriterOutputStream(final Writer writer, final Charset charset, final int bufferSize, final boolean writeImmediately) {
302        this(writer, newDecoder(charset), bufferSize, writeImmediately);
303    }
304
305    /**
306     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
307     * when it overflows or when {@link #flush()} or {@link #close()} is called.
308     *
309     * @param writer  the target {@link Writer}.
310     * @param decoder the charset decoder.
311     * @since 2.1
312     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}.
313     */
314    @Deprecated
315    public WriterOutputStream(final Writer writer, final CharsetDecoder decoder) {
316        this(writer, decoder, BUFFER_SIZE, false);
317    }
318
319    /**
320     * Constructs a new {@link WriterOutputStream}.
321     *
322     * @param writer           the target {@link Writer}.
323     * @param decoder          the charset decoder.
324     * @param bufferSize       the size of the output buffer in number of characters.
325     * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
326     *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
327     *                         {@link #flush()} or {@link #close()} is called.
328     * @since 2.1
329     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}.
330     */
331    @Deprecated
332    public WriterOutputStream(final Writer writer, final CharsetDecoder decoder, final int bufferSize, final boolean writeImmediately) {
333        checkIbmJdkWithBrokenUTF16(CharsetDecoders.toCharsetDecoder(decoder).charset());
334        this.writer = writer;
335        this.decoder = CharsetDecoders.toCharsetDecoder(decoder);
336        this.writeImmediately = writeImmediately;
337        this.decoderOut = CharBuffer.allocate(bufferSize);
338    }
339
340    /**
341     * Constructs a new {@link WriterOutputStream} with a default output buffer size of {@value #BUFFER_SIZE} characters. The output buffer will only be flushed
342     * when it overflows or when {@link #flush()} or {@link #close()} is called.
343     *
344     * @param writer      the target {@link Writer}.
345     * @param charsetName the name of the charset encoding.
346     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}.
347     */
348    @Deprecated
349    public WriterOutputStream(final Writer writer, final String charsetName) {
350        this(writer, charsetName, BUFFER_SIZE, false);
351    }
352
353    /**
354     * Constructs a new {@link WriterOutputStream}.
355     *
356     * @param writer           the target {@link Writer}.
357     * @param charsetName      the name of the charset encoding.
358     * @param bufferSize       the size of the output buffer in number of characters.
359     * @param writeImmediately If {@code true} the output buffer will be flushed after each write operation, meaning all available data will be written to the
360     *                         underlying {@link Writer} immediately. If {@code false}, the output buffer will only be flushed when it overflows or when
361     *                         {@link #flush()} or {@link #close()} is called.
362     * @deprecated Use {@link #builder()}, {@link Builder}, and {@link Builder#get()}.
363     */
364    @Deprecated
365    public WriterOutputStream(final Writer writer, final String charsetName, final int bufferSize, final boolean writeImmediately) {
366        this(writer, Charsets.toCharset(charsetName), bufferSize, writeImmediately);
367    }
368
369    /**
370     * Close the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
371     * {@link Writer#close()} will be called.
372     *
373     * @throws IOException if an I/O error occurs.
374     */
375    @Override
376    public void close() throws IOException {
377        processInput(true);
378        flushOutput();
379        writer.close();
380    }
381
382    /**
383     * Flush the stream. Any remaining content accumulated in the output buffer will be written to the underlying {@link Writer}. After that
384     * {@link Writer#flush()} will be called.
385     *
386     * @throws IOException if an I/O error occurs.
387     */
388    @Override
389    public void flush() throws IOException {
390        flushOutput();
391        writer.flush();
392    }
393
394    /**
395     * Flush the output.
396     *
397     * @throws IOException if an I/O error occurs.
398     */
399    private void flushOutput() throws IOException {
400        if (decoderOut.position() > 0) {
401            writer.write(decoderOut.array(), 0, decoderOut.position());
402            decoderOut.rewind();
403        }
404    }
405
406    /**
407     * Decode the contents of the input ByteBuffer into a CharBuffer.
408     *
409     * @param endOfInput indicates end of input.
410     * @throws IOException if an I/O error occurs.
411     */
412    private void processInput(final boolean endOfInput) throws IOException {
413        // Prepare decoderIn for reading
414        decoderIn.flip();
415        CoderResult coderResult;
416        while (true) {
417            coderResult = decoder.decode(decoderIn, decoderOut, endOfInput);
418            if (coderResult.isOverflow()) {
419                flushOutput();
420            } else if (coderResult.isUnderflow()) {
421                break;
422            } else {
423                // The decoder is configured to replace malformed input and unmappable characters,
424                // so we should not get here.
425                throw new IOException("Unexpected coder result");
426            }
427        }
428        // Discard the bytes that have been read
429        decoderIn.compact();
430    }
431
432    /**
433     * Writes bytes from the specified byte array to the stream.
434     *
435     * @param b the byte array containing the bytes to write.
436     * @throws NullPointerException if the byte array is {@code null}.
437     * @throws IOException if an I/O error occurs.
438     */
439    @Override
440    public void write(final byte[] b) throws IOException {
441        write(b, 0, b.length);
442    }
443
444    /**
445     * Writes bytes from the specified byte array to the stream.
446     *
447     * @param b   the byte array containing the bytes to write.
448     * @param off the start offset in the byte array.
449     * @param len the number of bytes to write.
450     * @throws NullPointerException      if the byte array is {@code null}.
451     * @throws IndexOutOfBoundsException if {@code off} or {@code len} are negative, or if {@code off + len} is greater than {@code b.length}.
452     * @throws IOException if an I/O error occurs.
453     */
454    @Override
455    public void write(final byte[] b, int off, int len) throws IOException {
456        IOUtils.checkFromIndexSize(b, off, len);
457        while (len > 0) {
458            final int c = Math.min(len, decoderIn.remaining());
459            decoderIn.put(b, off, c);
460            processInput(false);
461            len -= c;
462            off += c;
463        }
464        if (writeImmediately) {
465            flushOutput();
466        }
467    }
468
469    /**
470     * Writes a single byte to the stream.
471     *
472     * @param b the byte to write.
473     * @throws IOException if an I/O error occurs.
474     */
475    @Override
476    public void write(final int b) throws IOException {
477        write(new byte[] { (byte) b }, 0, 1);
478    }
479}