001///////////////////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
003// Copyright (C) 2001-2024 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018///////////////////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.api;
021
022import java.io.BufferedReader;
023import java.io.File;
024import java.io.FileNotFoundException;
025import java.io.IOException;
026import java.io.InputStream;
027import java.io.InputStreamReader;
028import java.io.Reader;
029import java.io.StringReader;
030import java.nio.charset.Charset;
031import java.nio.charset.CharsetDecoder;
032import java.nio.charset.CodingErrorAction;
033import java.nio.charset.UnsupportedCharsetException;
034import java.nio.file.Files;
035import java.util.ArrayList;
036import java.util.Arrays;
037import java.util.List;
038import java.util.regex.Matcher;
039import java.util.regex.Pattern;
040
041import com.puppycrawl.tools.checkstyle.utils.CommonUtil;
042
043/**
044 * Represents the text contents of a file of arbitrary plain text type.
045 *
046 * <p>
047 * This class will be passed to instances of class FileSetCheck by
048 * Checker.
049 * </p>
050 *
051 */
052public final class FileText {
053
054    /**
055     * The number of characters to read in one go.
056     */
057    private static final int READ_BUFFER_SIZE = 1024;
058
059    /**
060     * Regular expression pattern matching all line terminators.
061     */
062    private static final Pattern LINE_TERMINATOR = Pattern.compile("\\n|\\r\\n?");
063
064    // For now, we always keep both full text and lines array.
065    // In the long run, however, the one passed at initialization might be
066    // enough, while the other could be lazily created when requested.
067    // This would save memory but cost CPU cycles.
068
069    /**
070     * The name of the file.
071     * {@code null} if no file name is available for whatever reason.
072     */
073    private final File file;
074
075    /**
076     * The charset used to read the file.
077     * {@code null} if the file was reconstructed from a list of lines.
078     */
079    private final Charset charset;
080
081    /**
082     * The lines of the file, without terminators.
083     */
084    private final String[] lines;
085
086    /**
087     * The full text contents of the file.
088     *
089     * @noinspection FieldMayBeFinal
090     * @noinspectionreason FieldMayBeFinal - field is not final to ease reaching full test coverage.
091     */
092    private String fullText;
093
094    /**
095     * The first position of each line within the full text.
096     */
097    private int[] lineBreaks;
098
099    /**
100     * Copy constructor.
101     *
102     * @param fileText to make copy of
103     */
104    public FileText(FileText fileText) {
105        file = fileText.file;
106        charset = fileText.charset;
107        fullText = fileText.fullText;
108        lines = fileText.lines.clone();
109        if (fileText.lineBreaks != null) {
110            lineBreaks = fileText.lineBreaks.clone();
111        }
112    }
113
114    /**
115     * Compatibility constructor.
116     *
117     * <p>This constructor reconstructs the text of the file by joining
118     * lines with linefeed characters. This process does not restore
119     * the original line terminators and should therefore be avoided.
120     *
121     * @param file the name of the file
122     * @param lines the lines of the text, without terminators
123     * @throws NullPointerException if the lines array is null
124     */
125    public FileText(File file, List<String> lines) {
126        final StringBuilder buf = new StringBuilder(1024);
127        for (final String line : lines) {
128            buf.append(line).append('\n');
129        }
130
131        this.file = file;
132        charset = null;
133        fullText = buf.toString();
134        this.lines = lines.toArray(CommonUtil.EMPTY_STRING_ARRAY);
135    }
136
137    /**
138     * Creates a new file text representation.
139     *
140     * <p>The file will be read using the specified encoding, replacing
141     * malformed input and unmappable characters with the default
142     * replacement character.
143     *
144     * @param file the name of the file
145     * @param charsetName the encoding to use when reading the file
146     * @throws NullPointerException if the text is null
147     * @throws IllegalStateException if the charset is not supported.
148     * @throws IOException if the file could not be read
149     */
150    public FileText(File file, String charsetName) throws IOException {
151        this.file = file;
152
153        // We use our own decoder, to be sure we have complete control
154        // about replacements.
155        final CharsetDecoder decoder;
156        try {
157            charset = Charset.forName(charsetName);
158            decoder = charset.newDecoder();
159            decoder.onMalformedInput(CodingErrorAction.REPLACE);
160            decoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
161        }
162        catch (final UnsupportedCharsetException ex) {
163            final String message = "Unsupported charset: " + charsetName;
164            throw new IllegalStateException(message, ex);
165        }
166
167        fullText = readFile(file, decoder);
168
169        // Use the BufferedReader to break down the lines as this
170        // is about 30% faster than using the
171        // LINE_TERMINATOR.split(fullText, -1) method
172        try (BufferedReader reader = new BufferedReader(new StringReader(fullText))) {
173            final ArrayList<String> textLines = new ArrayList<>();
174            while (true) {
175                final String line = reader.readLine();
176                if (line == null) {
177                    break;
178                }
179                textLines.add(line);
180            }
181            lines = textLines.toArray(CommonUtil.EMPTY_STRING_ARRAY);
182        }
183    }
184
185    /**
186     * Reads file using specific decoder and returns all its content as a String.
187     *
188     * @param inputFile File to read
189     * @param decoder Charset decoder
190     * @return File's text
191     * @throws IOException Unable to open or read the file
192     * @throws FileNotFoundException when inputFile does not exist
193     */
194    private static String readFile(final File inputFile, final CharsetDecoder decoder)
195            throws IOException {
196        if (!inputFile.exists()) {
197            throw new FileNotFoundException(inputFile.getPath() + " (No such file or directory)");
198        }
199        final StringBuilder buf = new StringBuilder(1024);
200        final InputStream stream = Files.newInputStream(inputFile.toPath());
201        try (Reader reader = new InputStreamReader(stream, decoder)) {
202            final char[] chars = new char[READ_BUFFER_SIZE];
203            while (true) {
204                final int len = reader.read(chars);
205                if (len == -1) {
206                    break;
207                }
208                buf.append(chars, 0, len);
209            }
210        }
211        return buf.toString();
212    }
213
214    /**
215     * Retrieves a line of the text by its number.
216     * The returned line will not contain a trailing terminator.
217     *
218     * @param lineNo the number of the line to get, starting at zero
219     * @return the line with the given number
220     */
221    public String get(final int lineNo) {
222        return lines[lineNo];
223    }
224
225    /**
226     * Get the name of the file.
227     *
228     * @return an object containing the name of the file
229     */
230    public File getFile() {
231        return file;
232    }
233
234    /**
235     * Get the character set which was used to read the file.
236     * Will be {@code null} for a file reconstructed from its lines.
237     *
238     * @return the charset used when the file was read
239     */
240    public Charset getCharset() {
241        return charset;
242    }
243
244    /**
245     * Retrieve the full text of the file.
246     *
247     * @return the full text of the file
248     */
249    public CharSequence getFullText() {
250        return fullText;
251    }
252
253    /**
254     * Returns an array of all lines.
255     * {@code text.toLinesArray()} is equivalent to
256     * {@code text.toArray(new String[text.size()])}.
257     *
258     * @return an array of all lines of the text
259     */
260    public String[] toLinesArray() {
261        return lines.clone();
262    }
263
264    /**
265     * Determine line and column numbers in full text.
266     *
267     * @param pos the character position in the full text
268     * @return the line and column numbers of this character
269     */
270    public LineColumn lineColumn(int pos) {
271        final int[] lineBreakPositions = findLineBreaks();
272        int lineNo = Arrays.binarySearch(lineBreakPositions, pos);
273        if (lineNo < 0) {
274            // we have: lineNo = -(insertion point) - 1
275            // we want: lineNo =  (insertion point) - 1
276            lineNo = -lineNo - 2;
277        }
278        final int startOfLine = lineBreakPositions[lineNo];
279        final int columnNo = pos - startOfLine;
280        // now we have lineNo and columnNo, both starting at zero.
281        return new LineColumn(lineNo + 1, columnNo);
282    }
283
284    /**
285     * Find positions of line breaks in the full text.
286     *
287     * @return an array giving the first positions of each line.
288     */
289    private int[] findLineBreaks() {
290        if (lineBreaks == null) {
291            final int[] lineBreakPositions = new int[size() + 1];
292            lineBreakPositions[0] = 0;
293            int lineNo = 1;
294            final Matcher matcher = LINE_TERMINATOR.matcher(fullText);
295            while (matcher.find()) {
296                lineBreakPositions[lineNo] = matcher.end();
297                lineNo++;
298            }
299            if (lineNo < lineBreakPositions.length) {
300                lineBreakPositions[lineNo] = fullText.length();
301            }
302            lineBreaks = lineBreakPositions;
303        }
304        return lineBreaks;
305    }
306
307    /**
308     * Counts the lines of the text.
309     *
310     * @return the number of lines in the text
311     */
312    public int size() {
313        return lines.length;
314    }
315
316}