001/////////////////////////////////////////////////////////////////////////////////////////////// 002// checkstyle: Checks Java source code and other text files for adherence to a set of rules. 003// Copyright (C) 2001-2024 the original author or authors. 004// 005// This library is free software; you can redistribute it and/or 006// modify it under the terms of the GNU Lesser General Public 007// License as published by the Free Software Foundation; either 008// version 2.1 of the License, or (at your option) any later version. 009// 010// This library is distributed in the hope that it will be useful, 011// but WITHOUT ANY WARRANTY; without even the implied warranty of 012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 013// Lesser General Public License for more details. 014// 015// You should have received a copy of the GNU Lesser General Public 016// License along with this library; if not, write to the Free Software 017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 018/////////////////////////////////////////////////////////////////////////////////////////////// 019 020package com.puppycrawl.tools.checkstyle.api; 021 022import java.io.BufferedReader; 023import java.io.File; 024import java.io.FileNotFoundException; 025import java.io.IOException; 026import java.io.InputStream; 027import java.io.InputStreamReader; 028import java.io.Reader; 029import java.io.StringReader; 030import java.nio.charset.Charset; 031import java.nio.charset.CharsetDecoder; 032import java.nio.charset.CodingErrorAction; 033import java.nio.charset.UnsupportedCharsetException; 034import java.nio.file.Files; 035import java.util.ArrayList; 036import java.util.Arrays; 037import java.util.List; 038import java.util.regex.Matcher; 039import java.util.regex.Pattern; 040 041import com.puppycrawl.tools.checkstyle.utils.CommonUtil; 042 043/** 044 * Represents the text contents of a file of arbitrary plain text type. 045 * 046 * <p> 047 * This class will be passed to instances of class FileSetCheck by 048 * Checker. 049 * </p> 050 * 051 */ 052public final class FileText { 053 054 /** 055 * The number of characters to read in one go. 056 */ 057 private static final int READ_BUFFER_SIZE = 1024; 058 059 /** 060 * Regular expression pattern matching all line terminators. 061 */ 062 private static final Pattern LINE_TERMINATOR = Pattern.compile("\\n|\\r\\n?"); 063 064 // For now, we always keep both full text and lines array. 065 // In the long run, however, the one passed at initialization might be 066 // enough, while the other could be lazily created when requested. 067 // This would save memory but cost CPU cycles. 068 069 /** 070 * The name of the file. 071 * {@code null} if no file name is available for whatever reason. 072 */ 073 private final File file; 074 075 /** 076 * The charset used to read the file. 077 * {@code null} if the file was reconstructed from a list of lines. 078 */ 079 private final Charset charset; 080 081 /** 082 * The lines of the file, without terminators. 083 */ 084 private final String[] lines; 085 086 /** 087 * The full text contents of the file. 088 * 089 * @noinspection FieldMayBeFinal 090 * @noinspectionreason FieldMayBeFinal - field is not final to ease reaching full test coverage. 091 */ 092 private String fullText; 093 094 /** 095 * The first position of each line within the full text. 096 */ 097 private int[] lineBreaks; 098 099 /** 100 * Copy constructor. 101 * 102 * @param fileText to make copy of 103 */ 104 public FileText(FileText fileText) { 105 file = fileText.file; 106 charset = fileText.charset; 107 fullText = fileText.fullText; 108 lines = fileText.lines.clone(); 109 if (fileText.lineBreaks != null) { 110 lineBreaks = fileText.lineBreaks.clone(); 111 } 112 } 113 114 /** 115 * Compatibility constructor. 116 * 117 * <p>This constructor reconstructs the text of the file by joining 118 * lines with linefeed characters. This process does not restore 119 * the original line terminators and should therefore be avoided. 120 * 121 * @param file the name of the file 122 * @param lines the lines of the text, without terminators 123 * @throws NullPointerException if the lines array is null 124 */ 125 public FileText(File file, List<String> lines) { 126 final StringBuilder buf = new StringBuilder(1024); 127 for (final String line : lines) { 128 buf.append(line).append('\n'); 129 } 130 131 this.file = file; 132 charset = null; 133 fullText = buf.toString(); 134 this.lines = lines.toArray(CommonUtil.EMPTY_STRING_ARRAY); 135 } 136 137 /** 138 * Creates a new file text representation. 139 * 140 * <p>The file will be read using the specified encoding, replacing 141 * malformed input and unmappable characters with the default 142 * replacement character. 143 * 144 * @param file the name of the file 145 * @param charsetName the encoding to use when reading the file 146 * @throws NullPointerException if the text is null 147 * @throws IllegalStateException if the charset is not supported. 148 * @throws IOException if the file could not be read 149 */ 150 public FileText(File file, String charsetName) throws IOException { 151 this.file = file; 152 153 // We use our own decoder, to be sure we have complete control 154 // about replacements. 155 final CharsetDecoder decoder; 156 try { 157 charset = Charset.forName(charsetName); 158 decoder = charset.newDecoder(); 159 decoder.onMalformedInput(CodingErrorAction.REPLACE); 160 decoder.onUnmappableCharacter(CodingErrorAction.REPLACE); 161 } 162 catch (final UnsupportedCharsetException ex) { 163 final String message = "Unsupported charset: " + charsetName; 164 throw new IllegalStateException(message, ex); 165 } 166 167 fullText = readFile(file, decoder); 168 169 // Use the BufferedReader to break down the lines as this 170 // is about 30% faster than using the 171 // LINE_TERMINATOR.split(fullText, -1) method 172 try (BufferedReader reader = new BufferedReader(new StringReader(fullText))) { 173 final ArrayList<String> textLines = new ArrayList<>(); 174 while (true) { 175 final String line = reader.readLine(); 176 if (line == null) { 177 break; 178 } 179 textLines.add(line); 180 } 181 lines = textLines.toArray(CommonUtil.EMPTY_STRING_ARRAY); 182 } 183 } 184 185 /** 186 * Reads file using specific decoder and returns all its content as a String. 187 * 188 * @param inputFile File to read 189 * @param decoder Charset decoder 190 * @return File's text 191 * @throws IOException Unable to open or read the file 192 * @throws FileNotFoundException when inputFile does not exist 193 */ 194 private static String readFile(final File inputFile, final CharsetDecoder decoder) 195 throws IOException { 196 if (!inputFile.exists()) { 197 throw new FileNotFoundException(inputFile.getPath() + " (No such file or directory)"); 198 } 199 final StringBuilder buf = new StringBuilder(1024); 200 final InputStream stream = Files.newInputStream(inputFile.toPath()); 201 try (Reader reader = new InputStreamReader(stream, decoder)) { 202 final char[] chars = new char[READ_BUFFER_SIZE]; 203 while (true) { 204 final int len = reader.read(chars); 205 if (len == -1) { 206 break; 207 } 208 buf.append(chars, 0, len); 209 } 210 } 211 return buf.toString(); 212 } 213 214 /** 215 * Retrieves a line of the text by its number. 216 * The returned line will not contain a trailing terminator. 217 * 218 * @param lineNo the number of the line to get, starting at zero 219 * @return the line with the given number 220 */ 221 public String get(final int lineNo) { 222 return lines[lineNo]; 223 } 224 225 /** 226 * Get the name of the file. 227 * 228 * @return an object containing the name of the file 229 */ 230 public File getFile() { 231 return file; 232 } 233 234 /** 235 * Get the character set which was used to read the file. 236 * Will be {@code null} for a file reconstructed from its lines. 237 * 238 * @return the charset used when the file was read 239 */ 240 public Charset getCharset() { 241 return charset; 242 } 243 244 /** 245 * Retrieve the full text of the file. 246 * 247 * @return the full text of the file 248 */ 249 public CharSequence getFullText() { 250 return fullText; 251 } 252 253 /** 254 * Returns an array of all lines. 255 * {@code text.toLinesArray()} is equivalent to 256 * {@code text.toArray(new String[text.size()])}. 257 * 258 * @return an array of all lines of the text 259 */ 260 public String[] toLinesArray() { 261 return lines.clone(); 262 } 263 264 /** 265 * Determine line and column numbers in full text. 266 * 267 * @param pos the character position in the full text 268 * @return the line and column numbers of this character 269 */ 270 public LineColumn lineColumn(int pos) { 271 final int[] lineBreakPositions = findLineBreaks(); 272 int lineNo = Arrays.binarySearch(lineBreakPositions, pos); 273 if (lineNo < 0) { 274 // we have: lineNo = -(insertion point) - 1 275 // we want: lineNo = (insertion point) - 1 276 lineNo = -lineNo - 2; 277 } 278 final int startOfLine = lineBreakPositions[lineNo]; 279 final int columnNo = pos - startOfLine; 280 // now we have lineNo and columnNo, both starting at zero. 281 return new LineColumn(lineNo + 1, columnNo); 282 } 283 284 /** 285 * Find positions of line breaks in the full text. 286 * 287 * @return an array giving the first positions of each line. 288 */ 289 private int[] findLineBreaks() { 290 if (lineBreaks == null) { 291 final int[] lineBreakPositions = new int[size() + 1]; 292 lineBreakPositions[0] = 0; 293 int lineNo = 1; 294 final Matcher matcher = LINE_TERMINATOR.matcher(fullText); 295 while (matcher.find()) { 296 lineBreakPositions[lineNo] = matcher.end(); 297 lineNo++; 298 } 299 if (lineNo < lineBreakPositions.length) { 300 lineBreakPositions[lineNo] = fullText.length(); 301 } 302 lineBreaks = lineBreakPositions; 303 } 304 return lineBreaks; 305 } 306 307 /** 308 * Counts the lines of the text. 309 * 310 * @return the number of lines in the text 311 */ 312 public int size() { 313 return lines.length; 314 } 315 316}