001/////////////////////////////////////////////////////////////////////////////////////////////// 002// checkstyle: Checks Java source code and other text files for adherence to a set of rules. 003// Copyright (C) 2001-2024 the original author or authors. 004// 005// This library is free software; you can redistribute it and/or 006// modify it under the terms of the GNU Lesser General Public 007// License as published by the Free Software Foundation; either 008// version 2.1 of the License, or (at your option) any later version. 009// 010// This library is distributed in the hope that it will be useful, 011// but WITHOUT ANY WARRANTY; without even the implied warranty of 012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 013// Lesser General Public License for more details. 014// 015// You should have received a copy of the GNU Lesser General Public 016// License along with this library; if not, write to the Free Software 017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 018/////////////////////////////////////////////////////////////////////////////////////////////// 019 020package com.puppycrawl.tools.checkstyle.checks.regexp; 021 022import java.util.regex.Matcher; 023import java.util.regex.Pattern; 024 025import com.puppycrawl.tools.checkstyle.FileStatefulCheck; 026import com.puppycrawl.tools.checkstyle.api.AbstractCheck; 027import com.puppycrawl.tools.checkstyle.api.DetailAST; 028import com.puppycrawl.tools.checkstyle.api.FileContents; 029import com.puppycrawl.tools.checkstyle.api.FileText; 030import com.puppycrawl.tools.checkstyle.api.LineColumn; 031import com.puppycrawl.tools.checkstyle.utils.CommonUtil; 032 033/** 034 * <div> 035 * Checks that a specified pattern exists, exists less than 036 * a set number of times, or does not exist in the file. 037 * </div> 038 * 039 * <p> 040 * This check combines all the functionality provided by 041 * <a href="https://checkstyle.org/checks/header/regexpheader.html#RegexpHeader">RegexpHeader</a> 042 * except supplying the regular expression from a file. 043 * </p> 044 * 045 * <p> 046 * It differs from them in that it works in multiline mode. Its regular expression 047 * can span multiple lines and it checks this against the whole file at once. 048 * The others work in single-line mode. Their single or multiple regular expressions 049 * can only span one line. They check each of these against each line in the file in turn. 050 * </p> 051 * 052 * <p> 053 * <b>Note:</b> Because of the different mode of operation there may be some 054 * changes in the regular expressions used to achieve a particular end. 055 * </p> 056 * 057 * <p> 058 * In multiline mode... 059 * </p> 060 * <ul> 061 * <li> 062 * {@code ^} means the beginning of a line, as opposed to beginning of the input. 063 * </li> 064 * <li> 065 * For beginning of the input use {@code \A}. 066 * </li> 067 * <li> 068 * {@code $} means the end of a line, as opposed to the end of the input. 069 * </li> 070 * <li> 071 * For end of input use {@code \Z}. 072 * </li> 073 * <li> 074 * Each line in the file is terminated with a line feed character. 075 * </li> 076 * </ul> 077 * 078 * <p> 079 * <b>Note:</b> Not all regular expression engines are created equal. 080 * Some provide extra functions that others do not and some elements 081 * of the syntax may vary. This check makes use of the 082 * <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/regex/package-summary.html"> 083 * java.util.regex package</a>; please check its documentation for details 084 * of how to construct a regular expression to achieve a particular goal. 085 * </p> 086 * 087 * <p> 088 * <b>Note:</b> When entering a regular expression as a parameter in 089 * the XML config file you must also take into account the XML rules. e.g. 090 * if you want to match a < symbol you need to enter &lt;. 091 * The regular expression should be entered on one line. 092 * </p> 093 * <ul> 094 * <li> 095 * Property {@code duplicateLimit} - Control whether to check for duplicates 096 * of a required pattern, any negative value means no checking for duplicates, 097 * any positive value is used as the maximum number of allowed duplicates, 098 * if the limit is exceeded violations will be logged. 099 * Type is {@code int}. 100 * Default value is {@code 0}. 101 * </li> 102 * <li> 103 * Property {@code errorLimit} - Specify the maximum number of violations before 104 * the check will abort. 105 * Type is {@code int}. 106 * Default value is {@code 100}. 107 * </li> 108 * <li> 109 * Property {@code format} - Specify the pattern to match against. 110 * Type is {@code java.util.regex.Pattern}. 111 * Default value is {@code "^$"}. 112 * </li> 113 * <li> 114 * Property {@code ignoreComments} - Control whether to ignore matches found within comments. 115 * Type is {@code boolean}. 116 * Default value is {@code false}. 117 * </li> 118 * <li> 119 * Property {@code illegalPattern} - Control whether the pattern is required or illegal. 120 * Type is {@code boolean}. 121 * Default value is {@code false}. 122 * </li> 123 * <li> 124 * Property {@code message} - Specify message which is used to notify about 125 * violations, if empty then the default (hard-coded) message is used. 126 * Type is {@code java.lang.String}. 127 * Default value is {@code null}. 128 * </li> 129 * </ul> 130 * 131 * <p> 132 * Parent is {@code com.puppycrawl.tools.checkstyle.TreeWalker} 133 * </p> 134 * 135 * <p> 136 * Violation Message Keys: 137 * </p> 138 * <ul> 139 * <li> 140 * {@code duplicate.regexp} 141 * </li> 142 * <li> 143 * {@code illegal.regexp} 144 * </li> 145 * <li> 146 * {@code required.regexp} 147 * </li> 148 * </ul> 149 * 150 * @since 4.0 151 */ 152@FileStatefulCheck 153public class RegexpCheck extends AbstractCheck { 154 155 /** 156 * A key is pointing to the warning message text in "messages.properties" 157 * file. 158 */ 159 public static final String MSG_ILLEGAL_REGEXP = "illegal.regexp"; 160 161 /** 162 * A key is pointing to the warning message text in "messages.properties" 163 * file. 164 */ 165 public static final String MSG_REQUIRED_REGEXP = "required.regexp"; 166 167 /** 168 * A key is pointing to the warning message text in "messages.properties" 169 * file. 170 */ 171 public static final String MSG_DUPLICATE_REGEXP = "duplicate.regexp"; 172 173 /** Default duplicate limit. */ 174 private static final int DEFAULT_DUPLICATE_LIMIT = -1; 175 176 /** Default error report limit. */ 177 private static final int DEFAULT_ERROR_LIMIT = 100; 178 179 /** Error count exceeded message. */ 180 private static final String ERROR_LIMIT_EXCEEDED_MESSAGE = 181 "The error limit has been exceeded, " 182 + "the check is aborting, there may be more unreported errors."; 183 184 /** 185 * Specify message which is used to notify about violations, 186 * if empty then the default (hard-coded) message is used. 187 */ 188 private String message; 189 190 /** Control whether to ignore matches found within comments. */ 191 private boolean ignoreComments; 192 193 /** Control whether the pattern is required or illegal. */ 194 private boolean illegalPattern; 195 196 /** Specify the maximum number of violations before the check will abort. */ 197 private int errorLimit = DEFAULT_ERROR_LIMIT; 198 199 /** 200 * Control whether to check for duplicates of a required pattern, 201 * any negative value means no checking for duplicates, 202 * any positive value is used as the maximum number of allowed duplicates, 203 * if the limit is exceeded violations will be logged. 204 */ 205 private int duplicateLimit; 206 207 /** Boolean to say if we should check for duplicates. */ 208 private boolean checkForDuplicates; 209 210 /** Specify the pattern to match against. */ 211 private Pattern format = Pattern.compile("^$", Pattern.MULTILINE); 212 213 /** 214 * Setter to specify message which is used to notify about violations, 215 * if empty then the default (hard-coded) message is used. 216 * 217 * @param message custom message which should be used in report. 218 * @since 4.0 219 */ 220 public void setMessage(String message) { 221 this.message = message; 222 } 223 224 /** 225 * Setter to control whether to ignore matches found within comments. 226 * 227 * @param ignoreComments True if comments should be ignored. 228 * @since 4.0 229 */ 230 public void setIgnoreComments(boolean ignoreComments) { 231 this.ignoreComments = ignoreComments; 232 } 233 234 /** 235 * Setter to control whether the pattern is required or illegal. 236 * 237 * @param illegalPattern True if pattern is not allowed. 238 * @since 4.0 239 */ 240 public void setIllegalPattern(boolean illegalPattern) { 241 this.illegalPattern = illegalPattern; 242 } 243 244 /** 245 * Setter to specify the maximum number of violations before the check will abort. 246 * 247 * @param errorLimit the number of errors to report. 248 * @since 4.0 249 */ 250 public void setErrorLimit(int errorLimit) { 251 this.errorLimit = errorLimit; 252 } 253 254 /** 255 * Setter to control whether to check for duplicates of a required pattern, 256 * any negative value means no checking for duplicates, 257 * any positive value is used as the maximum number of allowed duplicates, 258 * if the limit is exceeded violations will be logged. 259 * 260 * @param duplicateLimit negative values mean no duplicate checking, 261 * any positive value is used as the limit. 262 * @since 4.0 263 */ 264 public void setDuplicateLimit(int duplicateLimit) { 265 this.duplicateLimit = duplicateLimit; 266 checkForDuplicates = duplicateLimit > DEFAULT_DUPLICATE_LIMIT; 267 } 268 269 /** 270 * Setter to specify the pattern to match against. 271 * 272 * @param pattern the new pattern 273 * @since 4.0 274 */ 275 public final void setFormat(Pattern pattern) { 276 format = CommonUtil.createPattern(pattern.pattern(), Pattern.MULTILINE); 277 } 278 279 @Override 280 public int[] getDefaultTokens() { 281 return getRequiredTokens(); 282 } 283 284 @Override 285 public int[] getAcceptableTokens() { 286 return getRequiredTokens(); 287 } 288 289 @Override 290 public int[] getRequiredTokens() { 291 return CommonUtil.EMPTY_INT_ARRAY; 292 } 293 294 @Override 295 public void beginTree(DetailAST rootAST) { 296 processRegexpMatches(); 297 } 298 299 /** 300 * Processes the regexp matches and logs the number of errors in the file. 301 * 302 */ 303 // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166 304 @SuppressWarnings("deprecation") 305 private void processRegexpMatches() { 306 final Matcher matcher = format.matcher(getFileContents().getText().getFullText()); 307 int errorCount = 0; 308 int matchCount = 0; 309 final FileText text = getFileContents().getText(); 310 while (errorCount < errorLimit && matcher.find()) { 311 final LineColumn start = text.lineColumn(matcher.start()); 312 final int startLine = start.getLine(); 313 314 final boolean ignore = isIgnore(startLine, text, start, matcher); 315 if (!ignore) { 316 matchCount++; 317 if (illegalPattern || checkForDuplicates 318 && matchCount - 1 > duplicateLimit) { 319 errorCount++; 320 logMessage(startLine, errorCount); 321 } 322 } 323 } 324 if (!illegalPattern && matchCount == 0) { 325 final String msg = getMessage(errorCount); 326 log(1, MSG_REQUIRED_REGEXP, msg); 327 } 328 } 329 330 /** 331 * Detect ignore situation. 332 * 333 * @param startLine position of line 334 * @param text file text 335 * @param start line column 336 * @param matcher The matcher 337 * @return true is that need to be ignored 338 */ 339 // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166 340 @SuppressWarnings("deprecation") 341 private boolean isIgnore(int startLine, FileText text, LineColumn start, Matcher matcher) { 342 final LineColumn end; 343 if (matcher.end() == 0) { 344 end = text.lineColumn(0); 345 } 346 else { 347 end = text.lineColumn(matcher.end() - 1); 348 } 349 boolean ignore = false; 350 if (ignoreComments) { 351 final FileContents theFileContents = getFileContents(); 352 final int startColumn = start.getColumn(); 353 final int endLine = end.getLine(); 354 final int endColumn = end.getColumn(); 355 ignore = theFileContents.hasIntersectionWithComment(startLine, 356 startColumn, endLine, endColumn); 357 } 358 return ignore; 359 } 360 361 /** 362 * Displays the right message. 363 * 364 * @param lineNumber the line number the message relates to. 365 * @param errorCount number of errors in the file. 366 */ 367 private void logMessage(int lineNumber, int errorCount) { 368 final String msg = getMessage(errorCount); 369 370 if (illegalPattern) { 371 log(lineNumber, MSG_ILLEGAL_REGEXP, msg); 372 } 373 else { 374 log(lineNumber, MSG_DUPLICATE_REGEXP, msg); 375 } 376 } 377 378 /** 379 * Provide right message. 380 * 381 * @param errorCount number of errors in the file. 382 * @return message for violation. 383 */ 384 private String getMessage(int errorCount) { 385 String msg; 386 387 if (message == null || message.isEmpty()) { 388 msg = format.pattern(); 389 } 390 else { 391 msg = message; 392 } 393 394 if (errorCount >= errorLimit) { 395 msg = ERROR_LIMIT_EXCEEDED_MESSAGE + msg; 396 } 397 398 return msg; 399 } 400}