///////////////////////////////////////////////////////////////////////////////////////////////
// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
// Copyright (C) 2001-2026 the original author or authors.
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
///////////////////////////////////////////////////////////////////////////////////////////////

package com.puppycrawl.tools.checkstyle.checks.regexp;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.puppycrawl.tools.checkstyle.FileStatefulCheck;
import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
import com.puppycrawl.tools.checkstyle.api.DetailAST;
import com.puppycrawl.tools.checkstyle.api.FileContents;
import com.puppycrawl.tools.checkstyle.api.FileText;
import com.puppycrawl.tools.checkstyle.api.LineColumn;
import com.puppycrawl.tools.checkstyle.utils.CommonUtil;

/**
 * <div>
 * Checks that a specified pattern exists, exists less than
 * a set number of times, or does not exist in the file.
 * </div>
 *
 * <p>
 * This check combines all the functionality provided by
 * <a href="https://checkstyle.org/checks/header/regexpheader.html">RegexpHeader</a>
 * except supplying the regular expression from a file.
 * </p>
 *
 * <p>
 * It differs from them in that it works in multiline mode. Its regular expression
 * can span multiple lines and it checks this against the whole file at once.
 * The others work in single-line mode. Their single or multiple regular expressions
 * can only span one line. They check each of these against each line in the file in turn.
 * </p>
 *
 * <p>
 * <b>Note:</b> Because of the different mode of operation there may be some
 * changes in the regular expressions used to achieve a particular end.
 * </p>
 *
 * <p>
 * In multiline mode...
 * </p>
 * <ul>
 * <li>
 * {@code ^} means the beginning of a line, as opposed to beginning of the input.
 * </li>
 * <li>
 * For beginning of the input use {@code \A}.
 * </li>
 * <li>
 * {@code $} means the end of a line, as opposed to the end of the input.
 * </li>
 * <li>
 * For end of input use {@code \Z}.
 * </li>
 * <li>
 * Each line in the file is terminated with a line feed character.
 * </li>
 * </ul>
 *
 * <p>
 * <b>Note:</b> Not all regular expression engines are created equal.
 * Some provide extra functions that others do not and some elements
 * of the syntax may vary. This check makes use of the
 * <a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/util/regex/package-summary.html">
 * java.util.regex package</a>; please check its documentation for details
 * of how to construct a regular expression to achieve a particular goal.
 * </p>
 *
 * <p>
 * <b>Note:</b> When entering a regular expression as a parameter in
 * the XML config file you must also take into account the XML rules. e.g.
 * if you want to match a {@literal <} symbol you need to enter {@code &lt;}.
 * The regular expression should be entered on one line.
 * </p>
 *
 * <p>
 * <b>Note:</b> To search for parentheses () in a regular expression
 * you must escape them like {@code \(\)}. This is required by the regexp engine,
 * otherwise it will think they are special instruction characters.
 * </p>
 *
 * <p>
 * <b>Note:</b> To search for things that mean something in XML, like
 * {@literal <} you need to escape them like {@code &lt;}. This is required so the
 * XML parser does not act on them, but instead passes the correct
 * character to the regexp engine.
 * </p>
 *
 * @since 4.0
 */
@FileStatefulCheck
public class RegexpCheck extends AbstractCheck {

    /**
     * A key is pointing to the warning message text in "messages.properties"
     * file.
     */
    public static final String MSG_ILLEGAL_REGEXP = "illegal.regexp";

    /**
     * A key is pointing to the warning message text in "messages.properties"
     * file.
     */
    public static final String MSG_REQUIRED_REGEXP = "required.regexp";

    /**
     * A key is pointing to the warning message text in "messages.properties"
     * file.
     */
    public static final String MSG_DUPLICATE_REGEXP = "duplicate.regexp";

    /** Default duplicate limit. */
    private static final int DEFAULT_DUPLICATE_LIMIT = -1;

    /** Default error report limit. */
    private static final int DEFAULT_ERROR_LIMIT = 100;

    /** Error count exceeded message. */
    private static final String ERROR_LIMIT_EXCEEDED_MESSAGE =
        "The error limit has been exceeded, "
        + "the check is aborting, there may be more unreported errors.";

    /**
     * Specify message which is used to notify about violations,
     * if empty then the default (hard-coded) message is used.
     */
    private String message;

    /** Control whether to ignore matches found within comments. */
    private boolean ignoreComments;

    /** Control whether the pattern is required or illegal. */
    private boolean illegalPattern;

    /** Specify the maximum number of violations before the check will abort. */
    private int errorLimit = DEFAULT_ERROR_LIMIT;

    /**
     * Control whether to check for duplicates of a required pattern,
     * any negative value means no checking for duplicates,
     * any positive value is used as the maximum number of allowed duplicates,
     * if the limit is exceeded violations will be logged.
     */
    private int duplicateLimit;

    /** Boolean to say if we should check for duplicates. */
    private boolean checkForDuplicates;

    /** Specify the pattern to match against. */
    private Pattern format = Pattern.compile("^$", Pattern.MULTILINE);

    /**
     * Setter to specify message which is used to notify about violations,
     * if empty then the default (hard-coded) message is used.
     *
     * @param message custom message which should be used in report.
     * @since 4.0
     */
    public void setMessage(String message) {
        this.message = message;
    }

    /**
     * Setter to control whether to ignore matches found within comments.
     *
     * @param ignoreComments True if comments should be ignored.
     * @since 4.0
     */
    public void setIgnoreComments(boolean ignoreComments) {
        this.ignoreComments = ignoreComments;
    }

    /**
     * Setter to control whether the pattern is required or illegal.
     *
     * @param illegalPattern True if pattern is not allowed.
     * @since 4.0
     */
    public void setIllegalPattern(boolean illegalPattern) {
        this.illegalPattern = illegalPattern;
    }

    /**
     * Setter to specify the maximum number of violations before the check will abort.
     *
     * @param errorLimit the number of errors to report.
     * @since 4.0
     */
    public void setErrorLimit(int errorLimit) {
        this.errorLimit = errorLimit;
    }

    /**
     * Setter to control whether to check for duplicates of a required pattern,
     * any negative value means no checking for duplicates,
     * any positive value is used as the maximum number of allowed duplicates,
     * if the limit is exceeded violations will be logged.
     *
     * @param duplicateLimit negative values mean no duplicate checking,
     *     any positive value is used as the limit.
     * @since 4.0
     */
    public void setDuplicateLimit(int duplicateLimit) {
        this.duplicateLimit = duplicateLimit;
        // Duplicate checking is switched on by any value above the "disabled" sentinel.
        checkForDuplicates = duplicateLimit > DEFAULT_DUPLICATE_LIMIT;
    }

    /**
     * Setter to specify the pattern to match against.
     *
     * @param pattern the new pattern
     * @since 4.0
     */
    public final void setFormat(Pattern pattern) {
        // Rebuild the pattern from its source text so that it carries the MULTILINE flag.
        final String regexp = pattern.pattern();
        format = CommonUtil.createPattern(regexp, Pattern.MULTILINE);
    }

    @Override
    public int[] getDefaultTokens() {
        return getRequiredTokens();
    }

    @Override
    public int[] getAcceptableTokens() {
        return getRequiredTokens();
    }

    @Override
    public int[] getRequiredTokens() {
        return CommonUtil.EMPTY_INT_ARRAY;
    }

    @Override
    public void beginTree(DetailAST rootAST) {
        processRegexpMatches();
    }

    /**
     * Runs the configured pattern over the full text of the file and logs a violation
     * for every match that is illegal or exceeds the duplicate limit, stopping once the
     * error limit is reached. When the pattern is required and no match was counted,
     * a single violation is logged on line 1.
     */
    @SuppressWarnings("deprecation")
    private void processRegexpMatches() {
        final FileText text = getFileContents().getText();
        final Matcher matcher = format.matcher(text.getFullText());
        int violationCount = 0;
        int acceptedMatches = 0;

        while (violationCount < errorLimit && matcher.find()) {
            final LineColumn matchStart = text.lineColumn(matcher.start());
            final int matchLine = matchStart.getLine();

            if (!isIgnore(matchLine, text, matchStart, matcher)) {
                acceptedMatches++;
                // The first accepted match is the original, only the following are duplicates.
                final boolean overDuplicateLimit =
                    checkForDuplicates && acceptedMatches - 1 > duplicateLimit;
                if (illegalPattern || overDuplicateLimit) {
                    violationCount++;
                    logMessage(matchLine, violationCount);
                }
            }
        }

        final boolean requiredPatternMissing = acceptedMatches == 0 && !illegalPattern;
        if (requiredPatternMissing) {
            log(1, MSG_REQUIRED_REGEXP, getMessage(violationCount));
        }
    }

    /**
     * Decides whether the current match has to be skipped. A match is skipped only when
     * comments are ignored and the matched region intersects with a comment.
     *
     * @param startLine line on which the match starts
     * @param text file text
     * @param start line and column at which the match starts
     * @param matcher the matcher positioned on the current match
     * @return true if the match needs to be ignored
     */
    @SuppressWarnings("deprecation")
    private boolean isIgnore(int startLine, FileText text, LineColumn start, Matcher matcher) {
        // Position of the last matched character, a match ending at offset 0 maps to offset 0.
        final int lastMatchedIndex = Math.max(matcher.end() - 1, 0);
        final LineColumn end = text.lineColumn(lastMatchedIndex);

        boolean insideComment = false;
        if (ignoreComments) {
            final FileContents contents = getFileContents();
            insideComment = contents.hasIntersectionWithComment(
                startLine, start.getColumn(), end.getLine(), end.getColumn());
        }
        return insideComment;
    }

    /**
     * Logs a violation for a match, using the illegal-pattern key when the pattern is
     * illegal and the duplicate key otherwise.
     *
     * @param lineNumber the line number the message relates to.
     * @param errorCount number of errors in the file.
     */
    private void logMessage(int lineNumber, int errorCount) {
        final String text = getMessage(errorCount);

        if (!illegalPattern) {
            log(lineNumber, MSG_DUPLICATE_REGEXP, text);
        }
        else {
            log(lineNumber, MSG_ILLEGAL_REGEXP, text);
        }
    }

    /**
     * Builds the text reported with a violation: the custom message when one is set,
     * otherwise the pattern itself, prefixed by the limit-exceeded notice once the
     * error count has reached the error limit.
     *
     * @param errorCount number of errors in the file.
     * @return message for violation.
     */
    private String getMessage(int errorCount) {
        final String base;
        if (message != null && !message.isEmpty()) {
            base = message;
        }
        else {
            base = format.pattern();
        }

        final String result;
        if (errorCount < errorLimit) {
            result = base;
        }
        else {
            result = ERROR_LIMIT_EXCEEDED_MESSAGE + base;
        }
        return result;
    }
}