001///////////////////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
003// Copyright (C) 2001-2024 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018///////////////////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.checks.regexp;
021
022import java.util.regex.Matcher;
023import java.util.regex.Pattern;
024
025import com.puppycrawl.tools.checkstyle.FileStatefulCheck;
026import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
027import com.puppycrawl.tools.checkstyle.api.DetailAST;
028import com.puppycrawl.tools.checkstyle.api.FileContents;
029import com.puppycrawl.tools.checkstyle.api.FileText;
030import com.puppycrawl.tools.checkstyle.api.LineColumn;
031import com.puppycrawl.tools.checkstyle.utils.CommonUtil;
032
033/**
034 * <div>
 * Checks that a specified pattern exists, occurs fewer than
 * a set number of times, or does not exist in the file.
037 * </div>
038 *
039 * <p>
040 * This check combines all the functionality provided by
041 * <a href="https://checkstyle.org/checks/header/regexpheader.html#RegexpHeader">RegexpHeader</a>
042 * except supplying the regular expression from a file.
043 * </p>
044 *
045 * <p>
 * It differs from line-oriented regular expression checks such as RegexpHeader
 * in that it works in multiline mode. Its regular expression can span multiple
 * lines and it checks this against the whole file at once. Those other checks work
 * in single-line mode: their single or multiple regular expressions can only span
 * one line, and each expression is checked against each line in the file in turn.
050 * </p>
051 *
052 * <p>
053 * <b>Note:</b> Because of the different mode of operation there may be some
054 * changes in the regular expressions used to achieve a particular end.
055 * </p>
056 *
057 * <p>
058 * In multiline mode...
059 * </p>
060 * <ul>
061 * <li>
062 * {@code ^} means the beginning of a line, as opposed to beginning of the input.
063 * </li>
064 * <li>
065 * For beginning of the input use {@code \A}.
066 * </li>
067 * <li>
068 * {@code $} means the end of a line, as opposed to the end of the input.
069 * </li>
070 * <li>
071 * For end of input use {@code \Z}.
072 * </li>
073 * <li>
074 * Each line in the file is terminated with a line feed character.
075 * </li>
076 * </ul>
077 *
078 * <p>
079 * <b>Note:</b> Not all regular expression engines are created equal.
080 * Some provide extra functions that others do not and some elements
081 * of the syntax may vary. This check makes use of the
082 * <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/regex/package-summary.html">
083 * java.util.regex package</a>; please check its documentation for details
084 * of how to construct a regular expression to achieve a particular goal.
085 * </p>
086 *
087 * <p>
 * <b>Note:</b> When entering a regular expression as a parameter in
 * the XML config file you must also take into account the XML rules.
 * For example, if you want to match a &lt; symbol you need to enter &amp;lt;.
 * The regular expression should be entered on one line.
092 * </p>
093 * <ul>
094 * <li>
095 * Property {@code duplicateLimit} - Control whether to check for duplicates
096 * of a required pattern, any negative value means no checking for duplicates,
097 * any positive value is used as the maximum number of allowed duplicates,
098 * if the limit is exceeded violations will be logged.
099 * Type is {@code int}.
100 * Default value is {@code 0}.
101 * </li>
102 * <li>
103 * Property {@code errorLimit} - Specify the maximum number of violations before
104 * the check will abort.
105 * Type is {@code int}.
106 * Default value is {@code 100}.
107 * </li>
108 * <li>
109 * Property {@code format} - Specify the pattern to match against.
110 * Type is {@code java.util.regex.Pattern}.
111 * Default value is {@code "^$"}.
112 * </li>
113 * <li>
114 * Property {@code ignoreComments} - Control whether to ignore matches found within comments.
115 * Type is {@code boolean}.
116 * Default value is {@code false}.
117 * </li>
118 * <li>
119 * Property {@code illegalPattern} - Control whether the pattern is required or illegal.
120 * Type is {@code boolean}.
121 * Default value is {@code false}.
122 * </li>
123 * <li>
124 * Property {@code message} - Specify message which is used to notify about
125 * violations, if empty then the default (hard-coded) message is used.
126 * Type is {@code java.lang.String}.
127 * Default value is {@code null}.
128 * </li>
129 * </ul>
130 *
131 * <p>
132 * Parent is {@code com.puppycrawl.tools.checkstyle.TreeWalker}
133 * </p>
134 *
135 * <p>
136 * Violation Message Keys:
137 * </p>
138 * <ul>
139 * <li>
140 * {@code duplicate.regexp}
141 * </li>
142 * <li>
143 * {@code illegal.regexp}
144 * </li>
145 * <li>
146 * {@code required.regexp}
147 * </li>
148 * </ul>
149 *
150 * @since 4.0
151 */
152@FileStatefulCheck
153public class RegexpCheck extends AbstractCheck {
154
155    /**
156     * A key is pointing to the warning message text in "messages.properties"
157     * file.
158     */
159    public static final String MSG_ILLEGAL_REGEXP = "illegal.regexp";
160
161    /**
162     * A key is pointing to the warning message text in "messages.properties"
163     * file.
164     */
165    public static final String MSG_REQUIRED_REGEXP = "required.regexp";
166
167    /**
168     * A key is pointing to the warning message text in "messages.properties"
169     * file.
170     */
171    public static final String MSG_DUPLICATE_REGEXP = "duplicate.regexp";
172
173    /** Default duplicate limit. */
174    private static final int DEFAULT_DUPLICATE_LIMIT = -1;
175
176    /** Default error report limit. */
177    private static final int DEFAULT_ERROR_LIMIT = 100;
178
179    /** Error count exceeded message. */
180    private static final String ERROR_LIMIT_EXCEEDED_MESSAGE =
181        "The error limit has been exceeded, "
182        + "the check is aborting, there may be more unreported errors.";
183
184    /**
185     * Specify message which is used to notify about violations,
186     * if empty then the default (hard-coded) message is used.
187     */
188    private String message;
189
190    /** Control whether to ignore matches found within comments. */
191    private boolean ignoreComments;
192
193    /** Control whether the pattern is required or illegal. */
194    private boolean illegalPattern;
195
196    /** Specify the maximum number of violations before the check will abort. */
197    private int errorLimit = DEFAULT_ERROR_LIMIT;
198
199    /**
200     * Control whether to check for duplicates of a required pattern,
201     * any negative value means no checking for duplicates,
202     * any positive value is used as the maximum number of allowed duplicates,
203     * if the limit is exceeded violations will be logged.
204     */
205    private int duplicateLimit;
206
207    /** Boolean to say if we should check for duplicates. */
208    private boolean checkForDuplicates;
209
210    /** Specify the pattern to match against. */
211    private Pattern format = Pattern.compile("^$", Pattern.MULTILINE);
212
213    /**
214     * Setter to specify message which is used to notify about violations,
215     * if empty then the default (hard-coded) message is used.
216     *
217     * @param message custom message which should be used in report.
218     * @since 4.0
219     */
220    public void setMessage(String message) {
221        this.message = message;
222    }
223
224    /**
225     * Setter to control whether to ignore matches found within comments.
226     *
227     * @param ignoreComments True if comments should be ignored.
228     * @since 4.0
229     */
230    public void setIgnoreComments(boolean ignoreComments) {
231        this.ignoreComments = ignoreComments;
232    }
233
234    /**
235     * Setter to control whether the pattern is required or illegal.
236     *
237     * @param illegalPattern True if pattern is not allowed.
238     * @since 4.0
239     */
240    public void setIllegalPattern(boolean illegalPattern) {
241        this.illegalPattern = illegalPattern;
242    }
243
244    /**
245     * Setter to specify the maximum number of violations before the check will abort.
246     *
247     * @param errorLimit the number of errors to report.
248     * @since 4.0
249     */
250    public void setErrorLimit(int errorLimit) {
251        this.errorLimit = errorLimit;
252    }
253
254    /**
255     * Setter to control whether to check for duplicates of a required pattern,
256     * any negative value means no checking for duplicates,
257     * any positive value is used as the maximum number of allowed duplicates,
258     * if the limit is exceeded violations will be logged.
259     *
260     * @param duplicateLimit negative values mean no duplicate checking,
261     *     any positive value is used as the limit.
262     * @since 4.0
263     */
264    public void setDuplicateLimit(int duplicateLimit) {
265        this.duplicateLimit = duplicateLimit;
266        checkForDuplicates = duplicateLimit > DEFAULT_DUPLICATE_LIMIT;
267    }
268
269    /**
270     * Setter to specify the pattern to match against.
271     *
272     * @param pattern the new pattern
273     * @since 4.0
274     */
275    public final void setFormat(Pattern pattern) {
276        format = CommonUtil.createPattern(pattern.pattern(), Pattern.MULTILINE);
277    }
278
279    @Override
280    public int[] getDefaultTokens() {
281        return getRequiredTokens();
282    }
283
284    @Override
285    public int[] getAcceptableTokens() {
286        return getRequiredTokens();
287    }
288
289    @Override
290    public int[] getRequiredTokens() {
291        return CommonUtil.EMPTY_INT_ARRAY;
292    }
293
294    @Override
295    public void beginTree(DetailAST rootAST) {
296        processRegexpMatches();
297    }
298
299    /**
300     * Processes the regexp matches and logs the number of errors in the file.
301     *
302     */
303    // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166
304    @SuppressWarnings("deprecation")
305    private void processRegexpMatches() {
306        final Matcher matcher = format.matcher(getFileContents().getText().getFullText());
307        int errorCount = 0;
308        int matchCount = 0;
309        final FileText text = getFileContents().getText();
310        while (errorCount < errorLimit && matcher.find()) {
311            final LineColumn start = text.lineColumn(matcher.start());
312            final int startLine = start.getLine();
313
314            final boolean ignore = isIgnore(startLine, text, start, matcher);
315            if (!ignore) {
316                matchCount++;
317                if (illegalPattern || checkForDuplicates
318                        && matchCount - 1 > duplicateLimit) {
319                    errorCount++;
320                    logMessage(startLine, errorCount);
321                }
322            }
323        }
324        if (!illegalPattern && matchCount == 0) {
325            final String msg = getMessage(errorCount);
326            log(1, MSG_REQUIRED_REGEXP, msg);
327        }
328    }
329
330    /**
331     * Detect ignore situation.
332     *
333     * @param startLine position of line
334     * @param text file text
335     * @param start line column
336     * @param matcher The matcher
337     * @return true is that need to be ignored
338     */
339    // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166
340    @SuppressWarnings("deprecation")
341    private boolean isIgnore(int startLine, FileText text, LineColumn start, Matcher matcher) {
342        final LineColumn end;
343        if (matcher.end() == 0) {
344            end = text.lineColumn(0);
345        }
346        else {
347            end = text.lineColumn(matcher.end() - 1);
348        }
349        boolean ignore = false;
350        if (ignoreComments) {
351            final FileContents theFileContents = getFileContents();
352            final int startColumn = start.getColumn();
353            final int endLine = end.getLine();
354            final int endColumn = end.getColumn();
355            ignore = theFileContents.hasIntersectionWithComment(startLine,
356                startColumn, endLine, endColumn);
357        }
358        return ignore;
359    }
360
361    /**
362     * Displays the right message.
363     *
364     * @param lineNumber the line number the message relates to.
365     * @param errorCount number of errors in the file.
366     */
367    private void logMessage(int lineNumber, int errorCount) {
368        final String msg = getMessage(errorCount);
369
370        if (illegalPattern) {
371            log(lineNumber, MSG_ILLEGAL_REGEXP, msg);
372        }
373        else {
374            log(lineNumber, MSG_DUPLICATE_REGEXP, msg);
375        }
376    }
377
378    /**
379     * Provide right message.
380     *
381     * @param errorCount number of errors in the file.
382     * @return message for violation.
383     */
384    private String getMessage(int errorCount) {
385        String msg;
386
387        if (message == null || message.isEmpty()) {
388            msg = format.pattern();
389        }
390        else {
391            msg = message;
392        }
393
394        if (errorCount >= errorLimit) {
395            msg = ERROR_LIMIT_EXCEEDED_MESSAGE + msg;
396        }
397
398        return msg;
399    }
400}