001///////////////////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
003// Copyright (C) 2001-2024 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018///////////////////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.checks.header;
021
022import java.io.File;
023import java.util.ArrayList;
024import java.util.BitSet;
025import java.util.List;
026import java.util.regex.Pattern;
027import java.util.regex.PatternSyntaxException;
028
029import com.puppycrawl.tools.checkstyle.StatelessCheck;
030import com.puppycrawl.tools.checkstyle.api.FileText;
031import com.puppycrawl.tools.checkstyle.utils.CommonUtil;
032import com.puppycrawl.tools.checkstyle.utils.TokenUtil;
033
034/**
035 * <div>
036 * Checks the header of a source file against a header that contains a
037 * <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/regex/Pattern.html">
038 * pattern</a> for each line of the source header.
039 * </div>
040 *
041 * <p>
042 * Rationale: In some projects <a href="https://checkstyle.org/checks/header/header.html#Header">
043 * checking against a fixed header</a> is not sufficient, e.g. the header might
044 * require a copyright line where the year information is not static.
045 * </p>
046 *
047 * <p>
048 * For example, consider the following header:
049 * </p>
050 * <pre>
051 * line  1: ^/{71}$
052 * line  2: ^// checkstyle:$
053 * line  3: ^// Checks Java source code for adherence to a set of rules\.$
054 * line  4: ^// Copyright \(C\) \d\d\d\d  Oliver Burn$
055 * line  5: ^// Last modification by \$Author.*\$$
056 * line  6: ^/{71}$
057 * line  7:
058 * line  8: ^package
059 * line  9:
060 * line 10: ^import
061 * line 11:
062 * line 12: ^/\*\*
063 * line 13: ^ \*([^/]|$)
064 * line 14: ^ \*&#47;
065 * </pre>
066 *
067 * <p>
068 * Lines 1 and 6 demonstrate a more compact notation for 71 '/' characters.
069 * Line 4 enforces that the copyright notice includes a four digit year.
 * Line 5 is an example of how to enforce revision control keywords in a file header.
 * Lines 12-14 are a template for javadoc (line 13 is so complicated in order to avoid a
 * conflict with the end of the javadoc comment). Lines 7, 9 and 11 will be treated
 * as '^$' and will forcefully expect the line to be empty.
074 * </p>
075 *
076 * <p>
077 * Different programming languages have different comment syntax rules,
078 * but all of them start a comment with a non-word character.
079 * Hence, you can often use the non-word character class to abstract away
080 * the concrete comment syntax and allow checking the header for different
081 * languages with a single header definition. For example, consider the following
082 * header specification (note that this is not the full Apache license header):
083 * </p>
084 * <pre>
085 * line 1: ^#!
086 * line 2: ^&lt;\?xml.*&gt;$
087 * line 3: ^\W*$
088 * line 4: ^\W*Copyright 2006 The Apache Software Foundation or its licensors, as applicable\.$
089 * line 5: ^\W*Licensed under the Apache License, Version 2\.0 \(the "License"\);$
090 * line 6: ^\W*$
091 * </pre>
092 *
093 * <p>
094 * Lines 1 and 2 leave room for technical header lines, e.g. the "#!/bin/sh"
095 * line in Unix shell scripts, or the XML file header of XML files.
 * Set the multiLines property to "1, 2" so these lines can be ignored for
 * file types where they do not apply. Lines 3 through 6 define the actual header content.
098 * Note how lines 2, 4 and 5 use escapes for characters that have special regexp semantics.
099 * </p>
100 *
101 * <p>
 * In default configuration, if header is not specified, the default value
 * of header is set to null and the check does not raise any violations.
104 * </p>
105 * <ul>
106 * <li>
107 * Property {@code charset} - Specify the character encoding to use when reading the headerFile.
108 * Type is {@code java.lang.String}.
109 * Default value is {@code the charset property of the parent
110 * <a href="https://checkstyle.org/config.html#Checker">Checker</a> module}.
111 * </li>
112 * <li>
113 * Property {@code fileExtensions} - Specify the file extensions of the files to process.
114 * Type is {@code java.lang.String[]}.
115 * Default value is {@code ""}.
116 * </li>
117 * <li>
118 * Property {@code header} - Define the required header specified inline.
119 * Individual header lines must be separated by the string {@code "\n"}
120 * (even on platforms with a different line separator).
121 * For header lines containing {@code "\n\n"} checkstyle will
122 * forcefully expect an empty line to exist. See examples below.
123 * Regular expressions must not span multiple lines.
124 * Type is {@code java.lang.String}.
125 * Default value is {@code null}.
126 * </li>
127 * <li>
128 * Property {@code headerFile} - Specify the name of the file containing the required header.
129 * Type is {@code java.net.URI}.
130 * Default value is {@code null}.
131 * </li>
132 * <li>
133 * Property {@code multiLines} - Specify the line numbers to repeat (zero or more times).
134 * Type is {@code int[]}.
135 * Default value is {@code ""}.
136 * </li>
137 * </ul>
138 *
139 * <p>
140 * Parent is {@code com.puppycrawl.tools.checkstyle.Checker}
141 * </p>
142 *
143 * <p>
144 * Violation Message Keys:
145 * </p>
146 * <ul>
147 * <li>
148 * {@code header.mismatch}
149 * </li>
150 * <li>
151 * {@code header.missing}
152 * </li>
153 * </ul>
154 *
155 * @since 6.9
156 */
157@StatelessCheck
158public class RegexpHeaderCheck extends AbstractHeaderCheck {
159
160    /**
161     * A key is pointing to the warning message text in "messages.properties"
162     * file.
163     */
164    public static final String MSG_HEADER_MISSING = "header.missing";
165
166    /**
167     * A key is pointing to the warning message text in "messages.properties"
168     * file.
169     */
170    public static final String MSG_HEADER_MISMATCH = "header.mismatch";
171
172    /** Regex pattern for a blank line. **/
173    private static final String EMPTY_LINE_PATTERN = "^$";
174
175    /** Compiled regex pattern for a blank line. **/
176    private static final Pattern BLANK_LINE = Pattern.compile(EMPTY_LINE_PATTERN);
177
178    /** The compiled regular expressions. */
179    private final List<Pattern> headerRegexps = new ArrayList<>();
180
181    /** Specify the line numbers to repeat (zero or more times). */
182    private BitSet multiLines = new BitSet();
183
184    /**
185     * Setter to specify the line numbers to repeat (zero or more times).
186     *
187     * @param list line numbers to repeat in header.
188     * @since 3.4
189     */
190    public void setMultiLines(int... list) {
191        multiLines = TokenUtil.asBitSet(list);
192    }
193
194    @Override
195    protected void processFiltered(File file, FileText fileText) {
196        final int headerSize = getHeaderLines().size();
197        final int fileSize = fileText.size();
198
199        if (headerSize - multiLines.cardinality() > fileSize) {
200            log(1, MSG_HEADER_MISSING);
201        }
202        else {
203            int headerLineNo = 0;
204            int index;
205            for (index = 0; headerLineNo < headerSize && index < fileSize; index++) {
206                final String line = fileText.get(index);
207                boolean isMatch = isMatch(line, headerLineNo);
208                while (!isMatch && isMultiLine(headerLineNo)) {
209                    headerLineNo++;
210                    isMatch = headerLineNo == headerSize
211                            || isMatch(line, headerLineNo);
212                }
213                if (!isMatch) {
214                    log(index + 1, MSG_HEADER_MISMATCH, getHeaderLine(headerLineNo));
215                    break;
216                }
217                if (!isMultiLine(headerLineNo)) {
218                    headerLineNo++;
219                }
220            }
221            if (index == fileSize) {
222                // if file finished, but we have at least one non-multi-line
223                // header isn't completed
224                logFirstSinglelineLine(headerLineNo, headerSize);
225            }
226        }
227    }
228
229    /**
230     * Returns the line from the header. Where the line is blank return the regexp pattern
231     * for a blank line.
232     *
233     * @param headerLineNo header line number to return
234     * @return the line from the header
235     */
236    private String getHeaderLine(int headerLineNo) {
237        String line = getHeaderLines().get(headerLineNo);
238        if (line.isEmpty()) {
239            line = EMPTY_LINE_PATTERN;
240        }
241        return line;
242    }
243
244    /**
245     * Logs warning if any non-multiline lines left in header regexp.
246     *
247     * @param startHeaderLine header line number to start from
248     * @param headerSize whole header size
249     */
250    private void logFirstSinglelineLine(int startHeaderLine, int headerSize) {
251        for (int lineNum = startHeaderLine; lineNum < headerSize; lineNum++) {
252            if (!isMultiLine(lineNum)) {
253                log(1, MSG_HEADER_MISSING);
254                break;
255            }
256        }
257    }
258
259    /**
260     * Checks if a code line matches the required header line.
261     *
262     * @param line the code line
263     * @param headerLineNo the header line number.
264     * @return true if and only if the line matches the required header line.
265     */
266    private boolean isMatch(String line, int headerLineNo) {
267        return headerRegexps.get(headerLineNo).matcher(line).find();
268    }
269
270    /**
271     * Returns true if line is multiline header lines or false.
272     *
273     * @param lineNo a line number
274     * @return if {@code lineNo} is one of the repeat header lines.
275     */
276    private boolean isMultiLine(int lineNo) {
277        return multiLines.get(lineNo + 1);
278    }
279
280    @Override
281    protected void postProcessHeaderLines() {
282        final List<String> headerLines = getHeaderLines();
283        for (String line : headerLines) {
284            try {
285                if (line.isEmpty()) {
286                    headerRegexps.add(BLANK_LINE);
287                }
288                else {
289                    headerRegexps.add(Pattern.compile(line));
290                }
291            }
292            catch (final PatternSyntaxException ex) {
293                throw new IllegalArgumentException("line "
294                        + (headerRegexps.size() + 1)
295                        + " in header specification"
296                        + " is not a regular expression", ex);
297            }
298        }
299    }
300
301    /**
302     * Setter to define the required header specified inline.
303     * Individual header lines must be separated by the string {@code "\n"}
304     * (even on platforms with a different line separator).
305     * For header lines containing {@code "\n\n"} checkstyle will forcefully
306     * expect an empty line to exist. See examples below.
307     * Regular expressions must not span multiple lines.
308     *
309     * @param header the header value to validate and set (in that order)
310     * @since 5.0
311     */
312    @Override
313    public void setHeader(String header) {
314        if (!CommonUtil.isBlank(header)) {
315            if (!CommonUtil.isPatternValid(header)) {
316                throw new IllegalArgumentException("Unable to parse format: " + header);
317            }
318            super.setHeader(header);
319        }
320    }
321
322}