001/////////////////////////////////////////////////////////////////////////////////////////////// 002// checkstyle: Checks Java source code and other text files for adherence to a set of rules. 003// Copyright (C) 2001-2024 the original author or authors. 004// 005// This library is free software; you can redistribute it and/or 006// modify it under the terms of the GNU Lesser General Public 007// License as published by the Free Software Foundation; either 008// version 2.1 of the License, or (at your option) any later version. 009// 010// This library is distributed in the hope that it will be useful, 011// but WITHOUT ANY WARRANTY; without even the implied warranty of 012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 013// Lesser General Public License for more details. 014// 015// You should have received a copy of the GNU Lesser General Public 016// License along with this library; if not, write to the Free Software 017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 018/////////////////////////////////////////////////////////////////////////////////////////////// 019 020package com.puppycrawl.tools.checkstyle.checks.header; 021 022import java.io.File; 023import java.util.ArrayList; 024import java.util.BitSet; 025import java.util.List; 026import java.util.regex.Pattern; 027import java.util.regex.PatternSyntaxException; 028 029import com.puppycrawl.tools.checkstyle.StatelessCheck; 030import com.puppycrawl.tools.checkstyle.api.FileText; 031import com.puppycrawl.tools.checkstyle.utils.CommonUtil; 032import com.puppycrawl.tools.checkstyle.utils.TokenUtil; 033 034/** 035 * <div> 036 * Checks the header of a source file against a header that contains a 037 * <a href="https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/regex/Pattern.html"> 038 * pattern</a> for each line of the source header. 039 * </div> 040 * 041 * <p> 042 * Rationale: In some projects <a href="https://checkstyle.org/checks/header/header.html#Header"> 043 * checking against a fixed header</a> is not sufficient, e.g. the header might 044 * require a copyright line where the year information is not static. 045 * </p> 046 * 047 * <p> 048 * For example, consider the following header: 049 * </p> 050 * <pre> 051 * line 1: ^/{71}$ 052 * line 2: ^// checkstyle:$ 053 * line 3: ^// Checks Java source code for adherence to a set of rules\.$ 054 * line 4: ^// Copyright \(C\) \d\d\d\d Oliver Burn$ 055 * line 5: ^// Last modification by \$Author.*\$$ 056 * line 6: ^/{71}$ 057 * line 7: 058 * line 8: ^package 059 * line 9: 060 * line 10: ^import 061 * line 11: 062 * line 12: ^/\*\* 063 * line 13: ^ \*([^/]|$) 064 * line 14: ^ \*/ 065 * </pre> 066 * 067 * <p> 068 * Lines 1 and 6 demonstrate a more compact notation for 71 '/' characters. 069 * Line 4 enforces that the copyright notice includes a four digit year. 070 * Line 5 is an example how to enforce revision control keywords in a file header. 071 * Lines 12-14 is a template for javadoc (line 13 is so complicated to remove 072 * conflict with and of javadoc comment). Lines 7, 9 and 11 will be treated 073 * as '^$' and will forcefully expect the line to be empty. 074 * </p> 075 * 076 * <p> 077 * Different programming languages have different comment syntax rules, 078 * but all of them start a comment with a non-word character. 079 * Hence, you can often use the non-word character class to abstract away 080 * the concrete comment syntax and allow checking the header for different 081 * languages with a single header definition. For example, consider the following 082 * header specification (note that this is not the full Apache license header): 083 * </p> 084 * <pre> 085 * line 1: ^#! 086 * line 2: ^<\?xml.*>$ 087 * line 3: ^\W*$ 088 * line 4: ^\W*Copyright 2006 The Apache Software Foundation or its licensors, as applicable\.$ 089 * line 5: ^\W*Licensed under the Apache License, Version 2\.0 \(the "License"\);$ 090 * line 6: ^\W*$ 091 * </pre> 092 * 093 * <p> 094 * Lines 1 and 2 leave room for technical header lines, e.g. the "#!/bin/sh" 095 * line in Unix shell scripts, or the XML file header of XML files. 096 * Set the multiline property to "1, 2" so these lines can be ignored for 097 * file types where they do no apply. Lines 3 through 6 define the actual header content. 098 * Note how lines 2, 4 and 5 use escapes for characters that have special regexp semantics. 099 * </p> 100 * 101 * <p> 102 * In default configuration, if header is not specified, the default value 103 * of header is set to null and the check does not rise any violations. 104 * </p> 105 * <ul> 106 * <li> 107 * Property {@code charset} - Specify the character encoding to use when reading the headerFile. 108 * Type is {@code java.lang.String}. 109 * Default value is {@code the charset property of the parent 110 * <a href="https://checkstyle.org/config.html#Checker">Checker</a> module}. 111 * </li> 112 * <li> 113 * Property {@code fileExtensions} - Specify the file extensions of the files to process. 114 * Type is {@code java.lang.String[]}. 115 * Default value is {@code ""}. 116 * </li> 117 * <li> 118 * Property {@code header} - Define the required header specified inline. 119 * Individual header lines must be separated by the string {@code "\n"} 120 * (even on platforms with a different line separator). 121 * For header lines containing {@code "\n\n"} checkstyle will 122 * forcefully expect an empty line to exist. See examples below. 123 * Regular expressions must not span multiple lines. 124 * Type is {@code java.lang.String}. 125 * Default value is {@code null}. 126 * </li> 127 * <li> 128 * Property {@code headerFile} - Specify the name of the file containing the required header. 129 * Type is {@code java.net.URI}. 130 * Default value is {@code null}. 131 * </li> 132 * <li> 133 * Property {@code multiLines} - Specify the line numbers to repeat (zero or more times). 134 * Type is {@code int[]}. 135 * Default value is {@code ""}. 136 * </li> 137 * </ul> 138 * 139 * <p> 140 * Parent is {@code com.puppycrawl.tools.checkstyle.Checker} 141 * </p> 142 * 143 * <p> 144 * Violation Message Keys: 145 * </p> 146 * <ul> 147 * <li> 148 * {@code header.mismatch} 149 * </li> 150 * <li> 151 * {@code header.missing} 152 * </li> 153 * </ul> 154 * 155 * @since 6.9 156 */ 157@StatelessCheck 158public class RegexpHeaderCheck extends AbstractHeaderCheck { 159 160 /** 161 * A key is pointing to the warning message text in "messages.properties" 162 * file. 163 */ 164 public static final String MSG_HEADER_MISSING = "header.missing"; 165 166 /** 167 * A key is pointing to the warning message text in "messages.properties" 168 * file. 169 */ 170 public static final String MSG_HEADER_MISMATCH = "header.mismatch"; 171 172 /** Regex pattern for a blank line. **/ 173 private static final String EMPTY_LINE_PATTERN = "^$"; 174 175 /** Compiled regex pattern for a blank line. **/ 176 private static final Pattern BLANK_LINE = Pattern.compile(EMPTY_LINE_PATTERN); 177 178 /** The compiled regular expressions. */ 179 private final List<Pattern> headerRegexps = new ArrayList<>(); 180 181 /** Specify the line numbers to repeat (zero or more times). */ 182 private BitSet multiLines = new BitSet(); 183 184 /** 185 * Setter to specify the line numbers to repeat (zero or more times). 186 * 187 * @param list line numbers to repeat in header. 188 * @since 3.4 189 */ 190 public void setMultiLines(int... list) { 191 multiLines = TokenUtil.asBitSet(list); 192 } 193 194 @Override 195 protected void processFiltered(File file, FileText fileText) { 196 final int headerSize = getHeaderLines().size(); 197 final int fileSize = fileText.size(); 198 199 if (headerSize - multiLines.cardinality() > fileSize) { 200 log(1, MSG_HEADER_MISSING); 201 } 202 else { 203 int headerLineNo = 0; 204 int index; 205 for (index = 0; headerLineNo < headerSize && index < fileSize; index++) { 206 final String line = fileText.get(index); 207 boolean isMatch = isMatch(line, headerLineNo); 208 while (!isMatch && isMultiLine(headerLineNo)) { 209 headerLineNo++; 210 isMatch = headerLineNo == headerSize 211 || isMatch(line, headerLineNo); 212 } 213 if (!isMatch) { 214 log(index + 1, MSG_HEADER_MISMATCH, getHeaderLine(headerLineNo)); 215 break; 216 } 217 if (!isMultiLine(headerLineNo)) { 218 headerLineNo++; 219 } 220 } 221 if (index == fileSize) { 222 // if file finished, but we have at least one non-multi-line 223 // header isn't completed 224 logFirstSinglelineLine(headerLineNo, headerSize); 225 } 226 } 227 } 228 229 /** 230 * Returns the line from the header. Where the line is blank return the regexp pattern 231 * for a blank line. 232 * 233 * @param headerLineNo header line number to return 234 * @return the line from the header 235 */ 236 private String getHeaderLine(int headerLineNo) { 237 String line = getHeaderLines().get(headerLineNo); 238 if (line.isEmpty()) { 239 line = EMPTY_LINE_PATTERN; 240 } 241 return line; 242 } 243 244 /** 245 * Logs warning if any non-multiline lines left in header regexp. 246 * 247 * @param startHeaderLine header line number to start from 248 * @param headerSize whole header size 249 */ 250 private void logFirstSinglelineLine(int startHeaderLine, int headerSize) { 251 for (int lineNum = startHeaderLine; lineNum < headerSize; lineNum++) { 252 if (!isMultiLine(lineNum)) { 253 log(1, MSG_HEADER_MISSING); 254 break; 255 } 256 } 257 } 258 259 /** 260 * Checks if a code line matches the required header line. 261 * 262 * @param line the code line 263 * @param headerLineNo the header line number. 264 * @return true if and only if the line matches the required header line. 265 */ 266 private boolean isMatch(String line, int headerLineNo) { 267 return headerRegexps.get(headerLineNo).matcher(line).find(); 268 } 269 270 /** 271 * Returns true if line is multiline header lines or false. 272 * 273 * @param lineNo a line number 274 * @return if {@code lineNo} is one of the repeat header lines. 275 */ 276 private boolean isMultiLine(int lineNo) { 277 return multiLines.get(lineNo + 1); 278 } 279 280 @Override 281 protected void postProcessHeaderLines() { 282 final List<String> headerLines = getHeaderLines(); 283 for (String line : headerLines) { 284 try { 285 if (line.isEmpty()) { 286 headerRegexps.add(BLANK_LINE); 287 } 288 else { 289 headerRegexps.add(Pattern.compile(line)); 290 } 291 } 292 catch (final PatternSyntaxException ex) { 293 throw new IllegalArgumentException("line " 294 + (headerRegexps.size() + 1) 295 + " in header specification" 296 + " is not a regular expression", ex); 297 } 298 } 299 } 300 301 /** 302 * Setter to define the required header specified inline. 303 * Individual header lines must be separated by the string {@code "\n"} 304 * (even on platforms with a different line separator). 305 * For header lines containing {@code "\n\n"} checkstyle will forcefully 306 * expect an empty line to exist. See examples below. 307 * Regular expressions must not span multiple lines. 308 * 309 * @param header the header value to validate and set (in that order) 310 * @since 5.0 311 */ 312 @Override 313 public void setHeader(String header) { 314 if (!CommonUtil.isBlank(header)) { 315 if (!CommonUtil.isPatternValid(header)) { 316 throw new IllegalArgumentException("Unable to parse format: " + header); 317 } 318 super.setHeader(header); 319 } 320 } 321 322}