1 ///////////////////////////////////////////////////////////////////////////////////////////////
2 // checkstyle: Checks Java source code and other text files for adherence to a set of rules.
3 // Copyright (C) 2001-2025 the original author or authors.
4 //
5 // This library is free software; you can redistribute it and/or
6 // modify it under the terms of the GNU Lesser General Public
7 // License as published by the Free Software Foundation; either
8 // version 2.1 of the License, or (at your option) any later version.
9 //
10 // This library is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 // Lesser General Public License for more details.
14 //
15 // You should have received a copy of the GNU Lesser General Public
16 // License along with this library; if not, write to the Free Software
17 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 ///////////////////////////////////////////////////////////////////////////////////////////////
19
20 package com.puppycrawl.tools.checkstyle;
21
22 import java.util.Set;
23
24 import org.antlr.v4.runtime.BaseErrorListener;
25 import org.antlr.v4.runtime.CharStreams;
26 import org.antlr.v4.runtime.CommonTokenStream;
27 import org.antlr.v4.runtime.RecognitionException;
28 import org.antlr.v4.runtime.Recognizer;
29 import org.antlr.v4.runtime.atn.PredictionMode;
30 import org.antlr.v4.runtime.misc.ParseCancellationException;
31
32 import com.puppycrawl.tools.checkstyle.api.DetailAST;
33 import com.puppycrawl.tools.checkstyle.api.DetailNode;
34 import com.puppycrawl.tools.checkstyle.grammar.SimpleToken;
35 import com.puppycrawl.tools.checkstyle.grammar.javadoc.JavadocCommentsLexer;
36 import com.puppycrawl.tools.checkstyle.grammar.javadoc.JavadocCommentsParser;
37 import com.puppycrawl.tools.checkstyle.utils.JavadocUtil;
38
39 /**
40 * Used for parsing Javadoc comment as DetailNode tree.
41 *
42 */
43 public class JavadocDetailNodeParser {
44
45 /**
46 * Parse error while rule recognition.
47 */
48 public static final String MSG_JAVADOC_PARSE_RULE_ERROR = "javadoc.parse.rule.error";
49
50 /**
51 * Message property key for the Unclosed HTML message.
52 */
53 public static final String MSG_UNCLOSED_HTML_TAG = "javadoc.unclosedHtml";
54
55 /** Symbols with which javadoc starts. */
56 private static final String JAVADOC_START = "/**";
57
58 /**
59 * Parses the given Javadoc comment AST into a {@link ParseStatus} object.
60 *
61 * <p>
62 * This method extracts the raw Javadoc comment text from the supplied
63 * {@link DetailAST}, creates a new lexer and parser for the Javadoc grammar,
64 * and attempts to parse it into an AST of {@link DetailNode}s.
65 * The parser uses {@link PredictionMode#SLL} for
66 * faster performance and stops parsing on the first error encountered by
67 * using {@link CheckstyleParserErrorStrategy}.
68 * </p>
69 *
70 * @param javadocCommentAst
71 * the {@link DetailAST} node representing the Javadoc comment in the
72 * source file
73 * @return a {@link ParseStatus} containing the root of the parsed Javadoc
74 * tree (if successful), the first non-tight HTML tag (if any), and
75 * the error message (if parsing failed)
76 */
77 public ParseStatus parseJavadocComment(DetailAST javadocCommentAst) {
78 final int blockCommentLineNumber = javadocCommentAst.getLineNo();
79
80 final String javadocComment = JavadocUtil.getJavadocCommentContent(javadocCommentAst);
81 final ParseStatus result = new ParseStatus();
82
83 // Use a new error listener each time to be able to use
84 // one check instance for multiple files to be checked
85 // without getting side effects.
86 final DescriptiveErrorListener errorListener = new DescriptiveErrorListener();
87
88 // Log messages should have line number in scope of file,
89 // not in scope of Javadoc comment.
90 // Offset is line number of beginning of Javadoc comment.
91 errorListener.setOffset(javadocCommentAst.getLineNo() - 1);
92
93 final JavadocCommentsLexer lexer =
94 new JavadocCommentsLexer(CharStreams.fromString(javadocComment), true);
95
96 lexer.removeErrorListeners();
97 lexer.addErrorListener(errorListener);
98
99 final CommonTokenStream tokens = new CommonTokenStream(lexer);
100 tokens.fill();
101
102 final Set<SimpleToken> unclosedTags = lexer.getUnclosedTagNameTokens();
103 final JavadocCommentsParser parser = new JavadocCommentsParser(tokens, unclosedTags);
104
105 // set prediction mode to SLL to speed up parsing
106 parser.getInterpreter().setPredictionMode(PredictionMode.SLL);
107
108 // remove default error listeners
109 parser.removeErrorListeners();
110
111 parser.addErrorListener(errorListener);
112
113 // JavadocParserErrorStrategy stops parsing on first parse error encountered unlike the
114 // DefaultErrorStrategy used by ANTLR which rather attempts error recovery.
115 parser.setErrorHandler(new CheckstyleParserErrorStrategy());
116
117 try {
118 final JavadocCommentsParser.JavadocContext javadoc = parser.javadoc();
119 final int javadocColumnNumber = javadocCommentAst.getColumnNo()
120 + JAVADOC_START.length();
121
122 final JavadocCommentsAstVisitor visitor = new JavadocCommentsAstVisitor(
123 tokens, blockCommentLineNumber, javadocColumnNumber);
124 final DetailNode tree = visitor.visit(javadoc);
125
126 result.setTree(tree);
127
128 result.firstNonTightHtmlTag = visitor.getFirstNonTightHtmlTag();
129
130 result.setParseErrorMessage(errorListener.getErrorMessage());
131 }
132 catch (ParseCancellationException | IllegalArgumentException exc) {
133 result.setParseErrorMessage(errorListener.getErrorMessage());
134 }
135
136 return result;
137 }
138
139 /**
140 * Custom error listener for JavadocParser that prints user readable errors.
141 */
142 private static final class DescriptiveErrorListener extends BaseErrorListener {
143
144 /**
145 * Offset is line number of beginning of the Javadoc comment. Log
146 * messages should have line number in scope of file, not in scope of
147 * Javadoc comment.
148 */
149 private int offset;
150
151 /**
152 * Error message that appeared while parsing.
153 */
154 private ParseErrorMessage errorMessage;
155
156 /**
157 * Getter for error message during parsing.
158 *
159 * @return Error message during parsing.
160 */
161 private ParseErrorMessage getErrorMessage() {
162 return errorMessage;
163 }
164
165 /**
166 * Sets offset. Offset is line number of beginning of the Javadoc
167 * comment. Log messages should have line number in scope of file, not
168 * in scope of Javadoc comment.
169 *
170 * @param offset
171 * offset line number
172 */
173 public void setOffset(int offset) {
174 this.offset = offset;
175 }
176
177 /**
178 * Logs parser errors in Checkstyle manner. Parser can generate error
179 * messages. There is special error that parser can generate. It is
180 * missed close HTML tag. This case is special because parser prints
181 * error like {@code "no viable alternative at input 'b \n *\n'"} and it
182 * is not clear that error is about missed close HTML tag. Other error
183 * messages are not special and logged simply as "Parse Error...".
184 *
185 * <p>{@inheritDoc}
186 */
187 @Override
188 public void syntaxError(
189 Recognizer<?, ?> recognizer, Object offendingSymbol,
190 int line, int charPositionInLine,
191 String msg, RecognitionException ex) {
192 final int lineNumber = offset + line;
193
194 final String target;
195 if (recognizer instanceof JavadocCommentsLexer lexer) {
196 target = lexer.getPreviousToken().getText();
197 }
198 else {
199 final int ruleIndex = ex.getCtx().getRuleIndex();
200 final String ruleName = recognizer.getRuleNames()[ruleIndex];
201 target = convertUpperCamelToUpperUnderscore(ruleName);
202 }
203
204 errorMessage = new ParseErrorMessage(lineNumber,
205 MSG_JAVADOC_PARSE_RULE_ERROR, charPositionInLine, msg, target);
206
207 }
208
209 /**
210 * Converts the given {@code text} from camel case to all upper case with
211 * underscores separating each word.
212 *
213 * @param text The string to convert.
214 * @return The result of the conversion.
215 */
216 private static String convertUpperCamelToUpperUnderscore(String text) {
217 final StringBuilder result = new StringBuilder(20);
218 for (char letter : text.toCharArray()) {
219 if (Character.isUpperCase(letter)) {
220 result.append('_');
221 }
222 result.append(Character.toUpperCase(letter));
223 }
224 return result.toString();
225 }
226 }
227
228 /**
229 * Contains result of parsing javadoc comment: DetailNode tree and parse
230 * error message.
231 */
232 public static class ParseStatus {
233
234 /**
235 * DetailNode tree (is null if parsing fails).
236 */
237 private DetailNode tree;
238
239 /**
240 * Parse error message (is null if parsing is successful).
241 */
242 private ParseErrorMessage parseErrorMessage;
243
244 /**
245 * Stores the first non-tight HTML tag encountered while parsing javadoc.
246 *
247 * @see <a
248 * href="https://checkstyle.org/writingjavadocchecks.html#Tight-HTML_rules">
249 * Tight HTML rules</a>
250 */
251 private DetailNode firstNonTightHtmlTag;
252
253 /**
254 * Getter for DetailNode tree.
255 *
256 * @return DetailNode tree if parsing was successful, null otherwise.
257 */
258 public DetailNode getTree() {
259 return tree;
260 }
261
262 /**
263 * Sets DetailNode tree.
264 *
265 * @param tree DetailNode tree.
266 */
267 public void setTree(DetailNode tree) {
268 this.tree = tree;
269 }
270
271 /**
272 * Getter for error message during parsing.
273 *
274 * @return Error message if parsing was unsuccessful, null otherwise.
275 */
276 public ParseErrorMessage getParseErrorMessage() {
277 return parseErrorMessage;
278 }
279
280 /**
281 * Sets parse error message.
282 *
283 * @param parseErrorMessage Parse error message.
284 */
285 public void setParseErrorMessage(ParseErrorMessage parseErrorMessage) {
286 this.parseErrorMessage = parseErrorMessage;
287 }
288
289 /**
290 * This method is used to check if the javadoc parsed has non-tight HTML tags.
291 *
292 * @return returns true if the javadoc has at least one non-tight HTML tag; false otherwise
293 * @see <a
294 * href="https://checkstyle.org/writingjavadocchecks.html#Tight-HTML_rules">
295 * Tight HTML rules</a>
296 */
297 public boolean isNonTight() {
298 return firstNonTightHtmlTag != null;
299 }
300
301 /**
302 * Getter for the first non-tight HTML tag encountered while parsing javadoc.
303 *
304 * @return the first non-tight HTML tag that is encountered while parsing Javadoc,
305 * if one exists
306 * @see <a href="https://checkstyle.org/writingjavadocchecks.html#Tight-HTML_rules">
307 * Tight HTML rules</a>
308 */
309 public DetailNode getFirstNonTightHtmlTag() {
310 return firstNonTightHtmlTag;
311 }
312
313 }
314
315 /**
316 * Contains information about parse error message.
317 */
318 public static class ParseErrorMessage {
319
320 /**
321 * Line number where parse error occurred.
322 */
323 private final int lineNumber;
324
325 /**
326 * Key for error message.
327 */
328 private final String messageKey;
329
330 /**
331 * Error message arguments.
332 */
333 private final Object[] messageArguments;
334
335 /**
336 * Initializes parse error message.
337 *
338 * @param lineNumber line number
339 * @param messageKey message key
340 * @param messageArguments message arguments
341 */
342 /* package */ ParseErrorMessage(int lineNumber, String messageKey,
343 Object... messageArguments) {
344 this.lineNumber = lineNumber;
345 this.messageKey = messageKey;
346 this.messageArguments = messageArguments.clone();
347 }
348
349 /**
350 * Getter for line number where parse error occurred.
351 *
352 * @return Line number where parse error occurred.
353 */
354 public int getLineNumber() {
355 return lineNumber;
356 }
357
358 /**
359 * Getter for key for error message.
360 *
361 * @return Key for error message.
362 */
363 public String getMessageKey() {
364 return messageKey;
365 }
366
367 /**
368 * Getter for error message arguments.
369 *
370 * @return Array of error message arguments.
371 */
372 public Object[] getMessageArguments() {
373 return messageArguments.clone();
374 }
375
376 }
377 }