001///////////////////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
003// Copyright (C) 2001-2026 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018///////////////////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle;
021
022import java.util.Set;
023
024import org.antlr.v4.runtime.BaseErrorListener;
025import org.antlr.v4.runtime.CharStreams;
026import org.antlr.v4.runtime.CommonTokenStream;
027import org.antlr.v4.runtime.RecognitionException;
028import org.antlr.v4.runtime.Recognizer;
029import org.antlr.v4.runtime.atn.PredictionMode;
030import org.antlr.v4.runtime.misc.ParseCancellationException;
031
032import com.puppycrawl.tools.checkstyle.api.DetailAST;
033import com.puppycrawl.tools.checkstyle.api.DetailNode;
034import com.puppycrawl.tools.checkstyle.grammar.SimpleToken;
035import com.puppycrawl.tools.checkstyle.grammar.javadoc.JavadocCommentsLexer;
036import com.puppycrawl.tools.checkstyle.grammar.javadoc.JavadocCommentsParser;
037import com.puppycrawl.tools.checkstyle.utils.JavadocUtil;
038
/**
 * Parses a Javadoc comment into a {@link DetailNode} tree.
 */
043public class JavadocDetailNodeParser {
044
    /**
     * Message property key for a parse error reported by the Javadoc lexer or
     * parser while recognizing a rule.
     */
    public static final String MSG_JAVADOC_PARSE_RULE_ERROR = "javadoc.parse.rule.error";

    /**
     * Message property key for the Unclosed HTML message.
     */
    public static final String MSG_UNCLOSED_HTML_TAG = "javadoc.unclosedHtml";

    /**
     * Symbols with which javadoc starts. Its length is added to the column of
     * the comment AST to compute the column number handed to the AST visitor.
     */
    private static final String JAVADOC_START = "/**";
057
058    /**
059     * Parses the given Javadoc comment AST into a {@link ParseStatus} object.
060     *
061     * <p>
062     * This method extracts the raw Javadoc comment text from the supplied
063     * {@link DetailAST}, creates a new lexer and parser for the Javadoc grammar,
064     * and attempts to parse it into an AST of {@link DetailNode}s.
065     * The parser uses {@link PredictionMode#SLL} for
066     * faster performance and stops parsing on the first error encountered by
067     * using {@link CheckstyleParserErrorStrategy}.
068     * </p>
069     *
070     * @param javadocCommentAst
071     *        the {@link DetailAST} node representing the Javadoc comment in the
072     *        source file
073     * @return a {@link ParseStatus} containing the root of the parsed Javadoc
074     *        tree (if successful), the first non-tight HTML tag (if any), and
075     *        the error message (if parsing failed)
076     */
077    public ParseStatus parseJavadocComment(DetailAST javadocCommentAst) {
078        final int blockCommentLineNumber = javadocCommentAst.getLineNo();
079
080        final String javadocComment = JavadocUtil.getJavadocCommentContent(javadocCommentAst);
081        final ParseStatus result = new ParseStatus();
082
083        // Use a new error listener each time to be able to use
084        // one check instance for multiple files to be checked
085        // without getting side effects.
086        final DescriptiveErrorListener errorListener = new DescriptiveErrorListener();
087
088        // Log messages should have line number in scope of file,
089        // not in scope of Javadoc comment.
090        // Offset is line number of beginning of Javadoc comment.
091        errorListener.setOffset(javadocCommentAst.getLineNo() - 1);
092
093        final JavadocCommentsLexer lexer =
094                        new JavadocCommentsLexer(CharStreams.fromString(javadocComment), true);
095
096        lexer.removeErrorListeners();
097        lexer.addErrorListener(errorListener);
098
099        final CommonTokenStream tokens = new CommonTokenStream(lexer);
100        tokens.fill();
101
102        final Set<SimpleToken> unclosedTags = lexer.getUnclosedTagNameTokens();
103        final JavadocCommentsParser parser = new JavadocCommentsParser(tokens, unclosedTags);
104
105        // set prediction mode to SLL to speed up parsing
106        parser.getInterpreter().setPredictionMode(PredictionMode.SLL);
107
108        // remove default error listeners
109        parser.removeErrorListeners();
110
111        parser.addErrorListener(errorListener);
112
113        // JavadocParserErrorStrategy stops parsing on first parse error encountered unlike the
114        // DefaultErrorStrategy used by ANTLR which rather attempts error recovery.
115        parser.setErrorHandler(new CheckstyleParserErrorStrategy());
116
117        try {
118            final JavadocCommentsParser.JavadocContext javadoc = parser.javadoc();
119            final int javadocColumnNumber = javadocCommentAst.getColumnNo()
120                            + JAVADOC_START.length();
121
122            final JavadocCommentsAstVisitor visitor = new JavadocCommentsAstVisitor(
123                    tokens, blockCommentLineNumber, javadocColumnNumber);
124            final DetailNode tree = visitor.visit(javadoc);
125
126            result.setTree(tree);
127
128            result.firstNonTightHtmlTag = visitor.getFirstNonTightHtmlTag();
129
130            result.setParseErrorMessage(errorListener.getErrorMessage());
131        }
132        catch (ParseCancellationException | IllegalArgumentException exc) {
133            result.setParseErrorMessage(errorListener.getErrorMessage());
134        }
135
136        return result;
137    }
138
139    /**
140     * Custom error listener for JavadocParser that prints user readable errors.
141     */
142    private static final class DescriptiveErrorListener extends BaseErrorListener {
143
144        /**
145         * Offset is line number of beginning of the Javadoc comment. Log
146         * messages should have line number in scope of file, not in scope of
147         * Javadoc comment.
148         */
149        private int offset;
150
151        /**
152         * Error message that appeared while parsing.
153         */
154        private ParseErrorMessage errorMessage;
155
156        /**
157         * Getter for error message during parsing.
158         *
159         * @return Error message during parsing.
160         */
161        private ParseErrorMessage getErrorMessage() {
162            return errorMessage;
163        }
164
165        /**
166         * Sets offset. Offset is line number of beginning of the Javadoc
167         * comment. Log messages should have line number in scope of file, not
168         * in scope of Javadoc comment.
169         *
170         * @param offset
171         *        offset line number
172         */
173        /* package */ void setOffset(int offset) {
174            this.offset = offset;
175        }
176
177        /**
178         * Logs parser errors in Checkstyle manner. Parser can generate error
179         * messages. There is special error that parser can generate. It is
180         * missed close HTML tag. This case is special because parser prints
181         * error like {@code "no viable alternative at input 'b \n *\n'"} and it
182         * is not clear that error is about missed close HTML tag. Other error
183         * messages are not special and logged simply as "Parse Error...".
184         *
185         * <p>{@inheritDoc}
186         */
187        @Override
188        public void syntaxError(
189                Recognizer<?, ?> recognizer, Object offendingSymbol,
190                int line, int charPositionInLine,
191                String msg, RecognitionException ex) {
192            final int lineNumber = offset + line;
193
194            final String target;
195            if (recognizer instanceof JavadocCommentsLexer lexer) {
196                target = lexer.getPreviousToken().getText();
197            }
198            else {
199                final int ruleIndex = ex.getCtx().getRuleIndex();
200                final String ruleName = recognizer.getRuleNames()[ruleIndex];
201                target = convertUpperCamelToUpperUnderscore(ruleName);
202            }
203
204            errorMessage = new ParseErrorMessage(lineNumber,
205                    MSG_JAVADOC_PARSE_RULE_ERROR, charPositionInLine, msg, target);
206
207        }
208
209        /**
210         * Converts the given {@code text} from camel case to all upper case with
211         * underscores separating each word.
212         *
213         * @param text The string to convert.
214         * @return The result of the conversion.
215         */
216        private static String convertUpperCamelToUpperUnderscore(String text) {
217            final StringBuilder result = new StringBuilder(20);
218            for (int index = 0; index < text.length(); index++) {
219                final char letter = text.charAt(index);
220                if (Character.isUpperCase(letter)) {
221                    result.append('_');
222                }
223                result.append(Character.toUpperCase(letter));
224            }
225            return result.toString();
226        }
227    }
228
229    /**
230     * Contains result of parsing javadoc comment: DetailNode tree and parse
231     * error message.
232     */
233    public static class ParseStatus {
234
235        /**
236         * DetailNode tree (is null if parsing fails).
237         */
238        private DetailNode tree;
239
240        /**
241         * Parse error message (is null if parsing is successful).
242         */
243        private ParseErrorMessage parseErrorMessage;
244
245        /**
246         * Stores the first non-tight HTML tag encountered while parsing javadoc.
247         *
248         * @see <a
249         *     href="https://checkstyle.org/writingjavadocchecks.html#Tight-HTML_rules">
250         *     Tight HTML rules</a>
251         */
252        private DetailNode firstNonTightHtmlTag;
253
254        /**
255         * Getter for DetailNode tree.
256         *
257         * @return DetailNode tree if parsing was successful, null otherwise.
258         */
259        public DetailNode getTree() {
260            return tree;
261        }
262
263        /**
264         * Sets DetailNode tree.
265         *
266         * @param tree DetailNode tree.
267         */
268        public void setTree(DetailNode tree) {
269            this.tree = tree;
270        }
271
272        /**
273         * Getter for error message during parsing.
274         *
275         * @return Error message if parsing was unsuccessful, null otherwise.
276         */
277        public ParseErrorMessage getParseErrorMessage() {
278            return parseErrorMessage;
279        }
280
281        /**
282         * Sets parse error message.
283         *
284         * @param parseErrorMessage Parse error message.
285         */
286        public void setParseErrorMessage(ParseErrorMessage parseErrorMessage) {
287            this.parseErrorMessage = parseErrorMessage;
288        }
289
290        /**
291         * This method is used to check if the javadoc parsed has non-tight HTML tags.
292         *
293         * @return returns true if the javadoc has at least one non-tight HTML tag; false otherwise
294         * @see <a
295         *     href="https://checkstyle.org/writingjavadocchecks.html#Tight-HTML_rules">
296         *     Tight HTML rules</a>
297         */
298        public boolean isNonTight() {
299            return firstNonTightHtmlTag != null;
300        }
301
302        /**
303         * Getter for the first non-tight HTML tag encountered while parsing javadoc.
304         *
305         * @return the first non-tight HTML tag that is encountered while parsing Javadoc,
306         *     if one exists
307         * @see <a href="https://checkstyle.org/writingjavadocchecks.html#Tight-HTML_rules">
308         *     Tight HTML rules</a>
309         */
310        public DetailNode getFirstNonTightHtmlTag() {
311            return firstNonTightHtmlTag;
312        }
313
314    }
315
316    /**
317     * Contains information about parse error message.
318     */
319    public static class ParseErrorMessage {
320
321        /**
322         * Line number where parse error occurred.
323         */
324        private final int lineNumber;
325
326        /**
327         * Key for error message.
328         */
329        private final String messageKey;
330
331        /**
332         * Error message arguments.
333         */
334        private final Object[] messageArguments;
335
336        /**
337         * Initializes parse error message.
338         *
339         * @param lineNumber line number
340         * @param messageKey message key
341         * @param messageArguments message arguments
342         */
343        /* package */ ParseErrorMessage(int lineNumber, String messageKey,
344                Object... messageArguments) {
345            this.lineNumber = lineNumber;
346            this.messageKey = messageKey;
347            this.messageArguments = messageArguments.clone();
348        }
349
350        /**
351         * Getter for line number where parse error occurred.
352         *
353         * @return Line number where parse error occurred.
354         */
355        public int getLineNumber() {
356            return lineNumber;
357        }
358
359        /**
360         * Getter for key for error message.
361         *
362         * @return Key for error message.
363         */
364        public String getMessageKey() {
365            return messageKey;
366        }
367
368        /**
369         * Getter for error message arguments.
370         *
371         * @return Array of error message arguments.
372         */
373        public Object[] getMessageArguments() {
374            return messageArguments.clone();
375        }
376
377    }
378}