001///////////////////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
003// Copyright (C) 2001-2025 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018///////////////////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle;
021
022import java.util.Set;
023
024import org.antlr.v4.runtime.BaseErrorListener;
025import org.antlr.v4.runtime.CharStreams;
026import org.antlr.v4.runtime.CommonTokenStream;
027import org.antlr.v4.runtime.RecognitionException;
028import org.antlr.v4.runtime.Recognizer;
029import org.antlr.v4.runtime.atn.PredictionMode;
030import org.antlr.v4.runtime.misc.ParseCancellationException;
031
032import com.puppycrawl.tools.checkstyle.api.DetailAST;
033import com.puppycrawl.tools.checkstyle.api.DetailNode;
034import com.puppycrawl.tools.checkstyle.grammar.SimpleToken;
035import com.puppycrawl.tools.checkstyle.grammar.javadoc.JavadocCommentsLexer;
036import com.puppycrawl.tools.checkstyle.grammar.javadoc.JavadocCommentsParser;
037import com.puppycrawl.tools.checkstyle.utils.JavadocUtil;
038
039/**
040 * Used for parsing Javadoc comment as DetailNode tree.
041 *
042 */
043public class JavadocDetailNodeParser {
044
045    /**
046     * Parse error while rule recognition.
047     */
048    public static final String MSG_JAVADOC_PARSE_RULE_ERROR = "javadoc.parse.rule.error";
049
050    /**
051     * Message property key for the Unclosed HTML message.
052     */
053    public static final String MSG_UNCLOSED_HTML_TAG = "javadoc.unclosedHtml";
054
055    /** Symbols with which javadoc starts. */
056    private static final String JAVADOC_START = "/**";
057
058    /**
059     * Parses the given Javadoc comment AST into a {@link ParseStatus} object.
060     *
061     * <p>
062     * This method extracts the raw Javadoc comment text from the supplied
063     * {@link DetailAST}, creates a new lexer and parser for the Javadoc grammar,
064     * and attempts to parse it into an AST of {@link DetailNode}s.
065     * The parser uses {@link PredictionMode#SLL} for
066     * faster performance and stops parsing on the first error encountered by
067     * using {@link CheckstyleParserErrorStrategy}.
068     * </p>
069     *
070     * @param javadocCommentAst
071     *        the {@link DetailAST} node representing the Javadoc comment in the
072     *        source file
073     * @return a {@link ParseStatus} containing the root of the parsed Javadoc
074     *        tree (if successful), the first non-tight HTML tag (if any), and
075     *        the error message (if parsing failed)
076     */
077    public ParseStatus parseJavadocComment(DetailAST javadocCommentAst) {
078        final int blockCommentLineNumber = javadocCommentAst.getLineNo();
079
080        final String javadocComment = JavadocUtil.getJavadocCommentContent(javadocCommentAst);
081        final ParseStatus result = new ParseStatus();
082
083        // Use a new error listener each time to be able to use
084        // one check instance for multiple files to be checked
085        // without getting side effects.
086        final DescriptiveErrorListener errorListener = new DescriptiveErrorListener();
087
088        // Log messages should have line number in scope of file,
089        // not in scope of Javadoc comment.
090        // Offset is line number of beginning of Javadoc comment.
091        errorListener.setOffset(javadocCommentAst.getLineNo() - 1);
092
093        final JavadocCommentsLexer lexer =
094                        new JavadocCommentsLexer(CharStreams.fromString(javadocComment), true);
095
096        lexer.removeErrorListeners();
097        lexer.addErrorListener(errorListener);
098
099        final CommonTokenStream tokens = new CommonTokenStream(lexer);
100        tokens.fill();
101
102        final Set<SimpleToken> unclosedTags = lexer.getUnclosedTagNameTokens();
103        final JavadocCommentsParser parser = new JavadocCommentsParser(tokens, unclosedTags);
104
105        // set prediction mode to SLL to speed up parsing
106        parser.getInterpreter().setPredictionMode(PredictionMode.SLL);
107
108        // remove default error listeners
109        parser.removeErrorListeners();
110
111        parser.addErrorListener(errorListener);
112
113        // JavadocParserErrorStrategy stops parsing on first parse error encountered unlike the
114        // DefaultErrorStrategy used by ANTLR which rather attempts error recovery.
115        parser.setErrorHandler(new CheckstyleParserErrorStrategy());
116
117        try {
118            final JavadocCommentsParser.JavadocContext javadoc = parser.javadoc();
119            final int javadocColumnNumber = javadocCommentAst.getColumnNo()
120                            + JAVADOC_START.length();
121
122            final JavadocCommentsAstVisitor visitor = new JavadocCommentsAstVisitor(
123                    tokens, blockCommentLineNumber, javadocColumnNumber);
124            final DetailNode tree = visitor.visit(javadoc);
125
126            result.setTree(tree);
127
128            result.firstNonTightHtmlTag = visitor.getFirstNonTightHtmlTag();
129
130            result.setParseErrorMessage(errorListener.getErrorMessage());
131        }
132        catch (ParseCancellationException | IllegalArgumentException exc) {
133            result.setParseErrorMessage(errorListener.getErrorMessage());
134        }
135
136        return result;
137    }
138
139    /**
140     * Custom error listener for JavadocParser that prints user readable errors.
141     */
142    private static final class DescriptiveErrorListener extends BaseErrorListener {
143
144        /**
145         * Offset is line number of beginning of the Javadoc comment. Log
146         * messages should have line number in scope of file, not in scope of
147         * Javadoc comment.
148         */
149        private int offset;
150
151        /**
152         * Error message that appeared while parsing.
153         */
154        private ParseErrorMessage errorMessage;
155
156        /**
157         * Getter for error message during parsing.
158         *
159         * @return Error message during parsing.
160         */
161        private ParseErrorMessage getErrorMessage() {
162            return errorMessage;
163        }
164
165        /**
166         * Sets offset. Offset is line number of beginning of the Javadoc
167         * comment. Log messages should have line number in scope of file, not
168         * in scope of Javadoc comment.
169         *
170         * @param offset
171         *        offset line number
172         */
173        public void setOffset(int offset) {
174            this.offset = offset;
175        }
176
177        /**
178         * Logs parser errors in Checkstyle manner. Parser can generate error
179         * messages. There is special error that parser can generate. It is
180         * missed close HTML tag. This case is special because parser prints
181         * error like {@code "no viable alternative at input 'b \n *\n'"} and it
182         * is not clear that error is about missed close HTML tag. Other error
183         * messages are not special and logged simply as "Parse Error...".
184         *
185         * <p>{@inheritDoc}
186         */
187        @Override
188        public void syntaxError(
189                Recognizer<?, ?> recognizer, Object offendingSymbol,
190                int line, int charPositionInLine,
191                String msg, RecognitionException ex) {
192            final int lineNumber = offset + line;
193
194            final String target;
195            if (recognizer instanceof JavadocCommentsLexer lexer) {
196                target = lexer.getPreviousToken().getText();
197            }
198            else {
199                final int ruleIndex = ex.getCtx().getRuleIndex();
200                final String ruleName = recognizer.getRuleNames()[ruleIndex];
201                target = convertUpperCamelToUpperUnderscore(ruleName);
202            }
203
204            errorMessage = new ParseErrorMessage(lineNumber,
205                    MSG_JAVADOC_PARSE_RULE_ERROR, charPositionInLine, msg, target);
206
207        }
208
209        /**
210         * Converts the given {@code text} from camel case to all upper case with
211         * underscores separating each word.
212         *
213         * @param text The string to convert.
214         * @return The result of the conversion.
215         */
216        private static String convertUpperCamelToUpperUnderscore(String text) {
217            final StringBuilder result = new StringBuilder(20);
218            for (char letter : text.toCharArray()) {
219                if (Character.isUpperCase(letter)) {
220                    result.append('_');
221                }
222                result.append(Character.toUpperCase(letter));
223            }
224            return result.toString();
225        }
226    }
227
228    /**
229     * Contains result of parsing javadoc comment: DetailNode tree and parse
230     * error message.
231     */
232    public static class ParseStatus {
233
234        /**
235         * DetailNode tree (is null if parsing fails).
236         */
237        private DetailNode tree;
238
239        /**
240         * Parse error message (is null if parsing is successful).
241         */
242        private ParseErrorMessage parseErrorMessage;
243
244        /**
245         * Stores the first non-tight HTML tag encountered while parsing javadoc.
246         *
247         * @see <a
248         *     href="https://checkstyle.org/writingjavadocchecks.html#Tight-HTML_rules">
249         *     Tight HTML rules</a>
250         */
251        private DetailNode firstNonTightHtmlTag;
252
253        /**
254         * Getter for DetailNode tree.
255         *
256         * @return DetailNode tree if parsing was successful, null otherwise.
257         */
258        public DetailNode getTree() {
259            return tree;
260        }
261
262        /**
263         * Sets DetailNode tree.
264         *
265         * @param tree DetailNode tree.
266         */
267        public void setTree(DetailNode tree) {
268            this.tree = tree;
269        }
270
271        /**
272         * Getter for error message during parsing.
273         *
274         * @return Error message if parsing was unsuccessful, null otherwise.
275         */
276        public ParseErrorMessage getParseErrorMessage() {
277            return parseErrorMessage;
278        }
279
280        /**
281         * Sets parse error message.
282         *
283         * @param parseErrorMessage Parse error message.
284         */
285        public void setParseErrorMessage(ParseErrorMessage parseErrorMessage) {
286            this.parseErrorMessage = parseErrorMessage;
287        }
288
289        /**
290         * This method is used to check if the javadoc parsed has non-tight HTML tags.
291         *
292         * @return returns true if the javadoc has at least one non-tight HTML tag; false otherwise
293         * @see <a
294         *     href="https://checkstyle.org/writingjavadocchecks.html#Tight-HTML_rules">
295         *     Tight HTML rules</a>
296         */
297        public boolean isNonTight() {
298            return firstNonTightHtmlTag != null;
299        }
300
301        /**
302         * Getter for the first non-tight HTML tag encountered while parsing javadoc.
303         *
304         * @return the first non-tight HTML tag that is encountered while parsing Javadoc,
305         *     if one exists
306         * @see <a href="https://checkstyle.org/writingjavadocchecks.html#Tight-HTML_rules">
307         *     Tight HTML rules</a>
308         */
309        public DetailNode getFirstNonTightHtmlTag() {
310            return firstNonTightHtmlTag;
311        }
312
313    }
314
315    /**
316     * Contains information about parse error message.
317     */
318    public static class ParseErrorMessage {
319
320        /**
321         * Line number where parse error occurred.
322         */
323        private final int lineNumber;
324
325        /**
326         * Key for error message.
327         */
328        private final String messageKey;
329
330        /**
331         * Error message arguments.
332         */
333        private final Object[] messageArguments;
334
335        /**
336         * Initializes parse error message.
337         *
338         * @param lineNumber line number
339         * @param messageKey message key
340         * @param messageArguments message arguments
341         */
342        /* package */ ParseErrorMessage(int lineNumber, String messageKey,
343                Object... messageArguments) {
344            this.lineNumber = lineNumber;
345            this.messageKey = messageKey;
346            this.messageArguments = messageArguments.clone();
347        }
348
349        /**
350         * Getter for line number where parse error occurred.
351         *
352         * @return Line number where parse error occurred.
353         */
354        public int getLineNumber() {
355            return lineNumber;
356        }
357
358        /**
359         * Getter for key for error message.
360         *
361         * @return Key for error message.
362         */
363        public String getMessageKey() {
364            return messageKey;
365        }
366
367        /**
368         * Getter for error message arguments.
369         *
370         * @return Array of error message arguments.
371         */
372        public Object[] getMessageArguments() {
373            return messageArguments.clone();
374        }
375
376    }
377}