001///////////////////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
003// Copyright (C) 2001-2024 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018///////////////////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.checks.coding;
021
022import java.util.ArrayList;
023import java.util.BitSet;
024import java.util.HashMap;
025import java.util.List;
026import java.util.Map;
027import java.util.regex.Pattern;
028
029import com.puppycrawl.tools.checkstyle.FileStatefulCheck;
030import com.puppycrawl.tools.checkstyle.PropertyType;
031import com.puppycrawl.tools.checkstyle.XdocsPropertyType;
032import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
033import com.puppycrawl.tools.checkstyle.api.DetailAST;
034import com.puppycrawl.tools.checkstyle.api.TokenTypes;
035import com.puppycrawl.tools.checkstyle.utils.CheckUtil;
036import com.puppycrawl.tools.checkstyle.utils.TokenUtil;
037
038/**
039 * <div>
040 * Checks for multiple occurrences of the same string literal within a single file.
041 * </div>
042 *
043 * <p>
044 * Rationale: Code duplication makes maintenance more difficult, so it can be better
045 * to replace the multiple occurrences with a constant.
046 * </p>
047 * <ul>
048 * <li>
049 * Property {@code allowedDuplicates} - Specify the maximum number of occurrences
050 * to allow without generating a warning.
051 * Type is {@code int}.
052 * Default value is {@code 1}.
053 * </li>
054 * <li>
055 * Property {@code ignoreOccurrenceContext} - Specify token type names where duplicate
056 * strings are ignored even if they don't match ignoredStringsRegexp. This allows you to
057 * exclude syntactical contexts like annotations or static initializers from the check.
058 * Type is {@code java.lang.String[]}.
059 * Validation type is {@code tokenTypesSet}.
060 * Default value is
061 * <a href="https://checkstyle.org/apidocs/com/puppycrawl/tools/checkstyle/api/TokenTypes.html#ANNOTATION">
062 * ANNOTATION</a>.
063 * </li>
064 * <li>
065 * Property {@code ignoreStringsRegexp} - Specify RegExp for ignored strings (with quotation marks).
066 * Type is {@code java.util.regex.Pattern}.
067 * Default value is {@code "^""$"}.
068 * </li>
069 * </ul>
070 *
071 * <p>
072 * Parent is {@code com.puppycrawl.tools.checkstyle.TreeWalker}
073 * </p>
074 *
075 * <p>
076 * Violation Message Keys:
077 * </p>
078 * <ul>
079 * <li>
080 * {@code multiple.string.literal}
081 * </li>
082 * </ul>
083 *
084 * @since 3.5
085 */
086@FileStatefulCheck
087public class MultipleStringLiteralsCheck extends AbstractCheck {
088
089    /**
090     * A key is pointing to the warning message text in "messages.properties"
091     * file.
092     */
093    public static final String MSG_KEY = "multiple.string.literal";
094
095    /**
096     * Compiled pattern for all system newlines.
097     */
098    private static final Pattern ALL_NEW_LINES = Pattern.compile("\\R");
099
100    /**
101     * String used to amend TEXT_BLOCK_CONTENT so that it matches STRING_LITERAL.
102     */
103    private static final String QUOTE = "\"";
104
105    /**
106     * The found strings and their tokens.
107     */
108    private final Map<String, List<DetailAST>> stringMap = new HashMap<>();
109
110    /**
111     * Specify token type names where duplicate strings are ignored even if they
112     * don't match ignoredStringsRegexp. This allows you to exclude syntactical
113     * contexts like annotations or static initializers from the check.
114     */
115    @XdocsPropertyType(PropertyType.TOKEN_ARRAY)
116    private final BitSet ignoreOccurrenceContext = new BitSet();
117
118    /**
119     * Specify the maximum number of occurrences to allow without generating a warning.
120     */
121    private int allowedDuplicates = 1;
122
123    /**
124     * Specify RegExp for ignored strings (with quotation marks).
125     */
126    private Pattern ignoreStringsRegexp;
127
128    /**
129     * Construct an instance with default values.
130     */
131    public MultipleStringLiteralsCheck() {
132        setIgnoreStringsRegexp(Pattern.compile("^\"\"$"));
133        ignoreOccurrenceContext.set(TokenTypes.ANNOTATION);
134    }
135
136    /**
137     * Setter to specify the maximum number of occurrences to allow without generating a warning.
138     *
139     * @param allowedDuplicates The maximum number of duplicates.
140     * @since 3.5
141     */
142    public void setAllowedDuplicates(int allowedDuplicates) {
143        this.allowedDuplicates = allowedDuplicates;
144    }
145
146    /**
147     * Setter to specify RegExp for ignored strings (with quotation marks).
148     *
149     * @param ignoreStringsRegexp
150     *        regular expression pattern for ignored strings
151     * @noinspection WeakerAccess
152     * @noinspectionreason WeakerAccess - we avoid 'protected' when possible
153     * @since 4.0
154     */
155    public final void setIgnoreStringsRegexp(Pattern ignoreStringsRegexp) {
156        if (ignoreStringsRegexp == null || ignoreStringsRegexp.pattern().isEmpty()) {
157            this.ignoreStringsRegexp = null;
158        }
159        else {
160            this.ignoreStringsRegexp = ignoreStringsRegexp;
161        }
162    }
163
164    /**
165     * Setter to specify token type names where duplicate strings are ignored even
166     * if they don't match ignoredStringsRegexp. This allows you to exclude
167     * syntactical contexts like annotations or static initializers from the check.
168     *
169     * @param strRep the string representation of the tokens interested in
170     * @since 4.4
171     */
172    public final void setIgnoreOccurrenceContext(String... strRep) {
173        ignoreOccurrenceContext.clear();
174        for (final String s : strRep) {
175            final int type = TokenUtil.getTokenId(s);
176            ignoreOccurrenceContext.set(type);
177        }
178    }
179
180    @Override
181    public int[] getDefaultTokens() {
182        return getRequiredTokens();
183    }
184
185    @Override
186    public int[] getAcceptableTokens() {
187        return getRequiredTokens();
188    }
189
190    @Override
191    public int[] getRequiredTokens() {
192        return new int[] {
193            TokenTypes.STRING_LITERAL,
194            TokenTypes.TEXT_BLOCK_CONTENT,
195        };
196    }
197
198    @Override
199    public void visitToken(DetailAST ast) {
200        if (!isInIgnoreOccurrenceContext(ast)) {
201            final String currentString;
202            if (ast.getType() == TokenTypes.TEXT_BLOCK_CONTENT) {
203                final String strippedString =
204                    CheckUtil.stripIndentAndInitialNewLineFromTextBlock(ast.getText());
205                // We need to add quotes here to be consistent with STRING_LITERAL text.
206                currentString = QUOTE + strippedString + QUOTE;
207            }
208            else {
209                currentString = ast.getText();
210            }
211            if (ignoreStringsRegexp == null
212                    || !ignoreStringsRegexp.matcher(currentString).find()) {
213                stringMap.computeIfAbsent(currentString, key -> new ArrayList<>()).add(ast);
214            }
215        }
216    }
217
218    /**
219     * Analyses the path from the AST root to a given AST for occurrences
220     * of the token types in {@link #ignoreOccurrenceContext}.
221     *
222     * @param ast the node from where to start searching towards the root node
223     * @return whether the path from the root node to ast contains one of the
224     *     token type in {@link #ignoreOccurrenceContext}.
225     */
226    private boolean isInIgnoreOccurrenceContext(DetailAST ast) {
227        boolean isInIgnoreOccurrenceContext = false;
228        for (DetailAST token = ast; token != null; token = token.getParent()) {
229            final int type = token.getType();
230            if (ignoreOccurrenceContext.get(type)) {
231                isInIgnoreOccurrenceContext = true;
232                break;
233            }
234        }
235        return isInIgnoreOccurrenceContext;
236    }
237
238    @Override
239    public void beginTree(DetailAST rootAST) {
240        stringMap.clear();
241    }
242
243    @Override
244    public void finishTree(DetailAST rootAST) {
245        for (Map.Entry<String, List<DetailAST>> stringListEntry : stringMap.entrySet()) {
246            final List<DetailAST> hits = stringListEntry.getValue();
247            if (hits.size() > allowedDuplicates) {
248                final DetailAST firstFinding = hits.get(0);
249                final String recurringString =
250                    ALL_NEW_LINES.matcher(
251                        stringListEntry.getKey()).replaceAll("\\\\n");
252                log(firstFinding, MSG_KEY, recurringString, hits.size());
253            }
254        }
255    }
256}
257