001///////////////////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
003// Copyright (C) 2001-2025 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018///////////////////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.checks.coding;
021
022import java.util.ArrayList;
023import java.util.BitSet;
024import java.util.HashMap;
025import java.util.List;
026import java.util.Map;
027import java.util.regex.Pattern;
028
029import com.puppycrawl.tools.checkstyle.FileStatefulCheck;
030import com.puppycrawl.tools.checkstyle.PropertyType;
031import com.puppycrawl.tools.checkstyle.XdocsPropertyType;
032import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
033import com.puppycrawl.tools.checkstyle.api.DetailAST;
034import com.puppycrawl.tools.checkstyle.api.TokenTypes;
035import com.puppycrawl.tools.checkstyle.utils.CheckUtil;
036import com.puppycrawl.tools.checkstyle.utils.TokenUtil;
037
038/**
039 * <div>
040 * Checks for multiple occurrences of the same string literal within a single file.
041 * </div>
042 *
043 * <p>
044 * Rationale: Code duplication makes maintenance more difficult, so it can be better
045 * to replace the multiple occurrences with a constant.
046 * </p>
047 *
048 * @since 3.5
049 */
050@FileStatefulCheck
051public class MultipleStringLiteralsCheck extends AbstractCheck {
052
053    /**
054     * A key is pointing to the warning message text in "messages.properties"
055     * file.
056     */
057    public static final String MSG_KEY = "multiple.string.literal";
058
059    /**
060     * Compiled pattern for all system newlines.
061     */
062    private static final Pattern ALL_NEW_LINES = Pattern.compile("\\R");
063
064    /**
065     * String used to amend TEXT_BLOCK_CONTENT so that it matches STRING_LITERAL.
066     */
067    private static final String QUOTE = "\"";
068
069    /**
070     * The found strings and their tokens.
071     */
072    private final Map<String, List<DetailAST>> stringMap = new HashMap<>();
073
074    /**
075     * Specify token type names where duplicate strings are ignored even if they
076     * don't match ignoredStringsRegexp. This allows you to exclude syntactical
077     * contexts like annotations or static initializers from the check.
078     */
079    @XdocsPropertyType(PropertyType.TOKEN_ARRAY)
080    private final BitSet ignoreOccurrenceContext = new BitSet();
081
082    /**
083     * Specify the maximum number of occurrences to allow without generating a warning.
084     */
085    private int allowedDuplicates = 1;
086
087    /**
088     * Specify RegExp for ignored strings (with quotation marks).
089     */
090    private Pattern ignoreStringsRegexp;
091
092    /**
093     * Construct an instance with default values.
094     */
095    public MultipleStringLiteralsCheck() {
096        setIgnoreStringsRegexp(Pattern.compile("^\"\"$"));
097        ignoreOccurrenceContext.set(TokenTypes.ANNOTATION);
098    }
099
100    /**
101     * Setter to specify the maximum number of occurrences to allow without generating a warning.
102     *
103     * @param allowedDuplicates The maximum number of duplicates.
104     * @since 3.5
105     */
106    public void setAllowedDuplicates(int allowedDuplicates) {
107        this.allowedDuplicates = allowedDuplicates;
108    }
109
110    /**
111     * Setter to specify RegExp for ignored strings (with quotation marks).
112     *
113     * @param ignoreStringsRegexp
114     *        regular expression pattern for ignored strings
115     * @noinspection WeakerAccess
116     * @noinspectionreason WeakerAccess - we avoid 'protected' when possible
117     * @since 4.0
118     */
119    public final void setIgnoreStringsRegexp(Pattern ignoreStringsRegexp) {
120        if (ignoreStringsRegexp == null || ignoreStringsRegexp.pattern().isEmpty()) {
121            this.ignoreStringsRegexp = null;
122        }
123        else {
124            this.ignoreStringsRegexp = ignoreStringsRegexp;
125        }
126    }
127
128    /**
129     * Setter to specify token type names where duplicate strings are ignored even
130     * if they don't match ignoredStringsRegexp. This allows you to exclude
131     * syntactical contexts like annotations or static initializers from the check.
132     *
133     * @param strRep the string representation of the tokens interested in
134     * @since 4.4
135     */
136    public final void setIgnoreOccurrenceContext(String... strRep) {
137        ignoreOccurrenceContext.clear();
138        for (final String s : strRep) {
139            final int type = TokenUtil.getTokenId(s);
140            ignoreOccurrenceContext.set(type);
141        }
142    }
143
144    @Override
145    public int[] getDefaultTokens() {
146        return getRequiredTokens();
147    }
148
149    @Override
150    public int[] getAcceptableTokens() {
151        return getRequiredTokens();
152    }
153
154    @Override
155    public int[] getRequiredTokens() {
156        return new int[] {
157            TokenTypes.STRING_LITERAL,
158            TokenTypes.TEXT_BLOCK_CONTENT,
159        };
160    }
161
162    @Override
163    public void visitToken(DetailAST ast) {
164        if (!isInIgnoreOccurrenceContext(ast)) {
165            final String currentString;
166            if (ast.getType() == TokenTypes.TEXT_BLOCK_CONTENT) {
167                final String strippedString =
168                    CheckUtil.stripIndentAndInitialNewLineFromTextBlock(ast.getText());
169                // We need to add quotes here to be consistent with STRING_LITERAL text.
170                currentString = QUOTE + strippedString + QUOTE;
171            }
172            else {
173                currentString = ast.getText();
174            }
175            if (ignoreStringsRegexp == null
176                    || !ignoreStringsRegexp.matcher(currentString).find()) {
177                stringMap.computeIfAbsent(currentString, key -> new ArrayList<>()).add(ast);
178            }
179        }
180    }
181
182    /**
183     * Analyses the path from the AST root to a given AST for occurrences
184     * of the token types in {@link #ignoreOccurrenceContext}.
185     *
186     * @param ast the node from where to start searching towards the root node
187     * @return whether the path from the root node to ast contains one of the
188     *     token type in {@link #ignoreOccurrenceContext}.
189     */
190    private boolean isInIgnoreOccurrenceContext(DetailAST ast) {
191        boolean isInIgnoreOccurrenceContext = false;
192        for (DetailAST token = ast; token != null; token = token.getParent()) {
193            final int type = token.getType();
194            if (ignoreOccurrenceContext.get(type)) {
195                isInIgnoreOccurrenceContext = true;
196                break;
197            }
198        }
199        return isInIgnoreOccurrenceContext;
200    }
201
202    @Override
203    public void beginTree(DetailAST rootAST) {
204        stringMap.clear();
205    }
206
207    @Override
208    public void finishTree(DetailAST rootAST) {
209        for (Map.Entry<String, List<DetailAST>> stringListEntry : stringMap.entrySet()) {
210            final List<DetailAST> hits = stringListEntry.getValue();
211            if (hits.size() > allowedDuplicates) {
212                final DetailAST firstFinding = hits.get(0);
213                final String recurringString =
214                    ALL_NEW_LINES.matcher(
215                        stringListEntry.getKey()).replaceAll("\\\\n");
216                log(firstFinding, MSG_KEY, recurringString, hits.size());
217            }
218        }
219    }
220}
221