001/////////////////////////////////////////////////////////////////////////////////////////////// 002// checkstyle: Checks Java source code and other text files for adherence to a set of rules. 003// Copyright (C) 2001-2025 the original author or authors. 004// 005// This library is free software; you can redistribute it and/or 006// modify it under the terms of the GNU Lesser General Public 007// License as published by the Free Software Foundation; either 008// version 2.1 of the License, or (at your option) any later version. 009// 010// This library is distributed in the hope that it will be useful, 011// but WITHOUT ANY WARRANTY; without even the implied warranty of 012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 013// Lesser General Public License for more details. 014// 015// You should have received a copy of the GNU Lesser General Public 016// License along with this library; if not, write to the Free Software 017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 018/////////////////////////////////////////////////////////////////////////////////////////////// 019 020package com.puppycrawl.tools.checkstyle.checks.coding; 021 022import java.util.ArrayList; 023import java.util.BitSet; 024import java.util.HashMap; 025import java.util.List; 026import java.util.Map; 027import java.util.regex.Pattern; 028 029import com.puppycrawl.tools.checkstyle.FileStatefulCheck; 030import com.puppycrawl.tools.checkstyle.PropertyType; 031import com.puppycrawl.tools.checkstyle.XdocsPropertyType; 032import com.puppycrawl.tools.checkstyle.api.AbstractCheck; 033import com.puppycrawl.tools.checkstyle.api.DetailAST; 034import com.puppycrawl.tools.checkstyle.api.TokenTypes; 035import com.puppycrawl.tools.checkstyle.utils.CheckUtil; 036import com.puppycrawl.tools.checkstyle.utils.TokenUtil; 037 038/** 039 * <div> 040 * Checks for multiple occurrences of the same string literal within a single file. 041 * </div> 042 * 043 * <p> 044 * Rationale: Code duplication makes maintenance more difficult, so it can be better 045 * to replace the multiple occurrences with a constant. 046 * </p> 047 * 048 * @since 3.5 049 */ 050@FileStatefulCheck 051public class MultipleStringLiteralsCheck extends AbstractCheck { 052 053 /** 054 * A key is pointing to the warning message text in "messages.properties" 055 * file. 056 */ 057 public static final String MSG_KEY = "multiple.string.literal"; 058 059 /** 060 * Compiled pattern for all system newlines. 061 */ 062 private static final Pattern ALL_NEW_LINES = Pattern.compile("\\R"); 063 064 /** 065 * String used to amend TEXT_BLOCK_CONTENT so that it matches STRING_LITERAL. 066 */ 067 private static final String QUOTE = "\""; 068 069 /** 070 * The found strings and their tokens. 071 */ 072 private final Map<String, List<DetailAST>> stringMap = new HashMap<>(); 073 074 /** 075 * Specify token type names where duplicate strings are ignored even if they 076 * don't match ignoredStringsRegexp. This allows you to exclude syntactical 077 * contexts like annotations or static initializers from the check. 078 */ 079 @XdocsPropertyType(PropertyType.TOKEN_ARRAY) 080 private final BitSet ignoreOccurrenceContext = new BitSet(); 081 082 /** 083 * Specify the maximum number of occurrences to allow without generating a warning. 084 */ 085 private int allowedDuplicates = 1; 086 087 /** 088 * Specify RegExp for ignored strings (with quotation marks). 089 */ 090 private Pattern ignoreStringsRegexp; 091 092 /** 093 * Construct an instance with default values. 094 */ 095 public MultipleStringLiteralsCheck() { 096 setIgnoreStringsRegexp(Pattern.compile("^\"\"$")); 097 ignoreOccurrenceContext.set(TokenTypes.ANNOTATION); 098 } 099 100 /** 101 * Setter to specify the maximum number of occurrences to allow without generating a warning. 102 * 103 * @param allowedDuplicates The maximum number of duplicates. 104 * @since 3.5 105 */ 106 public void setAllowedDuplicates(int allowedDuplicates) { 107 this.allowedDuplicates = allowedDuplicates; 108 } 109 110 /** 111 * Setter to specify RegExp for ignored strings (with quotation marks). 112 * 113 * @param ignoreStringsRegexp 114 * regular expression pattern for ignored strings 115 * @noinspection WeakerAccess 116 * @noinspectionreason WeakerAccess - we avoid 'protected' when possible 117 * @since 4.0 118 */ 119 public final void setIgnoreStringsRegexp(Pattern ignoreStringsRegexp) { 120 if (ignoreStringsRegexp == null || ignoreStringsRegexp.pattern().isEmpty()) { 121 this.ignoreStringsRegexp = null; 122 } 123 else { 124 this.ignoreStringsRegexp = ignoreStringsRegexp; 125 } 126 } 127 128 /** 129 * Setter to specify token type names where duplicate strings are ignored even 130 * if they don't match ignoredStringsRegexp. This allows you to exclude 131 * syntactical contexts like annotations or static initializers from the check. 132 * 133 * @param strRep the string representation of the tokens interested in 134 * @since 4.4 135 */ 136 public final void setIgnoreOccurrenceContext(String... strRep) { 137 ignoreOccurrenceContext.clear(); 138 for (final String s : strRep) { 139 final int type = TokenUtil.getTokenId(s); 140 ignoreOccurrenceContext.set(type); 141 } 142 } 143 144 @Override 145 public int[] getDefaultTokens() { 146 return getRequiredTokens(); 147 } 148 149 @Override 150 public int[] getAcceptableTokens() { 151 return getRequiredTokens(); 152 } 153 154 @Override 155 public int[] getRequiredTokens() { 156 return new int[] { 157 TokenTypes.STRING_LITERAL, 158 TokenTypes.TEXT_BLOCK_CONTENT, 159 }; 160 } 161 162 @Override 163 public void visitToken(DetailAST ast) { 164 if (!isInIgnoreOccurrenceContext(ast)) { 165 final String currentString; 166 if (ast.getType() == TokenTypes.TEXT_BLOCK_CONTENT) { 167 final String strippedString = 168 CheckUtil.stripIndentAndInitialNewLineFromTextBlock(ast.getText()); 169 // We need to add quotes here to be consistent with STRING_LITERAL text. 170 currentString = QUOTE + strippedString + QUOTE; 171 } 172 else { 173 currentString = ast.getText(); 174 } 175 if (ignoreStringsRegexp == null 176 || !ignoreStringsRegexp.matcher(currentString).find()) { 177 stringMap.computeIfAbsent(currentString, key -> new ArrayList<>()).add(ast); 178 } 179 } 180 } 181 182 /** 183 * Analyses the path from the AST root to a given AST for occurrences 184 * of the token types in {@link #ignoreOccurrenceContext}. 185 * 186 * @param ast the node from where to start searching towards the root node 187 * @return whether the path from the root node to ast contains one of the 188 * token type in {@link #ignoreOccurrenceContext}. 189 */ 190 private boolean isInIgnoreOccurrenceContext(DetailAST ast) { 191 boolean isInIgnoreOccurrenceContext = false; 192 for (DetailAST token = ast; token != null; token = token.getParent()) { 193 final int type = token.getType(); 194 if (ignoreOccurrenceContext.get(type)) { 195 isInIgnoreOccurrenceContext = true; 196 break; 197 } 198 } 199 return isInIgnoreOccurrenceContext; 200 } 201 202 @Override 203 public void beginTree(DetailAST rootAST) { 204 stringMap.clear(); 205 } 206 207 @Override 208 public void finishTree(DetailAST rootAST) { 209 for (Map.Entry<String, List<DetailAST>> stringListEntry : stringMap.entrySet()) { 210 final List<DetailAST> hits = stringListEntry.getValue(); 211 if (hits.size() > allowedDuplicates) { 212 final DetailAST firstFinding = hits.get(0); 213 final String recurringString = 214 ALL_NEW_LINES.matcher( 215 stringListEntry.getKey()).replaceAll("\\\\n"); 216 log(firstFinding, MSG_KEY, recurringString, hits.size()); 217 } 218 } 219 } 220} 221