001/////////////////////////////////////////////////////////////////////////////////////////////// 002// checkstyle: Checks Java source code and other text files for adherence to a set of rules. 003// Copyright (C) 2001-2025 the original author or authors. 004// 005// This library is free software; you can redistribute it and/or 006// modify it under the terms of the GNU Lesser General Public 007// License as published by the Free Software Foundation; either 008// version 2.1 of the License, or (at your option) any later version. 009// 010// This library is distributed in the hope that it will be useful, 011// but WITHOUT ANY WARRANTY; without even the implied warranty of 012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 013// Lesser General Public License for more details. 014// 015// You should have received a copy of the GNU Lesser General Public 016// License along with this library; if not, write to the Free Software 017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 018/////////////////////////////////////////////////////////////////////////////////////////////// 019 020package com.puppycrawl.tools.checkstyle.checks.regexp; 021 022import java.util.regex.Matcher; 023import java.util.regex.Pattern; 024 025import com.puppycrawl.tools.checkstyle.FileStatefulCheck; 026import com.puppycrawl.tools.checkstyle.api.AbstractCheck; 027import com.puppycrawl.tools.checkstyle.api.DetailAST; 028import com.puppycrawl.tools.checkstyle.api.FileContents; 029import com.puppycrawl.tools.checkstyle.api.FileText; 030import com.puppycrawl.tools.checkstyle.api.LineColumn; 031import com.puppycrawl.tools.checkstyle.utils.CommonUtil; 032 033/** 034 * <div> 035 * Checks that a specified pattern exists, exists less than 036 * a set number of times, or does not exist in the file. 037 * </div> 038 * 039 * <p> 040 * This check combines all the functionality provided by 041 * <a href="https://checkstyle.org/checks/header/regexpheader.html">RegexpHeader</a> 042 * except supplying the regular expression from a file. 043 * </p> 044 * 045 * <p> 046 * It differs from them in that it works in multiline mode. Its regular expression 047 * can span multiple lines and it checks this against the whole file at once. 048 * The others work in single-line mode. Their single or multiple regular expressions 049 * can only span one line. They check each of these against each line in the file in turn. 050 * </p> 051 * 052 * <p> 053 * <b>Note:</b> Because of the different mode of operation there may be some 054 * changes in the regular expressions used to achieve a particular end. 055 * </p> 056 * 057 * <p> 058 * In multiline mode... 059 * </p> 060 * <ul> 061 * <li> 062 * {@code ^} means the beginning of a line, as opposed to beginning of the input. 063 * </li> 064 * <li> 065 * For beginning of the input use {@code \A}. 066 * </li> 067 * <li> 068 * {@code $} means the end of a line, as opposed to the end of the input. 069 * </li> 070 * <li> 071 * For end of input use {@code \Z}. 072 * </li> 073 * <li> 074 * Each line in the file is terminated with a line feed character. 075 * </li> 076 * </ul> 077 * 078 * <p> 079 * <b>Note:</b> Not all regular expression engines are created equal. 080 * Some provide extra functions that others do not and some elements 081 * of the syntax may vary. This check makes use of the 082 * <a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/util/regex/package-summary.html"> 083 * java.util.regex package</a>; please check its documentation for details 084 * of how to construct a regular expression to achieve a particular goal. 085 * </p> 086 * 087 * <p> 088 * <b>Note:</b> When entering a regular expression as a parameter in 089 * the XML config file you must also take into account the XML rules. e.g. 090 * if you want to match a < symbol you need to enter &lt;. 091 * The regular expression should be entered on one line. 092 * </p> 093 * 094 * <p> 095 * <b>Note:</b> To search for parentheses () in a regular expression 096 * you must escape them like \(\). This is required by the regexp engine, 097 * otherwise it will think they are special instruction characters. 098 * </p> 099 * 100 * <p> 101 * <b>Note:</b> To search for things that mean something in XML, like 102 * < you need to escape them like &lt;. This is required so the 103 * XML parser does not act on them, but instead passes the correct 104 * character to the regexp engine. 105 * </p> 106 * 107 * @since 4.0 108 */ 109@FileStatefulCheck 110public class RegexpCheck extends AbstractCheck { 111 112 /** 113 * A key is pointing to the warning message text in "messages.properties" 114 * file. 115 */ 116 public static final String MSG_ILLEGAL_REGEXP = "illegal.regexp"; 117 118 /** 119 * A key is pointing to the warning message text in "messages.properties" 120 * file. 121 */ 122 public static final String MSG_REQUIRED_REGEXP = "required.regexp"; 123 124 /** 125 * A key is pointing to the warning message text in "messages.properties" 126 * file. 127 */ 128 public static final String MSG_DUPLICATE_REGEXP = "duplicate.regexp"; 129 130 /** Default duplicate limit. */ 131 private static final int DEFAULT_DUPLICATE_LIMIT = -1; 132 133 /** Default error report limit. */ 134 private static final int DEFAULT_ERROR_LIMIT = 100; 135 136 /** Error count exceeded message. */ 137 private static final String ERROR_LIMIT_EXCEEDED_MESSAGE = 138 "The error limit has been exceeded, " 139 + "the check is aborting, there may be more unreported errors."; 140 141 /** 142 * Specify message which is used to notify about violations, 143 * if empty then the default (hard-coded) message is used. 144 */ 145 private String message; 146 147 /** Control whether to ignore matches found within comments. */ 148 private boolean ignoreComments; 149 150 /** Control whether the pattern is required or illegal. */ 151 private boolean illegalPattern; 152 153 /** Specify the maximum number of violations before the check will abort. */ 154 private int errorLimit = DEFAULT_ERROR_LIMIT; 155 156 /** 157 * Control whether to check for duplicates of a required pattern, 158 * any negative value means no checking for duplicates, 159 * any positive value is used as the maximum number of allowed duplicates, 160 * if the limit is exceeded violations will be logged. 161 */ 162 private int duplicateLimit; 163 164 /** Boolean to say if we should check for duplicates. */ 165 private boolean checkForDuplicates; 166 167 /** Specify the pattern to match against. */ 168 private Pattern format = Pattern.compile("^$", Pattern.MULTILINE); 169 170 /** 171 * Setter to specify message which is used to notify about violations, 172 * if empty then the default (hard-coded) message is used. 173 * 174 * @param message custom message which should be used in report. 175 * @since 4.0 176 */ 177 public void setMessage(String message) { 178 this.message = message; 179 } 180 181 /** 182 * Setter to control whether to ignore matches found within comments. 183 * 184 * @param ignoreComments True if comments should be ignored. 185 * @since 4.0 186 */ 187 public void setIgnoreComments(boolean ignoreComments) { 188 this.ignoreComments = ignoreComments; 189 } 190 191 /** 192 * Setter to control whether the pattern is required or illegal. 193 * 194 * @param illegalPattern True if pattern is not allowed. 195 * @since 4.0 196 */ 197 public void setIllegalPattern(boolean illegalPattern) { 198 this.illegalPattern = illegalPattern; 199 } 200 201 /** 202 * Setter to specify the maximum number of violations before the check will abort. 203 * 204 * @param errorLimit the number of errors to report. 205 * @since 4.0 206 */ 207 public void setErrorLimit(int errorLimit) { 208 this.errorLimit = errorLimit; 209 } 210 211 /** 212 * Setter to control whether to check for duplicates of a required pattern, 213 * any negative value means no checking for duplicates, 214 * any positive value is used as the maximum number of allowed duplicates, 215 * if the limit is exceeded violations will be logged. 216 * 217 * @param duplicateLimit negative values mean no duplicate checking, 218 * any positive value is used as the limit. 219 * @since 4.0 220 */ 221 public void setDuplicateLimit(int duplicateLimit) { 222 this.duplicateLimit = duplicateLimit; 223 checkForDuplicates = duplicateLimit > DEFAULT_DUPLICATE_LIMIT; 224 } 225 226 /** 227 * Setter to specify the pattern to match against. 228 * 229 * @param pattern the new pattern 230 * @since 4.0 231 */ 232 public final void setFormat(Pattern pattern) { 233 format = CommonUtil.createPattern(pattern.pattern(), Pattern.MULTILINE); 234 } 235 236 @Override 237 public int[] getDefaultTokens() { 238 return getRequiredTokens(); 239 } 240 241 @Override 242 public int[] getAcceptableTokens() { 243 return getRequiredTokens(); 244 } 245 246 @Override 247 public int[] getRequiredTokens() { 248 return CommonUtil.EMPTY_INT_ARRAY; 249 } 250 251 @Override 252 public void beginTree(DetailAST rootAST) { 253 processRegexpMatches(); 254 } 255 256 /** 257 * Processes the regexp matches and logs the number of errors in the file. 258 * 259 */ 260 // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166 261 @SuppressWarnings("deprecation") 262 private void processRegexpMatches() { 263 final Matcher matcher = format.matcher(getFileContents().getText().getFullText()); 264 int errorCount = 0; 265 int matchCount = 0; 266 final FileText text = getFileContents().getText(); 267 while (errorCount < errorLimit && matcher.find()) { 268 final LineColumn start = text.lineColumn(matcher.start()); 269 final int startLine = start.getLine(); 270 271 final boolean ignore = isIgnore(startLine, text, start, matcher); 272 if (!ignore) { 273 matchCount++; 274 if (illegalPattern || checkForDuplicates 275 && matchCount - 1 > duplicateLimit) { 276 errorCount++; 277 logMessage(startLine, errorCount); 278 } 279 } 280 } 281 if (!illegalPattern && matchCount == 0) { 282 final String msg = getMessage(errorCount); 283 log(1, MSG_REQUIRED_REGEXP, msg); 284 } 285 } 286 287 /** 288 * Detect ignore situation. 289 * 290 * @param startLine position of line 291 * @param text file text 292 * @param start line column 293 * @param matcher The matcher 294 * @return true is that need to be ignored 295 */ 296 // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166 297 @SuppressWarnings("deprecation") 298 private boolean isIgnore(int startLine, FileText text, LineColumn start, Matcher matcher) { 299 final LineColumn end; 300 if (matcher.end() == 0) { 301 end = text.lineColumn(0); 302 } 303 else { 304 end = text.lineColumn(matcher.end() - 1); 305 } 306 boolean ignore = false; 307 if (ignoreComments) { 308 final FileContents theFileContents = getFileContents(); 309 final int startColumn = start.getColumn(); 310 final int endLine = end.getLine(); 311 final int endColumn = end.getColumn(); 312 ignore = theFileContents.hasIntersectionWithComment(startLine, 313 startColumn, endLine, endColumn); 314 } 315 return ignore; 316 } 317 318 /** 319 * Displays the right message. 320 * 321 * @param lineNumber the line number the message relates to. 322 * @param errorCount number of errors in the file. 323 */ 324 private void logMessage(int lineNumber, int errorCount) { 325 final String msg = getMessage(errorCount); 326 327 if (illegalPattern) { 328 log(lineNumber, MSG_ILLEGAL_REGEXP, msg); 329 } 330 else { 331 log(lineNumber, MSG_DUPLICATE_REGEXP, msg); 332 } 333 } 334 335 /** 336 * Provide right message. 337 * 338 * @param errorCount number of errors in the file. 339 * @return message for violation. 340 */ 341 private String getMessage(int errorCount) { 342 String msg; 343 344 if (message == null || message.isEmpty()) { 345 msg = format.pattern(); 346 } 347 else { 348 msg = message; 349 } 350 351 if (errorCount >= errorLimit) { 352 msg = ERROR_LIMIT_EXCEEDED_MESSAGE + msg; 353 } 354 355 return msg; 356 } 357}