001///////////////////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
003// Copyright (C) 2001-2025 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018///////////////////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.checks.regexp;
021
022import java.util.regex.Matcher;
023import java.util.regex.Pattern;
024
025import com.puppycrawl.tools.checkstyle.FileStatefulCheck;
026import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
027import com.puppycrawl.tools.checkstyle.api.DetailAST;
028import com.puppycrawl.tools.checkstyle.api.FileContents;
029import com.puppycrawl.tools.checkstyle.api.FileText;
030import com.puppycrawl.tools.checkstyle.api.LineColumn;
031import com.puppycrawl.tools.checkstyle.utils.CommonUtil;
032
033/**
034 * <div>
035 * Checks that a specified pattern exists, exists less than
036 * a set number of times, or does not exist in the file.
037 * </div>
038 *
039 * <p>
040 * This check combines all the functionality provided by
041 * <a href="https://checkstyle.org/checks/header/regexpheader.html">RegexpHeader</a>
042 * except supplying the regular expression from a file.
043 * </p>
044 *
045 * <p>
046 * It differs from them in that it works in multiline mode. Its regular expression
047 * can span multiple lines and it checks this against the whole file at once.
048 * The others work in single-line mode. Their single or multiple regular expressions
049 * can only span one line. They check each of these against each line in the file in turn.
050 * </p>
051 *
052 * <p>
053 * <b>Note:</b> Because of the different mode of operation there may be some
054 * changes in the regular expressions used to achieve a particular end.
055 * </p>
056 *
057 * <p>
058 * In multiline mode...
059 * </p>
060 * <ul>
061 * <li>
062 * {@code ^} means the beginning of a line, as opposed to beginning of the input.
063 * </li>
064 * <li>
065 * For beginning of the input use {@code \A}.
066 * </li>
067 * <li>
068 * {@code $} means the end of a line, as opposed to the end of the input.
069 * </li>
070 * <li>
071 * For end of input use {@code \Z}.
072 * </li>
073 * <li>
074 * Each line in the file is terminated with a line feed character.
075 * </li>
076 * </ul>
077 *
078 * <p>
079 * <b>Note:</b> Not all regular expression engines are created equal.
080 * Some provide extra functions that others do not and some elements
081 * of the syntax may vary. This check makes use of the
082 * <a href="https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/util/regex/package-summary.html">
083 * java.util.regex package</a>; please check its documentation for details
084 * of how to construct a regular expression to achieve a particular goal.
085 * </p>
086 *
087 * <p>
088 * <b>Note:</b> When entering a regular expression as a parameter in
089 * the XML config file you must also take into account the XML rules. e.g.
090 * if you want to match a &lt; symbol you need to enter &amp;lt;.
091 * The regular expression should be entered on one line.
092 * </p>
093 *
094 * <p>
095 * <b>Note:</b> To search for parentheses () in a regular expression
096 * you must escape them like \(\). This is required by the regexp engine,
097 * otherwise it will think they are special instruction characters.
098 * </p>
099 *
100 * <p>
101 * <b>Note:</b> To search for things that mean something in XML, like
102 * &lt; you need to escape them like &amp;lt;. This is required so the
103 * XML parser does not act on them, but instead passes the correct
104 * character to the regexp engine.
105 * </p>
106 *
107 * @since 4.0
108 */
109@FileStatefulCheck
110public class RegexpCheck extends AbstractCheck {
111
112    /**
113     * A key is pointing to the warning message text in "messages.properties"
114     * file.
115     */
116    public static final String MSG_ILLEGAL_REGEXP = "illegal.regexp";
117
118    /**
119     * A key is pointing to the warning message text in "messages.properties"
120     * file.
121     */
122    public static final String MSG_REQUIRED_REGEXP = "required.regexp";
123
124    /**
125     * A key is pointing to the warning message text in "messages.properties"
126     * file.
127     */
128    public static final String MSG_DUPLICATE_REGEXP = "duplicate.regexp";
129
130    /** Default duplicate limit. */
131    private static final int DEFAULT_DUPLICATE_LIMIT = -1;
132
133    /** Default error report limit. */
134    private static final int DEFAULT_ERROR_LIMIT = 100;
135
136    /** Error count exceeded message. */
137    private static final String ERROR_LIMIT_EXCEEDED_MESSAGE =
138        "The error limit has been exceeded, "
139        + "the check is aborting, there may be more unreported errors.";
140
141    /**
142     * Specify message which is used to notify about violations,
143     * if empty then the default (hard-coded) message is used.
144     */
145    private String message;
146
147    /** Control whether to ignore matches found within comments. */
148    private boolean ignoreComments;
149
150    /** Control whether the pattern is required or illegal. */
151    private boolean illegalPattern;
152
153    /** Specify the maximum number of violations before the check will abort. */
154    private int errorLimit = DEFAULT_ERROR_LIMIT;
155
156    /**
157     * Control whether to check for duplicates of a required pattern,
158     * any negative value means no checking for duplicates,
159     * any positive value is used as the maximum number of allowed duplicates,
160     * if the limit is exceeded violations will be logged.
161     */
162    private int duplicateLimit;
163
164    /** Boolean to say if we should check for duplicates. */
165    private boolean checkForDuplicates;
166
167    /** Specify the pattern to match against. */
168    private Pattern format = Pattern.compile("^$", Pattern.MULTILINE);
169
170    /**
171     * Setter to specify message which is used to notify about violations,
172     * if empty then the default (hard-coded) message is used.
173     *
174     * @param message custom message which should be used in report.
175     * @since 4.0
176     */
177    public void setMessage(String message) {
178        this.message = message;
179    }
180
181    /**
182     * Setter to control whether to ignore matches found within comments.
183     *
184     * @param ignoreComments True if comments should be ignored.
185     * @since 4.0
186     */
187    public void setIgnoreComments(boolean ignoreComments) {
188        this.ignoreComments = ignoreComments;
189    }
190
191    /**
192     * Setter to control whether the pattern is required or illegal.
193     *
194     * @param illegalPattern True if pattern is not allowed.
195     * @since 4.0
196     */
197    public void setIllegalPattern(boolean illegalPattern) {
198        this.illegalPattern = illegalPattern;
199    }
200
201    /**
202     * Setter to specify the maximum number of violations before the check will abort.
203     *
204     * @param errorLimit the number of errors to report.
205     * @since 4.0
206     */
207    public void setErrorLimit(int errorLimit) {
208        this.errorLimit = errorLimit;
209    }
210
211    /**
212     * Setter to control whether to check for duplicates of a required pattern,
213     * any negative value means no checking for duplicates,
214     * any positive value is used as the maximum number of allowed duplicates,
215     * if the limit is exceeded violations will be logged.
216     *
217     * @param duplicateLimit negative values mean no duplicate checking,
218     *     any positive value is used as the limit.
219     * @since 4.0
220     */
221    public void setDuplicateLimit(int duplicateLimit) {
222        this.duplicateLimit = duplicateLimit;
223        checkForDuplicates = duplicateLimit > DEFAULT_DUPLICATE_LIMIT;
224    }
225
226    /**
227     * Setter to specify the pattern to match against.
228     *
229     * @param pattern the new pattern
230     * @since 4.0
231     */
232    public final void setFormat(Pattern pattern) {
233        format = CommonUtil.createPattern(pattern.pattern(), Pattern.MULTILINE);
234    }
235
236    @Override
237    public int[] getDefaultTokens() {
238        return getRequiredTokens();
239    }
240
241    @Override
242    public int[] getAcceptableTokens() {
243        return getRequiredTokens();
244    }
245
246    @Override
247    public int[] getRequiredTokens() {
248        return CommonUtil.EMPTY_INT_ARRAY;
249    }
250
251    @Override
252    public void beginTree(DetailAST rootAST) {
253        processRegexpMatches();
254    }
255
256    /**
257     * Processes the regexp matches and logs the number of errors in the file.
258     *
259     */
260    // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166
261    @SuppressWarnings("deprecation")
262    private void processRegexpMatches() {
263        final Matcher matcher = format.matcher(getFileContents().getText().getFullText());
264        int errorCount = 0;
265        int matchCount = 0;
266        final FileText text = getFileContents().getText();
267        while (errorCount < errorLimit && matcher.find()) {
268            final LineColumn start = text.lineColumn(matcher.start());
269            final int startLine = start.getLine();
270
271            final boolean ignore = isIgnore(startLine, text, start, matcher);
272            if (!ignore) {
273                matchCount++;
274                if (illegalPattern || checkForDuplicates
275                        && matchCount - 1 > duplicateLimit) {
276                    errorCount++;
277                    logMessage(startLine, errorCount);
278                }
279            }
280        }
281        if (!illegalPattern && matchCount == 0) {
282            final String msg = getMessage(errorCount);
283            log(1, MSG_REQUIRED_REGEXP, msg);
284        }
285    }
286
287    /**
288     * Detect ignore situation.
289     *
290     * @param startLine position of line
291     * @param text file text
292     * @param start line column
293     * @param matcher The matcher
294     * @return true is that need to be ignored
295     */
296    // suppress deprecation until https://github.com/checkstyle/checkstyle/issues/11166
297    @SuppressWarnings("deprecation")
298    private boolean isIgnore(int startLine, FileText text, LineColumn start, Matcher matcher) {
299        final LineColumn end;
300        if (matcher.end() == 0) {
301            end = text.lineColumn(0);
302        }
303        else {
304            end = text.lineColumn(matcher.end() - 1);
305        }
306        boolean ignore = false;
307        if (ignoreComments) {
308            final FileContents theFileContents = getFileContents();
309            final int startColumn = start.getColumn();
310            final int endLine = end.getLine();
311            final int endColumn = end.getColumn();
312            ignore = theFileContents.hasIntersectionWithComment(startLine,
313                startColumn, endLine, endColumn);
314        }
315        return ignore;
316    }
317
318    /**
319     * Displays the right message.
320     *
321     * @param lineNumber the line number the message relates to.
322     * @param errorCount number of errors in the file.
323     */
324    private void logMessage(int lineNumber, int errorCount) {
325        final String msg = getMessage(errorCount);
326
327        if (illegalPattern) {
328            log(lineNumber, MSG_ILLEGAL_REGEXP, msg);
329        }
330        else {
331            log(lineNumber, MSG_DUPLICATE_REGEXP, msg);
332        }
333    }
334
335    /**
336     * Provide right message.
337     *
338     * @param errorCount number of errors in the file.
339     * @return message for violation.
340     */
341    private String getMessage(int errorCount) {
342        String msg;
343
344        if (message == null || message.isEmpty()) {
345            msg = format.pattern();
346        }
347        else {
348            msg = message;
349        }
350
351        if (errorCount >= errorLimit) {
352            msg = ERROR_LIMIT_EXCEEDED_MESSAGE + msg;
353        }
354
355        return msg;
356    }
357}