Source code

001///////////////////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
003// Copyright (C) 2001-2026 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018///////////////////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.checks.coding;
021
022import com.puppycrawl.tools.checkstyle.StatelessCheck;
023import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
024import com.puppycrawl.tools.checkstyle.api.DetailAST;
025import com.puppycrawl.tools.checkstyle.api.TokenTypes;
026import com.puppycrawl.tools.checkstyle.utils.CommonUtil;
027
028/**
029 * <div>
030 * Checks that specified symbols (by Unicode code points or ranges) are not used in code.
031 * </div>
032 *
033 * <p>
034 * Rationale: This check helps prevent emoji symbols in code, enforce ASCII-only source files,
035 * or forbid specific Unicode characters.
036 * </p>
037 *
038 * @since 13.1.0
039 */
040@StatelessCheck
041public class IllegalSymbolCheck extends AbstractCheck {
042
043    /**
044     * A key is pointing to the warning message text in "messages.properties" file.
045     */
046    public static final String MSG_KEY = "illegal.symbol";
047
048    /** String Range Separator. */
049    private static final String RANGE_SEPARATOR = "-";
050
051    /** ASCII range upper bound (exclusive). */
052    private static final int ASCII_UPPER_BOUND = 0x80;
053
054    /** Specify the symbols to check for, as Unicode code points or ranges. */
055    private String symbolCodes = "";
056
057    /** Control whether only ASCII characters are allowed. */
058    private boolean asciiOnly;
059
060    /**
061     * Setter to specify the symbols to check for.
062     * Format: comma-separated list of hex codes or ranges
063     * (e.g., "0x2705, 0xd83c-0xd83e").
064     *
065     * @param symbols the symbols specification
066     * @since 13.1.0
067     */
068    public void setSymbolCodes(String symbols) {
069        symbolCodes = symbols;
070    }
071
072    /**
073     * Setter to control whether only ASCII characters are allowed.
074     *
075     * @param asciiOnly true to allow only ASCII characters
076     * @since 13.1.0
077     */
078    public void setAsciiOnly(boolean asciiOnly) {
079        this.asciiOnly = asciiOnly;
080    }
081
082    @Override
083    public int[] getDefaultTokens() {
084        return new int[] {
085            TokenTypes.COMMENT_CONTENT,
086        };
087    }
088
089    @Override
090    public int[] getAcceptableTokens() {
091        return new int[] {
092            TokenTypes.COMMENT_CONTENT,
093            TokenTypes.STRING_LITERAL,
094            TokenTypes.CHAR_LITERAL,
095            TokenTypes.TEXT_BLOCK_CONTENT,
096            TokenTypes.IDENT,
097        };
098    }
099
100    @Override
101    public int[] getRequiredTokens() {
102        return CommonUtil.EMPTY_INT_ARRAY;
103    }
104
105    @Override
106    public boolean isCommentNodesRequired() {
107        return true;
108    }
109
110    @Override
111    public void visitToken(DetailAST ast) {
112        final String text = ast.getText();
113        checkText(text, ast);
114    }
115
116    /**
117     * Check the text for illegal symbols.
118     *
119     * @param text the text to check
120     * @param ast the AST node
121     */
122    private void checkText(String text, DetailAST ast) {
123        final int length = text.length();
124        int offset = 0;
125
126        while (offset < length) {
127            final int codePoint = text.codePointAt(offset);
128
129            if (isIllegalSymbol(codePoint)) {
130                log(ast, MSG_KEY);
131                break;
132            }
133
134            offset += Character.charCount(codePoint);
135        }
136    }
137
138    /**
139     * Check if a code point is illegal based on configured ranges.
140     *
141     * @param codePoint the code point to check
142     * @return true if the code point is illegal
143     */
144    private boolean isIllegalSymbol(int codePoint) {
145        boolean result = false;
146
147        if (asciiOnly && codePoint >= ASCII_UPPER_BOUND) {
148            result = true;
149        }
150        else if (!symbolCodes.isEmpty()) {
151            result = isInSymbolCodes(codePoint);
152        }
153
154        return result;
155    }
156
157    /**
158     * Check if code point is in the configured symbol codes.
159     *
160     * @param codePoint the code point to check
161     * @return true if in symbol codes
162     */
163    private boolean isInSymbolCodes(int codePoint) {
164        boolean found = false;
165        final String[] parts = symbolCodes.split(",", -1);
166
167        for (String part : parts) {
168            final String trimmed = part.trim();
169            if (trimmed.contains(RANGE_SEPARATOR)) {
170                // Range format
171                found = isInRange(codePoint, trimmed);
172            }
173            else {
174                // Single code point
175                final int checkPoint = parseCodePoint(trimmed);
176                found = codePoint == checkPoint;
177            }
178
179            if (found) {
180                break;
181            }
182        }
183
184        return found;
185    }
186
187    /**
188     * Check if code point is in the specified range.
189     *
190     * @param codePoint the code point to check
191     * @param rangeStr the range string (e.g., "0x1F600-0x1F64F")
192     * @return true if in range
193     */
194    private static boolean isInRange(int codePoint, String rangeStr) {
195        final String[] range = rangeStr.split(RANGE_SEPARATOR, -1);
196        boolean result = false;
197
198        if (range.length == 2) {
199            final int start = parseCodePoint(range[0].trim());
200            final int end = parseCodePoint(range[1].trim());
201            result = codePoint >= start && codePoint <= end;
202        }
203
204        return result;
205    }
206
207    /**
208     * Parse a code point from string representation.
209     * Supports formats: 0x1234, \\u1234, U+1234, or decimal.
210     *
211     * @param str the string to parse
212     * @return the code point value
213     */
214    private static int parseCodePoint(String str) {
215        final String cleaned = str.trim();
216        final int hexRadix = 16;
217        final int result;
218
219        if (cleaned.startsWith("\\u")
220                || cleaned.startsWith("0x")
221                || cleaned.startsWith("0X")
222                || cleaned.startsWith("U+")
223                || cleaned.startsWith("u+")) {
224            result = Integer.parseInt(cleaned.substring(2), hexRadix);
225        }
226        else {
227            result = Integer.parseInt(cleaned, hexRadix);
228        }
229        return result;
230    }
231}