001/////////////////////////////////////////////////////////////////////////////////////////////// 002// checkstyle: Checks Java source code and other text files for adherence to a set of rules. 003// Copyright (C) 2001-2026 the original author or authors. 004// 005// This library is free software; you can redistribute it and/or 006// modify it under the terms of the GNU Lesser General Public 007// License as published by the Free Software Foundation; either 008// version 2.1 of the License, or (at your option) any later version. 009// 010// This library is distributed in the hope that it will be useful, 011// but WITHOUT ANY WARRANTY; without even the implied warranty of 012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 013// Lesser General Public License for more details. 014// 015// You should have received a copy of the GNU Lesser General Public 016// License along with this library; if not, write to the Free Software 017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 018/////////////////////////////////////////////////////////////////////////////////////////////// 019 020package com.puppycrawl.tools.checkstyle.checks.coding; 021 022import com.puppycrawl.tools.checkstyle.StatelessCheck; 023import com.puppycrawl.tools.checkstyle.api.AbstractCheck; 024import com.puppycrawl.tools.checkstyle.api.DetailAST; 025import com.puppycrawl.tools.checkstyle.api.TokenTypes; 026import com.puppycrawl.tools.checkstyle.utils.CommonUtil; 027 028/** 029 * <div> 030 * Checks that specified symbols (by Unicode code points or ranges) are not used in code. 031 * By default, blocks common symbol ranges (U+2190–U+27BF and U+1F700–U+10FFFF). 032 * </div> 033 * 034 * <p> 035 * Rationale: This check helps prevent emoji symbols and special characters in code 036 * (commonly added by AI tools), enforce coding standards, or forbid specific Unicode characters. 037 * </p> 038 * 039 * <p> 040 * Default ranges cover: 041 * </p> 042 * <ul> 043 * <li>U+2190–U+27BF: Arrows, Mathematical Operators, Box Drawing, Geometric Shapes, 044 * Miscellaneous Symbols, and Dingbats</li> 045 * <li>U+1F700–U+10FFFF: Alchemical Symbols, Emoticons, Transport Symbols, 046 * and all other pictographic symbols</li> 047 * </ul> 048 * 049 * <p> 050 * For a complete list of Unicode characters and ranges, see: 051 * <a href="https://en.wikipedia.org/wiki/List_of_Unicode_characters"> 052 * List of Unicode characters</a> 053 * </p> 054 * 055 * <ul> 056 * <li> 057 * Property {@code symbolCodes} - Specify the symbols to check for, as Unicode code points 058 * or ranges. Format: comma-separated list of hex codes or ranges 059 * (e.g., {@code "0x2705, 0x1F600-0x1F64F"}). To allow only ASCII characters, 060 * use {@code "0x0080-0x10FFFF"}. 061 * Type is {@code java.lang.String}. 062 * Default value is {@code "0x2190-0x27BF, 0x1F700-0x10FFFF"}. 063 * </li> 064 * </ul> 065 * 066 * @since 13.1.0 067 */ 068@StatelessCheck 069public class IllegalSymbolCheck extends AbstractCheck { 070 071 /** 072 * A key is pointing to the warning message text in "messages.properties" file. 073 */ 074 public static final String MSG_KEY = "illegal.symbol"; 075 076 /** String Range Separator. */ 077 private static final String RANGE_SEPARATOR = "-"; 078 079 /** Specify the symbols to check for, as Unicode code points or ranges. */ 080 private String symbolCodes = "0x2190-0x27BF, 0x1F700-0x10FFFF"; 081 082 /** 083 * Setter to specify the symbols to check for. 084 * Format: comma-separated list of hex codes or ranges 085 * (e.g., "0x2705, 0xd83c-0xd83e"). 086 * 087 * @param symbols the symbols specification 088 * @since 13.1.0 089 */ 090 public void setSymbolCodes(String symbols) { 091 symbolCodes = symbols; 092 } 093 094 @Override 095 public int[] getDefaultTokens() { 096 return new int[] { 097 TokenTypes.COMMENT_CONTENT, 098 }; 099 } 100 101 @Override 102 public int[] getAcceptableTokens() { 103 return new int[] { 104 TokenTypes.COMMENT_CONTENT, 105 TokenTypes.STRING_LITERAL, 106 TokenTypes.CHAR_LITERAL, 107 TokenTypes.TEXT_BLOCK_CONTENT, 108 TokenTypes.IDENT, 109 }; 110 } 111 112 @Override 113 public int[] getRequiredTokens() { 114 return CommonUtil.EMPTY_INT_ARRAY; 115 } 116 117 @Override 118 public boolean isCommentNodesRequired() { 119 return true; 120 } 121 122 @Override 123 public void visitToken(DetailAST ast) { 124 final String text = ast.getText(); 125 checkText(text, ast); 126 } 127 128 /** 129 * Check the text for illegal symbols. 130 * 131 * @param text the text to check 132 * @param ast the AST node 133 */ 134 private void checkText(String text, DetailAST ast) { 135 final int length = text.length(); 136 int offset = 0; 137 138 while (offset < length) { 139 final int codePoint = text.codePointAt(offset); 140 141 if (isIllegalSymbol(codePoint)) { 142 log(ast, MSG_KEY); 143 break; 144 } 145 146 offset += Character.charCount(codePoint); 147 } 148 } 149 150 /** 151 * Check if a code point is illegal based on configured ranges. 152 * 153 * @param codePoint the code point to check 154 * @return true if the code point is illegal 155 */ 156 private boolean isIllegalSymbol(int codePoint) { 157 return !symbolCodes.isEmpty() && isInSymbolCodes(codePoint); 158 } 159 160 /** 161 * Check if code point is in the configured symbol codes. 162 * 163 * @param codePoint the code point to check 164 * @return true if in symbol codes 165 */ 166 private boolean isInSymbolCodes(int codePoint) { 167 boolean found = false; 168 final String[] parts = symbolCodes.split(",", -1); 169 170 for (String part : parts) { 171 final String trimmed = part.trim(); 172 if (trimmed.contains(RANGE_SEPARATOR)) { 173 // Range format 174 found = isInRange(codePoint, trimmed); 175 } 176 else { 177 // Single code point 178 final int checkPoint = parseCodePoint(trimmed); 179 found = codePoint == checkPoint; 180 } 181 182 if (found) { 183 break; 184 } 185 } 186 187 return found; 188 } 189 190 /** 191 * Check if code point is in the specified range. 192 * 193 * @param codePoint the code point to check 194 * @param rangeStr the range string (e.g., "0x1F600-0x1F64F") 195 * @return true if in range 196 */ 197 private static boolean isInRange(int codePoint, String rangeStr) { 198 final String[] range = rangeStr.split(RANGE_SEPARATOR, -1); 199 boolean result = false; 200 201 if (range.length == 2) { 202 final int start = parseCodePoint(range[0].trim()); 203 final int end = parseCodePoint(range[1].trim()); 204 result = codePoint >= start && codePoint <= end; 205 } 206 207 return result; 208 } 209 210 /** 211 * Parse a code point from string representation. 212 * Supports formats: 0x1234, \\u1234, U+1234, or decimal. 213 * 214 * @param str the string to parse 215 * @return the code point value 216 */ 217 private static int parseCodePoint(String str) { 218 final String cleaned = str.trim(); 219 final int hexRadix = 16; 220 final int result; 221 222 if (cleaned.startsWith("\\u") 223 || cleaned.startsWith("0x") 224 || cleaned.startsWith("0X") 225 || cleaned.startsWith("U+") 226 || cleaned.startsWith("u+")) { 227 result = Integer.parseInt(cleaned.substring(2), hexRadix); 228 } 229 else { 230 result = Integer.parseInt(cleaned, hexRadix); 231 } 232 return result; 233 } 234}