001/////////////////////////////////////////////////////////////////////////////////////////////// 002// checkstyle: Checks Java source code and other text files for adherence to a set of rules. 003// Copyright (C) 2001-2026 the original author or authors. 004// 005// This library is free software; you can redistribute it and/or 006// modify it under the terms of the GNU Lesser General Public 007// License as published by the Free Software Foundation; either 008// version 2.1 of the License, or (at your option) any later version. 009// 010// This library is distributed in the hope that it will be useful, 011// but WITHOUT ANY WARRANTY; without even the implied warranty of 012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 013// Lesser General Public License for more details. 014// 015// You should have received a copy of the GNU Lesser General Public 016// License along with this library; if not, write to the Free Software 017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 018/////////////////////////////////////////////////////////////////////////////////////////////// 019 020package com.puppycrawl.tools.checkstyle.checks.coding; 021 022import com.puppycrawl.tools.checkstyle.StatelessCheck; 023import com.puppycrawl.tools.checkstyle.api.AbstractCheck; 024import com.puppycrawl.tools.checkstyle.api.DetailAST; 025import com.puppycrawl.tools.checkstyle.api.TokenTypes; 026import com.puppycrawl.tools.checkstyle.utils.CommonUtil; 027 028/** 029 * <div> 030 * Checks that specified symbols (by Unicode code points or ranges) are not used in code. 031 * </div> 032 * 033 * <p> 034 * Rationale: This check helps prevent emoji symbols in code, enforce ASCII-only source files, 035 * or forbid specific Unicode characters. 036 * </p> 037 * 038 * @since 13.1.0 039 */ 040@StatelessCheck 041public class IllegalSymbolCheck extends AbstractCheck { 042 043 /** 044 * A key is pointing to the warning message text in "messages.properties" file. 045 */ 046 public static final String MSG_KEY = "illegal.symbol"; 047 048 /** String Range Separator. */ 049 private static final String RANGE_SEPARATOR = "-"; 050 051 /** ASCII range upper bound (exclusive). */ 052 private static final int ASCII_UPPER_BOUND = 0x80; 053 054 /** Specify the symbols to check for, as Unicode code points or ranges. */ 055 private String symbolCodes = ""; 056 057 /** Control whether only ASCII characters are allowed. */ 058 private boolean asciiOnly; 059 060 /** 061 * Setter to specify the symbols to check for. 062 * Format: comma-separated list of hex codes or ranges 063 * (e.g., "0x2705, 0xd83c-0xd83e"). 064 * 065 * @param symbols the symbols specification 066 * @since 13.1.0 067 */ 068 public void setSymbolCodes(String symbols) { 069 symbolCodes = symbols; 070 } 071 072 /** 073 * Setter to control whether only ASCII characters are allowed. 074 * 075 * @param asciiOnly true to allow only ASCII characters 076 * @since 13.1.0 077 */ 078 public void setAsciiOnly(boolean asciiOnly) { 079 this.asciiOnly = asciiOnly; 080 } 081 082 @Override 083 public int[] getDefaultTokens() { 084 return new int[] { 085 TokenTypes.COMMENT_CONTENT, 086 }; 087 } 088 089 @Override 090 public int[] getAcceptableTokens() { 091 return new int[] { 092 TokenTypes.COMMENT_CONTENT, 093 TokenTypes.STRING_LITERAL, 094 TokenTypes.CHAR_LITERAL, 095 TokenTypes.TEXT_BLOCK_CONTENT, 096 TokenTypes.IDENT, 097 }; 098 } 099 100 @Override 101 public int[] getRequiredTokens() { 102 return CommonUtil.EMPTY_INT_ARRAY; 103 } 104 105 @Override 106 public boolean isCommentNodesRequired() { 107 return true; 108 } 109 110 @Override 111 public void visitToken(DetailAST ast) { 112 final String text = ast.getText(); 113 checkText(text, ast); 114 } 115 116 /** 117 * Check the text for illegal symbols. 118 * 119 * @param text the text to check 120 * @param ast the AST node 121 */ 122 private void checkText(String text, DetailAST ast) { 123 final int length = text.length(); 124 int offset = 0; 125 126 while (offset < length) { 127 final int codePoint = text.codePointAt(offset); 128 129 if (isIllegalSymbol(codePoint)) { 130 log(ast, MSG_KEY); 131 break; 132 } 133 134 offset += Character.charCount(codePoint); 135 } 136 } 137 138 /** 139 * Check if a code point is illegal based on configured ranges. 140 * 141 * @param codePoint the code point to check 142 * @return true if the code point is illegal 143 */ 144 private boolean isIllegalSymbol(int codePoint) { 145 boolean result = false; 146 147 if (asciiOnly && codePoint >= ASCII_UPPER_BOUND) { 148 result = true; 149 } 150 else if (!symbolCodes.isEmpty()) { 151 result = isInSymbolCodes(codePoint); 152 } 153 154 return result; 155 } 156 157 /** 158 * Check if code point is in the configured symbol codes. 159 * 160 * @param codePoint the code point to check 161 * @return true if in symbol codes 162 */ 163 private boolean isInSymbolCodes(int codePoint) { 164 boolean found = false; 165 final String[] parts = symbolCodes.split(",", -1); 166 167 for (String part : parts) { 168 final String trimmed = part.trim(); 169 if (trimmed.contains(RANGE_SEPARATOR)) { 170 // Range format 171 found = isInRange(codePoint, trimmed); 172 } 173 else { 174 // Single code point 175 final int checkPoint = parseCodePoint(trimmed); 176 found = codePoint == checkPoint; 177 } 178 179 if (found) { 180 break; 181 } 182 } 183 184 return found; 185 } 186 187 /** 188 * Check if code point is in the specified range. 189 * 190 * @param codePoint the code point to check 191 * @param rangeStr the range string (e.g., "0x1F600-0x1F64F") 192 * @return true if in range 193 */ 194 private static boolean isInRange(int codePoint, String rangeStr) { 195 final String[] range = rangeStr.split(RANGE_SEPARATOR, -1); 196 boolean result = false; 197 198 if (range.length == 2) { 199 final int start = parseCodePoint(range[0].trim()); 200 final int end = parseCodePoint(range[1].trim()); 201 result = codePoint >= start && codePoint <= end; 202 } 203 204 return result; 205 } 206 207 /** 208 * Parse a code point from string representation. 209 * Supports formats: 0x1234, \\u1234, U+1234, or decimal. 210 * 211 * @param str the string to parse 212 * @return the code point value 213 */ 214 private static int parseCodePoint(String str) { 215 final String cleaned = str.trim(); 216 final int hexRadix = 16; 217 final int result; 218 219 if (cleaned.startsWith("\\u") 220 || cleaned.startsWith("0x") 221 || cleaned.startsWith("0X") 222 || cleaned.startsWith("U+") 223 || cleaned.startsWith("u+")) { 224 result = Integer.parseInt(cleaned.substring(2), hexRadix); 225 } 226 else { 227 result = Integer.parseInt(cleaned, hexRadix); 228 } 229 return result; 230 } 231}