///////////////////////////////////////////////////////////////////////////////////////////////
// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
// Copyright (C) 2001-2026 the original author or authors.
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
///////////////////////////////////////////////////////////////////////////////////////////////

package com.puppycrawl.tools.checkstyle.meta;

import java.util.Optional;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.puppycrawl.tools.checkstyle.api.DetailNode;
import com.puppycrawl.tools.checkstyle.api.JavadocCommentsTokenTypes;
import com.puppycrawl.tools.checkstyle.utils.JavadocUtil;

/**
 * Class for scraping module metadata from the corresponding class' class-level javadoc.
 */
public final class JavadocMetadataScraperUtil {

    /**
     * Regular expression for detecting ANTLR token names (e.g. CLASS_DEF): two or more
     * upper-case letters or underscores. It is only used with {@link Matcher#matches()},
     * so a flat quantifier is sufficient and avoids the backtracking cost of a nested one.
     */
    private static final Pattern TOKEN_TEXT_PATTERN = Pattern.compile("[A-Z_]{2,}");

    /**
     * Private utility constructor.
     */
    private JavadocMetadataScraperUtil() {
    }

    /**
     * Performs a depth-first traversal that begins at {@code startNode} and stops once
     * {@code endNode} has been reached, and constructs the concatenated text of all visited
     * nodes that are considered content, skipping leading asterisks and nodes whose text
     * looks like a token name. Children of {@code @code} and {@code @literal} inline tags
     * are converted before being appended.
     *
     * @param startNode the node where traversal begins (inclusive)
     * @param endNode the node where traversal ends (inclusive)
     * @return the constructed text from the specified subtree range, with leading and
     *     trailing whitespace trimmed
     */
    public static String constructSubTreeText(DetailNode startNode,
                                              DetailNode endNode) {
        DetailNode curNode = startNode;
        final StringBuilder result = new StringBuilder(1024);

        while (curNode != null) {
            if (isContentToWrite(curNode)) {
                String childText = curNode.getText();

                if (isInsideCodeInlineTag(curNode)) {
                    childText = adjustCodeInlineTagChildToHtml(curNode);
                }
                else if (isInsideLiteralInlineTag(curNode)) {
                    childText = adjustLiteralInlineTagChildToText(curNode);
                }

                result.append(childText);
            }

            // Descend first; when there is nothing below, climb towards the root and take
            // the next sibling of the closest ancestor that has one. Reaching endNode
            // during the climb stops the search and thereby ends the traversal.
            // NOTE(review): the climb does not guard against running past the root, so
            // endNode is presumably always reachable from startNode - confirm with callers.
            DetailNode toVisit = curNode.getFirstChild();
            while (curNode != endNode && toVisit == null) {
                toVisit = curNode.getNextSibling();
                curNode = curNode.getParent();
            }

            curNode = toVisit;
        }
        return result.toString().trim();
    }

    /**
     * Checks whether the given node is a direct child of a {@code @code} Javadoc inline tag.
     *
     * @param node the node to check
     * @return true if the parent of the node is a {@code @code} inline tag, false otherwise
     */
    private static boolean isInsideCodeInlineTag(DetailNode node) {
        return node.getParent() != null
                && node.getParent().getType() == JavadocCommentsTokenTypes.CODE_INLINE_TAG;
    }

    /**
     * Checks whether the given node is a direct child of a {@code @literal} Javadoc inline tag.
     *
     * @param node the node to check
     * @return true if the parent of the node is a {@code @literal} inline tag, false otherwise
     */
    private static boolean isInsideLiteralInlineTag(DetailNode node) {
        return node.getParent() != null
                && node.getParent().getType() == JavadocCommentsTokenTypes.LITERAL_INLINE_TAG;
    }

    /**
     * Checks whether selected Javadoc node is considered as something to write.
     * Leading asterisks are never written. {@code TEXT} nodes are always written; any other
     * node is written only if its text does not look like a token name.
     *
     * @param detailNode javadoc node to check.
     * @return whether javadoc node is something to write.
     */
    private static boolean isContentToWrite(DetailNode detailNode) {

        return detailNode.getType() != JavadocCommentsTokenTypes.LEADING_ASTERISK
            && (detailNode.getType() == JavadocCommentsTokenTypes.TEXT
                || !TOKEN_TEXT_PATTERN.matcher(detailNode.getText()).matches());
    }

    /**
     * Adjusts certain child of {@code @code} Javadoc inline tag to its analogous html format.
     * The tag delimiters become the opening and closing {@code code} html element, the tag
     * name is dropped, and any other child is trimmed and XML-escaped.
     *
     * @param codeChild {@code @code} child to convert.
     * @return converted {@code @code} child element, otherwise the trimmed and escaped text.
     */
    public static String adjustCodeInlineTagChildToHtml(DetailNode codeChild) {

        return switch (codeChild.getType()) {
            case JavadocCommentsTokenTypes.JAVADOC_INLINE_TAG_END -> "</code>";
            case JavadocCommentsTokenTypes.TAG_NAME -> "";
            case JavadocCommentsTokenTypes.JAVADOC_INLINE_TAG_START -> "<code>";
            default -> escapeXmlChars(codeChild.getText().trim());
        };
    }

    /**
     * Adjusts a child of {@code @literal} Javadoc inline tag to its XML-escaped plain text form.
     *
     * @param literalChild child node of the {@code @literal} inline tag.
     * @return trimmed and escaped text for content nodes, or empty string for the tag
     *     delimiters and the tag name.
     */
    public static String adjustLiteralInlineTagChildToText(DetailNode literalChild) {
        return switch (literalChild.getType()) {
            case JavadocCommentsTokenTypes.JAVADOC_INLINE_TAG_END,
                 JavadocCommentsTokenTypes.JAVADOC_INLINE_TAG_START,
                 JavadocCommentsTokenTypes.TAG_NAME -> "";
            default -> escapeXmlChars(literalChild.getText().trim());
        };
    }

    /**
     * Escapes special XML characters in the given text: the ampersand, less-than and
     * greater-than characters are replaced with their predefined entity references.
     *
     * @param text the text to escape.
     * @return text with XML special characters escaped.
     */
    private static String escapeXmlChars(String text) {
        // The ampersand has to be handled first; otherwise the ampersands introduced by
        // the "&lt;" and "&gt;" replacements would be escaped a second time.
        return text.replace("&", "&amp;")
                .replace("<", "&lt;")
                .replace(">", "&gt;");
    }

    /**
     * Returns the first node reported by {@link JavadocUtil#getAllNodesOfType} for the given
     * parent and {@code tokenType}.
     *
     * @param node the parent node
     * @param tokenType the token type to match
     * @return an {@link Optional} containing the first matching node,
     *         or an empty {@link Optional} if none is found
     */
    private static Optional<DetailNode> getFirstChildOfType(DetailNode node, int tokenType) {
        return JavadocUtil.getAllNodesOfType(node, tokenType).stream().findFirst();
    }

    /**
     * Checks whether the text of the first {@code JavadocCommentsTokenTypes.TEXT} node found
     * for the given parent matches the given pattern in its entirety. Only the first such
     * node is examined.
     *
     * @param ast parent javadoc node
     * @param pattern pattern to match
     * @return true if the first text node exists and its whole text matches the pattern,
     *     false otherwise
     */
    public static boolean isChildNodeTextMatches(DetailNode ast, Pattern pattern) {
        return getFirstChildOfType(ast, JavadocCommentsTokenTypes.TEXT)
                .map(DetailNode::getText)
                .map(pattern::matcher)
                .map(Matcher::matches)
                .orElse(Boolean.FALSE);
    }
}