001///////////////////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
003// Copyright (C) 2001-2026 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018///////////////////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.meta;
021
022import java.util.Optional;
023import java.util.regex.Matcher;
024import java.util.regex.Pattern;
025
026import com.puppycrawl.tools.checkstyle.api.DetailNode;
027import com.puppycrawl.tools.checkstyle.api.JavadocCommentsTokenTypes;
028import com.puppycrawl.tools.checkstyle.utils.JavadocUtil;
029
030/**
031 * Class for scraping module metadata from the corresponding class' class-level javadoc.
032 */
033public final class JavadocMetadataScraperUtil {
034
035    /** Regular expression for detecting ANTLR tokens(for e.g. CLASS_DEF). */
036    private static final Pattern TOKEN_TEXT_PATTERN = Pattern.compile("([A-Z_]{2,})+");
037
038    /**
039     * Private utility constructor.
040     */
041    private JavadocMetadataScraperUtil() {
042    }
043
044    /**
045     * Performs a depth-first traversal of the subtree starting at {@code startNode}
046     * and ending at {@code endNode}, and constructs the concatenated text of all nodes
047     * in that range, ignoring {@code JavadocToken} texts.
048     *
049     * @param startNode the node where traversal begins (inclusive)
050     * @param endNode the node where traversal ends (inclusive)
051     * @return the constructed text from the specified subtree range
052     */
053    public static String constructSubTreeText(DetailNode startNode,
054                                               DetailNode endNode) {
055        DetailNode curNode = startNode;
056        final StringBuilder result = new StringBuilder(1024);
057
058        while (curNode != null) {
059            if (isContentToWrite(curNode)) {
060                String childText = curNode.getText();
061
062                if (isInsideCodeInlineTag(curNode)) {
063                    childText = adjustCodeInlineTagChildToHtml(curNode);
064                }
065                else if (isInsideLiteralInlineTag(curNode)) {
066                    childText = adjustLiteralInlineTagChildToText(curNode);
067                }
068
069                result.append(childText);
070            }
071
072            DetailNode toVisit = curNode.getFirstChild();
073            while (curNode != endNode && toVisit == null) {
074                toVisit = curNode.getNextSibling();
075                curNode = curNode.getParent();
076            }
077
078            curNode = toVisit;
079        }
080        return result.toString().trim();
081    }
082
083    /**
084     * Checks whether the given node is inside a {@code @code} Javadoc inline tag.
085     *
086     * @param node the node to check
087     * @return true if the node is inside a {@code @code} inline tag, false otherwise
088     */
089    private static boolean isInsideCodeInlineTag(DetailNode node) {
090        return node.getParent() != null
091                && node.getParent().getType() == JavadocCommentsTokenTypes.CODE_INLINE_TAG;
092    }
093
094    /**
095     * Checks whether the given node is inside a {@code @literal} Javadoc inline tag.
096     *
097     * @param node the node to check
098     * @return true if the node is inside a {@code @literal} inline tag, false otherwise
099     */
100    private static boolean isInsideLiteralInlineTag(DetailNode node) {
101        return node.getParent() != null
102                && node.getParent().getType() == JavadocCommentsTokenTypes.LITERAL_INLINE_TAG;
103    }
104
105    /**
106     * Checks whether selected Javadoc node is considered as something to write.
107     *
108     * @param detailNode javadoc node to check.
109     * @return whether javadoc node is something to write.
110     */
111    private static boolean isContentToWrite(DetailNode detailNode) {
112
113        return detailNode.getType() != JavadocCommentsTokenTypes.LEADING_ASTERISK
114            && (detailNode.getType() == JavadocCommentsTokenTypes.TEXT
115            || !TOKEN_TEXT_PATTERN.matcher(detailNode.getText()).matches());
116    }
117
118    /**
119     * Adjusts certain child of {@code @code} Javadoc inline tag to its analogous html format.
120     *
121     * @param codeChild {@code @code} child to convert.
122     * @return converted {@code @code} child element, otherwise just the original text.
123     */
124    public static String adjustCodeInlineTagChildToHtml(DetailNode codeChild) {
125
126        return switch (codeChild.getType()) {
127            case JavadocCommentsTokenTypes.JAVADOC_INLINE_TAG_END -> "</code>";
128            case JavadocCommentsTokenTypes.TAG_NAME -> "";
129            case JavadocCommentsTokenTypes.JAVADOC_INLINE_TAG_START -> "<code>";
130            default -> escapeXmlChars(codeChild.getText().trim());
131        };
132    }
133
134    /**
135     * Adjusts a child of {@code @literal} Javadoc inline tag to its XML-escaped plain text form.
136     *
137     * @param literalChild child node of the {@code @literal} inline tag.
138     * @return escaped text for content nodes, or empty string for structural tokens.
139     */
140    public static String adjustLiteralInlineTagChildToText(DetailNode literalChild) {
141        return switch (literalChild.getType()) {
142            case JavadocCommentsTokenTypes.JAVADOC_INLINE_TAG_END,
143                 JavadocCommentsTokenTypes.JAVADOC_INLINE_TAG_START,
144                 JavadocCommentsTokenTypes.TAG_NAME -> "";
145            default -> escapeXmlChars(literalChild.getText().trim());
146        };
147    }
148
149    /**
150     * Escapes special XML characters in the given text.
151     *
152     * @param text the text to escape.
153     * @return text with XML special characters escaped.
154     */
155    private static String escapeXmlChars(String text) {
156        return text.replace("&", "&amp;")
157            .replace("<", "&lt;")
158            .replace(">", "&gt;");
159    }
160
161    /**
162     * Returns the first child node of the given parent that matches the provided {@code tokenType}.
163     *
164     * @param node the parent node
165     * @param tokenType the token type to match
166     * @return an {@link Optional} containing the first matching child node,
167     *         or an empty {@link Optional} if none is found
168     */
169    private static Optional<DetailNode> getFirstChildOfType(DetailNode node, int tokenType) {
170        return JavadocUtil.getAllNodesOfType(node, tokenType).stream().findFirst();
171    }
172
173    /**
174     * Checks whether the first child {@code JavadocTokenType.TEXT} node matches given pattern.
175     *
176     * @param ast parent javadoc node
177     * @param pattern pattern to match
178     * @return true if one of child text nodes matches pattern
179     */
180    public static boolean isChildNodeTextMatches(DetailNode ast, Pattern pattern) {
181        return getFirstChildOfType(ast, JavadocCommentsTokenTypes.TEXT)
182                .map(DetailNode::getText)
183                .map(pattern::matcher)
184                .map(Matcher::matches)
185                .orElse(Boolean.FALSE);
186    }
187}