001///////////////////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
003// Copyright (C) 2001-2025 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018///////////////////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.checks.javadoc;
021
022import java.util.ArrayList;
023import java.util.Arrays;
024import java.util.BitSet;
025import java.util.List;
026import java.util.Optional;
027import java.util.regex.Pattern;
028import java.util.stream.Stream;
029
030import com.puppycrawl.tools.checkstyle.StatelessCheck;
031import com.puppycrawl.tools.checkstyle.api.DetailNode;
032import com.puppycrawl.tools.checkstyle.api.JavadocTokenTypes;
033import com.puppycrawl.tools.checkstyle.utils.CommonUtil;
034import com.puppycrawl.tools.checkstyle.utils.JavadocUtil;
035import com.puppycrawl.tools.checkstyle.utils.TokenUtil;
036
037/**
038 * <div>
039 * Checks that
040 * <a href="https://www.oracle.com/technical-resources/articles/java/javadoc-tool.html#firstsentence">
041 * Javadoc summary sentence</a> does not contain phrases that are not recommended to use.
042 * Summaries that contain only the {@code {@inheritDoc}} tag are skipped.
043 * Summaries that contain a non-empty {@code {@return}} are allowed.
044 * Check also violate Javadoc that does not contain first sentence, though with {@code {@return}} a
045 * period is not required as the Javadoc tool adds it.
046 * </div>
047 *
048 * <p>
049 * Note: For defining a summary, both the first sentence and the @summary tag approaches
050 * are supported.
051 * </p>
052 *
053 * @since 6.0
054 */
055@StatelessCheck
056public class SummaryJavadocCheck extends AbstractJavadocCheck {
057
058    /**
059     * A key is pointing to the warning message text in "messages.properties"
060     * file.
061     */
062    public static final String MSG_SUMMARY_FIRST_SENTENCE = "summary.first.sentence";
063
064    /**
065     * A key is pointing to the warning message text in "messages.properties"
066     * file.
067     */
068    public static final String MSG_SUMMARY_JAVADOC = "summary.javaDoc";
069
070    /**
071     * A key is pointing to the warning message text in "messages.properties"
072     * file.
073     */
074    public static final String MSG_SUMMARY_JAVADOC_MISSING = "summary.javaDoc.missing";
075
076    /**
077     * A key is pointing to the warning message text in "messages.properties" file.
078     */
079    public static final String MSG_SUMMARY_MISSING_PERIOD = "summary.javaDoc.missing.period";
080
081    /**
082     * This regexp is used to convert multiline javadoc to single-line without stars.
083     */
084    private static final Pattern JAVADOC_MULTILINE_TO_SINGLELINE_PATTERN =
085            Pattern.compile("\n +(\\*)|^ +(\\*)");
086
087    /**
088     * This regexp is used to remove html tags, whitespace, and asterisks from a string.
089     */
090    private static final Pattern HTML_ELEMENTS =
091            Pattern.compile("<[^>]*>");
092
093    /** Default period literal. */
094    private static final String DEFAULT_PERIOD = ".";
095
096    /** Summary tag text. */
097    private static final String SUMMARY_TEXT = "@summary";
098
099    /** Return tag text. */
100    private static final String RETURN_TEXT = "@return";
101
102    /** Set of allowed Tokens tags in summary java doc. */
103    private static final BitSet ALLOWED_TYPES = TokenUtil.asBitSet(
104                    JavadocTokenTypes.WS,
105                    JavadocTokenTypes.DESCRIPTION,
106                    JavadocTokenTypes.TEXT);
107
108    /**
109     * Specify the regexp for forbidden summary fragments.
110     */
111    private Pattern forbiddenSummaryFragments = CommonUtil.createPattern("^$");
112
113    /**
114     * Specify the period symbol. Used to check the first sentence ends with a period. Periods that
115     * are not followed by a whitespace character are ignored (eg. the period in v1.0). Because some
116     * periods include whitespace built into the character, if this is set to a non-default value
117     * any period will end the sentence, whether it is followed by whitespace or not.
118     */
119    private String period = DEFAULT_PERIOD;
120
121    /**
122     * Setter to specify the regexp for forbidden summary fragments.
123     *
124     * @param pattern a pattern.
125     * @since 6.0
126     */
127    public void setForbiddenSummaryFragments(Pattern pattern) {
128        forbiddenSummaryFragments = pattern;
129    }
130
131    /**
132     * Setter to specify the period symbol. Used to check the first sentence ends with a period.
133     * Periods that are not followed by a whitespace character are ignored (eg. the period in v1.0).
134     * Because some periods include whitespace built into the character, if this is set to a
135     * non-default value any period will end the sentence, whether it is followed by whitespace or
136     * not.
137     *
138     * @param period period's value.
139     * @since 6.2
140     */
141    public void setPeriod(String period) {
142        this.period = period;
143    }
144
145    @Override
146    public int[] getDefaultJavadocTokens() {
147        return new int[] {
148            JavadocTokenTypes.JAVADOC,
149        };
150    }
151
152    @Override
153    public int[] getRequiredJavadocTokens() {
154        return getAcceptableJavadocTokens();
155    }
156
157    @Override
158    public void visitJavadocToken(DetailNode ast) {
159        final Optional<DetailNode> inlineTagNode = getInlineTagNode(ast);
160        boolean shouldValidateUntaggedSummary = true;
161        if (inlineTagNode.isPresent()) {
162            final DetailNode node = inlineTagNode.get();
163            if (isSummaryTag(node) && isDefinedFirst(node)) {
164                shouldValidateUntaggedSummary = false;
165                validateSummaryTag(node);
166            }
167            else if (isInlineReturnTag(node)) {
168                shouldValidateUntaggedSummary = false;
169                validateInlineReturnTag(node);
170            }
171        }
172        if (shouldValidateUntaggedSummary && !startsWithInheritDoc(ast)) {
173            validateUntaggedSummary(ast);
174        }
175    }
176
177    /**
178     * Checks the javadoc text for {@code period} at end and forbidden fragments.
179     *
180     * @param ast the javadoc text node
181     */
182    private void validateUntaggedSummary(DetailNode ast) {
183        final String summaryDoc = getSummarySentence(ast);
184        if (summaryDoc.isEmpty()) {
185            log(ast.getLineNumber(), MSG_SUMMARY_JAVADOC_MISSING);
186        }
187        else if (!period.isEmpty()) {
188            if (summaryDoc.contains(period)) {
189                final Optional<String> firstSentence = getFirstSentence(ast, period);
190
191                if (firstSentence.isPresent()) {
192                    if (containsForbiddenFragment(firstSentence.get())) {
193                        log(ast.getLineNumber(), MSG_SUMMARY_JAVADOC);
194                    }
195                }
196                else {
197                    log(ast.getLineNumber(), MSG_SUMMARY_FIRST_SENTENCE);
198                }
199            }
200            else {
201                log(ast.getLineNumber(), MSG_SUMMARY_FIRST_SENTENCE);
202            }
203        }
204    }
205
206    /**
207     * Gets the node for the inline tag if present.
208     *
209     * @param javadoc javadoc root node.
210     * @return the node for the inline tag if present.
211     */
212    private static Optional<DetailNode> getInlineTagNode(DetailNode javadoc) {
213        return Arrays.stream(javadoc.getChildren())
214            .filter(SummaryJavadocCheck::isInlineTagPresent)
215            .findFirst()
216            .map(SummaryJavadocCheck::getInlineTagNodeForAst);
217    }
218
219    /**
220     * Whether the {@code {@summary}} tag is defined first in the javadoc.
221     *
222     * @param inlineSummaryTag node of type {@link JavadocTokenTypes#JAVADOC_INLINE_TAG}
223     * @return {@code true} if the {@code {@summary}} tag is defined first in the javadoc
224     */
225    private static boolean isDefinedFirst(DetailNode inlineSummaryTag) {
226        boolean isDefinedFirst = true;
227        DetailNode currentAst = inlineSummaryTag;
228        while (currentAst != null && isDefinedFirst) {
229            isDefinedFirst = switch (currentAst.getType()) {
230                case JavadocTokenTypes.TEXT -> currentAst.getText().isBlank();
231                case JavadocTokenTypes.HTML_ELEMENT -> !isTextPresentInsideHtmlTag(currentAst);
232                default -> isDefinedFirst;
233            };
234            currentAst = JavadocUtil.getPreviousSibling(currentAst);
235        }
236        return isDefinedFirst;
237    }
238
239    /**
240     * Whether some text is present inside the HTML element or tag.
241     *
242     * @param node DetailNode of type {@link JavadocTokenTypes#HTML_TAG}
243     *             or {@link JavadocTokenTypes#HTML_ELEMENT}
244     * @return {@code true} if some text is present inside the HTML element or tag
245     */
246    public static boolean isTextPresentInsideHtmlTag(DetailNode node) {
247        DetailNode nestedChild = JavadocUtil.getFirstChild(node);
248        if (node.getType() == JavadocTokenTypes.HTML_ELEMENT) {
249            nestedChild = JavadocUtil.getFirstChild(nestedChild);
250        }
251        boolean isTextPresentInsideHtmlTag = false;
252        while (nestedChild != null && !isTextPresentInsideHtmlTag) {
253            isTextPresentInsideHtmlTag = switch (nestedChild.getType()) {
254                case JavadocTokenTypes.TEXT -> !nestedChild.getText().isBlank();
255                case JavadocTokenTypes.HTML_TAG, JavadocTokenTypes.HTML_ELEMENT ->
256                    isTextPresentInsideHtmlTag(nestedChild);
257                default -> isTextPresentInsideHtmlTag;
258            };
259            nestedChild = JavadocUtil.getNextSibling(nestedChild);
260        }
261        return isTextPresentInsideHtmlTag;
262    }
263
264    /**
265     * Checks if the inline tag node is present.
266     *
267     * @param ast ast node to check.
268     * @return true, if the inline tag node is present.
269     */
270    private static boolean isInlineTagPresent(DetailNode ast) {
271        return getInlineTagNodeForAst(ast) != null;
272    }
273
274    /**
275     * Returns an inline javadoc tag node that is within a html tag.
276     *
277     * @param ast html tag node.
278     * @return inline summary javadoc tag node or null if no node is found.
279     */
280    private static DetailNode getInlineTagNodeForAst(DetailNode ast) {
281        DetailNode node = ast;
282        DetailNode result = null;
283        // node can never be null as this method is called when there is a HTML_ELEMENT
284        if (node.getType() == JavadocTokenTypes.JAVADOC_INLINE_TAG) {
285            result = node;
286        }
287        else if (node.getType() == JavadocTokenTypes.HTML_TAG) {
288            // HTML_TAG always has more than 2 children.
289            node = node.getChildren()[1];
290            result = getInlineTagNodeForAst(node);
291        }
292        else if (node.getType() == JavadocTokenTypes.HTML_ELEMENT
293                // Condition for SINGLETON html element which cannot contain summary node
294                && node.getChildren()[0].getChildren().length > 1) {
295            // Html elements have one tested tag before actual content inside it
296            node = node.getChildren()[0].getChildren()[1];
297            result = getInlineTagNodeForAst(node);
298        }
299        return result;
300    }
301
302    /**
303     * Checks if the javadoc inline tag is {@code {@summary}} tag.
304     *
305     * @param javadocInlineTag node of type {@link JavadocTokenTypes#JAVADOC_INLINE_TAG}
306     * @return {@code true} if inline tag is summary tag.
307     */
308    private static boolean isSummaryTag(DetailNode javadocInlineTag) {
309        return isInlineTagWithName(javadocInlineTag, SUMMARY_TEXT);
310    }
311
312    /**
313     * Checks if the first tag inside ast is {@code {@return}} tag.
314     *
315     * @param javadocInlineTag node of type {@link JavadocTokenTypes#JAVADOC_INLINE_TAG}
316     * @return {@code true} if first tag is return tag.
317     */
318    private static boolean isInlineReturnTag(DetailNode javadocInlineTag) {
319        return isInlineTagWithName(javadocInlineTag, RETURN_TEXT);
320    }
321
322    /**
323     * Checks if the first tag inside ast is a tag with the given name.
324     *
325     * @param javadocInlineTag node of type {@link JavadocTokenTypes#JAVADOC_INLINE_TAG}
326     * @param name name of inline tag.
327     *
328     * @return {@code true} if first tag is a tag with the given name.
329     */
330    private static boolean isInlineTagWithName(DetailNode javadocInlineTag, String name) {
331        final DetailNode[] child = javadocInlineTag.getChildren();
332
333        // Checking size of ast is not required, since ast contains
334        // children of Inline Tag, as at least 2 children will be present which are
335        // RCURLY and LCURLY.
336        return name.equals(child[1].getText());
337    }
338
339    /**
340     * Checks the inline summary (if present) for {@code period} at end and forbidden fragments.
341     *
342     * @param inlineSummaryTag node of type {@link JavadocTokenTypes#JAVADOC_INLINE_TAG}
343     */
344    private void validateSummaryTag(DetailNode inlineSummaryTag) {
345        final String inlineSummary = getContentOfInlineCustomTag(inlineSummaryTag);
346        final String summaryVisible = getVisibleContent(inlineSummary);
347        if (summaryVisible.isEmpty()) {
348            log(inlineSummaryTag.getLineNumber(), MSG_SUMMARY_JAVADOC_MISSING);
349        }
350        else if (!period.isEmpty()) {
351            final boolean isPeriodNotAtEnd =
352                    summaryVisible.lastIndexOf(period) != summaryVisible.length() - 1;
353            if (isPeriodNotAtEnd) {
354                log(inlineSummaryTag.getLineNumber(), MSG_SUMMARY_MISSING_PERIOD);
355            }
356            else if (containsForbiddenFragment(inlineSummary)) {
357                log(inlineSummaryTag.getLineNumber(), MSG_SUMMARY_JAVADOC);
358            }
359        }
360    }
361
362    /**
363     * Checks the inline return for forbidden fragments.
364     *
365     * @param inlineReturnTag node of type {@link JavadocTokenTypes#JAVADOC_INLINE_TAG}
366     */
367    private void validateInlineReturnTag(DetailNode inlineReturnTag) {
368        final String inlineReturn = getContentOfInlineCustomTag(inlineReturnTag);
369        final String returnVisible = getVisibleContent(inlineReturn);
370        if (returnVisible.isEmpty()) {
371            log(inlineReturnTag.getLineNumber(), MSG_SUMMARY_JAVADOC_MISSING);
372        }
373        else if (containsForbiddenFragment(inlineReturn)) {
374            log(inlineReturnTag.getLineNumber(), MSG_SUMMARY_JAVADOC);
375        }
376    }
377
378    /**
379     * Gets the content of inline custom tag.
380     *
381     * @param inlineTag inline tag node.
382     * @return String consisting of the content of inline custom tag.
383     */
384    public static String getContentOfInlineCustomTag(DetailNode inlineTag) {
385        final DetailNode[] childrenOfInlineTag = inlineTag.getChildren();
386        final StringBuilder customTagContent = new StringBuilder(256);
387        final int indexOfContentOfSummaryTag = 3;
388        if (childrenOfInlineTag.length != indexOfContentOfSummaryTag) {
389            DetailNode currentNode = childrenOfInlineTag[indexOfContentOfSummaryTag];
390            while (currentNode.getType() != JavadocTokenTypes.JAVADOC_INLINE_TAG_END) {
391                extractInlineTagContent(currentNode, customTagContent);
392                currentNode = JavadocUtil.getNextSibling(currentNode);
393            }
394        }
395        return customTagContent.toString();
396    }
397
398    /**
399     * Extracts the content of inline custom tag recursively.
400     *
401     * @param node DetailNode
402     * @param customTagContent content of custom tag
403     */
404    private static void extractInlineTagContent(DetailNode node,
405        StringBuilder customTagContent) {
406        final DetailNode[] children = node.getChildren();
407        if (children.length == 0) {
408            customTagContent.append(node.getText());
409        }
410        else {
411            for (DetailNode child : children) {
412                if (child.getType() != JavadocTokenTypes.LEADING_ASTERISK) {
413                    extractInlineTagContent(child, customTagContent);
414                }
415            }
416        }
417    }
418
419    /**
420     * Gets the string that is visible to user in javadoc.
421     *
422     * @param summary entire content of summary javadoc.
423     * @return string that is visible to user in javadoc.
424     */
425    private static String getVisibleContent(String summary) {
426        final String visibleSummary = HTML_ELEMENTS.matcher(summary).replaceAll("");
427        return visibleSummary.trim();
428    }
429
430    /**
431     * Tests if first sentence contains forbidden summary fragment.
432     *
433     * @param firstSentence string with first sentence.
434     * @return {@code true} if first sentence contains forbidden summary fragment.
435     */
436    private boolean containsForbiddenFragment(String firstSentence) {
437        final String javadocText = JAVADOC_MULTILINE_TO_SINGLELINE_PATTERN
438                .matcher(firstSentence).replaceAll(" ");
439        return forbiddenSummaryFragments.matcher(trimExcessWhitespaces(javadocText)).find();
440    }
441
442    /**
443     * Trims the given {@code text} of duplicate whitespaces.
444     *
445     * @param text the text to transform.
446     * @return the finalized form of the text.
447     */
448    private static String trimExcessWhitespaces(String text) {
449        final StringBuilder result = new StringBuilder(256);
450        boolean previousWhitespace = true;
451
452        for (char letter : text.toCharArray()) {
453            final char print;
454            if (Character.isWhitespace(letter)) {
455                if (previousWhitespace) {
456                    continue;
457                }
458
459                previousWhitespace = true;
460                print = ' ';
461            }
462            else {
463                previousWhitespace = false;
464                print = letter;
465            }
466
467            result.append(print);
468        }
469
470        return result.toString();
471    }
472
473    /**
474     * Checks if the node starts with an {&#64;inheritDoc}.
475     *
476     * @param root the root node to examine.
477     * @return {@code true} if the javadoc starts with an {&#64;inheritDoc}.
478     */
479    private static boolean startsWithInheritDoc(DetailNode root) {
480        boolean found = false;
481
482        for (DetailNode child : root.getChildren()) {
483            if (child.getType() == JavadocTokenTypes.JAVADOC_INLINE_TAG
484                    && child.getChildren()[1].getType() == JavadocTokenTypes.INHERIT_DOC_LITERAL) {
485                found = true;
486            }
487            if ((child.getType() == JavadocTokenTypes.TEXT
488                    || child.getType() == JavadocTokenTypes.HTML_ELEMENT)
489                    && !CommonUtil.isBlank(child.getText())) {
490                break;
491            }
492        }
493
494        return found;
495    }
496
497    /**
498     * Finds and returns summary sentence.
499     *
500     * @param ast javadoc root node.
501     * @return violation string.
502     */
503    private static String getSummarySentence(DetailNode ast) {
504        final StringBuilder result = new StringBuilder(256);
505        for (DetailNode child : ast.getChildren()) {
506            if (child.getType() != JavadocTokenTypes.EOF
507                    && ALLOWED_TYPES.get(child.getType())) {
508                result.append(child.getText());
509            }
510            else {
511                final String summary = result.toString();
512                if (child.getType() == JavadocTokenTypes.HTML_ELEMENT
513                        && CommonUtil.isBlank(summary)) {
514                    result.append(getStringInsideTag(summary,
515                            child.getChildren()[0].getChildren()[0]));
516                }
517            }
518        }
519        return result.toString().trim();
520    }
521
522    /**
523     * Get concatenated string within text of html tags.
524     *
525     * @param result javadoc string
526     * @param detailNode javadoc tag node
527     * @return java doc tag content appended in result
528     */
529    private static String getStringInsideTag(String result, DetailNode detailNode) {
530        final StringBuilder contents = new StringBuilder(result);
531        DetailNode tempNode = detailNode;
532        while (tempNode != null) {
533            if (tempNode.getType() == JavadocTokenTypes.TEXT) {
534                contents.append(tempNode.getText());
535            }
536            tempNode = JavadocUtil.getNextSibling(tempNode);
537        }
538        return contents.toString();
539    }
540
541    /**
542     * Finds the first sentence.
543     *
544     * @param ast The Javadoc root node.
545     * @param period The configured period symbol.
546     * @return An Optional containing the first sentence
547     *     up to and excluding the period, or an empty
548     *     Optional if no ending was found.
549     */
550    private static Optional<String> getFirstSentence(DetailNode ast, String period) {
551        final List<String> sentenceParts = new ArrayList<>();
552        Optional<String> result = Optional.empty();
553        for (String text : (Iterable<String>) streamTextParts(ast)::iterator) {
554            final Optional<String> sentenceEnding = findSentenceEnding(text, period);
555
556            if (sentenceEnding.isPresent()) {
557                sentenceParts.add(sentenceEnding.get());
558                result = Optional.of(String.join("", sentenceParts));
559                break;
560            }
561            sentenceParts.add(text);
562        }
563        return result;
564    }
565
566    /**
567     * Streams through all the text under the given node.
568     *
569     * @param node The Javadoc node to examine.
570     * @return All the text in all nodes that have no child nodes.
571     */
572    private static Stream<String> streamTextParts(DetailNode node) {
573        final Stream<String> stream;
574        if (node.getChildren().length == 0) {
575            stream = Stream.of(node.getText());
576        }
577        else {
578            stream = Stream.of(node.getChildren())
579                .flatMap(SummaryJavadocCheck::streamTextParts);
580        }
581        return stream;
582    }
583
584    /**
585     * Finds the end of a sentence. The end of sentence detection here could be replaced in the
586     * future by Java's built-in BreakIterator class.
587     *
588     * @param text The string to search.
589     * @param period The period character to find.
590     * @return An Optional containing the string up to and excluding the period,
591     *     or empty Optional if no ending was found.
592     */
593    private static Optional<String> findSentenceEnding(String text, String period) {
594        int periodIndex = text.indexOf(period);
595        Optional<String> result = Optional.empty();
596        while (periodIndex >= 0) {
597            final int afterPeriodIndex = periodIndex + period.length();
598
599            // Handle western period separately as it is only the end of a sentence if followed
600            // by whitespace. Other period characters often include whitespace in the character.
601            if (!DEFAULT_PERIOD.equals(period)
602                || afterPeriodIndex >= text.length()
603                || Character.isWhitespace(text.charAt(afterPeriodIndex))) {
604                final String resultStr = text.substring(0, periodIndex);
605                result = Optional.of(resultStr);
606                break;
607            }
608            periodIndex = text.indexOf(period, afterPeriodIndex);
609        }
610        return result;
611    }
612}