///////////////////////////////////////////////////////////////////////////////////////////////
// checkstyle: Checks Java source code and other text files for adherence to a set of rules.
// Copyright (C) 2001-2024 the original author or authors.
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
///////////////////////////////////////////////////////////////////////////////////////////////

package com.puppycrawl.tools.checkstyle.utils;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;
import java.util.Locale;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import com.puppycrawl.tools.checkstyle.AstTreeStringPrinter;
import com.puppycrawl.tools.checkstyle.JavaParser;
import com.puppycrawl.tools.checkstyle.api.CheckstyleException;
import com.puppycrawl.tools.checkstyle.api.DetailAST;
import com.puppycrawl.tools.checkstyle.api.TokenTypes;
import com.puppycrawl.tools.checkstyle.xpath.AbstractNode;
import com.puppycrawl.tools.checkstyle.xpath.ElementNode;
import com.puppycrawl.tools.checkstyle.xpath.RootNode;
import net.sf.saxon.Configuration;
import net.sf.saxon.om.Item;
import net.sf.saxon.om.NodeInfo;
import net.sf.saxon.sxpath.XPathDynamicContext;
import net.sf.saxon.sxpath.XPathEvaluator;
import net.sf.saxon.sxpath.XPathExpression;
import net.sf.saxon.trans.XPathException;

/**
 * Contains utility methods for xpath.
 *
 */
public final class XpathUtil {

    /**
     * Token types which support the text attribute.
     * These token types were selected based on analysis that all others do not match the
     * required criteria - the text attribute of the token must be useful and help to retrieve
     * more precise results.
     * There are three types of AST tokens:
     * 1. Tokens for which the texts are equal to the name of the token. In other words,
     * nodes for which the following expression is always true:
     * <pre>
     *     detailAst.getText().equals(TokenUtil.getTokenName(detailAst.getType()))
     * </pre>
     * For example:
     * <pre>
     *     //MODIFIERS[@text='MODIFIERS']
     *     //OBJBLOCK[@text='OBJBLOCK']
     * </pre>
     * These tokens do not match the required criteria because their texts do not carry any
     * additional information, they do not affect the xpath requests and do not help to get
     * more accurate results. The texts of these nodes are useless. No matter what code you
     * analyze, these texts are always the same.
     * In addition, they make xpath queries more complex, less readable and verbose.
     * 2. Tokens for which the texts differ from token names, but texts are always constant.
     * For example:
     * <pre>
     *     //LITERAL_VOID[@text='void']
     *     //RCURLY[@text='}']
     * </pre>
     * These tokens are not used for the same reasons as were described in the previous part.
     * 3. Tokens for which texts are not constant. The texts of these nodes are closely related
     * to a concrete class, method, variable and so on.
     * For example:
     * <pre>
     *     String greeting = "HelloWorld";
     *     //STRING_LITERAL[@text='HelloWorld']
     * </pre>
     * <pre>
     *     int year = 2017;
     *     //NUM_INT[@text=2017]
     * </pre>
     * <pre>
     *     int age = 23;
     *     //NUM_INT[@text=23]
     * </pre>
     * As you can see, the same {@code NUM_INT} token type can have different texts, depending
     * on context.
     * <pre>
     *     public class MyClass {}
     *     //IDENT[@text='MyClass']
     * </pre>
     * Only these tokens support the text attribute because they make our xpath queries more
     * accurate. These token types are listed below.
     */
    private static final BitSet TOKEN_TYPES_WITH_TEXT_ATTRIBUTE = TokenUtil.asBitSet(
        TokenTypes.IDENT, TokenTypes.STRING_LITERAL, TokenTypes.CHAR_LITERAL,
        TokenTypes.NUM_LONG, TokenTypes.NUM_INT, TokenTypes.NUM_DOUBLE, TokenTypes.NUM_FLOAT,
        TokenTypes.TEXT_BLOCK_CONTENT, TokenTypes.COMMENT_CONTENT
    );

    /**
     * Matches a line feed character so that it can be replaced by the two-character
     * sequence backslash + {@code n}.
     */
    private static final Pattern NEWLINE_TO_TAG = Pattern.compile("\n");

    /**
     * Matches a carriage return character so that it can be replaced by the two-character
     * sequence backslash + {@code r}.
     */
    private static final Pattern CARRIAGE_RETURN_TO_TAG = Pattern.compile("\r");

    /** Separator placed between the printed branches of individual xpath results. */
    private static final String DELIMITER = "---------" + System.lineSeparator();

    /** Stop instances being created. **/
    private XpathUtil() {
    }

    /**
     * Walks the sibling chain starting at {@code firstChild} and wraps every sibling into a
     * new {@link ElementNode}.
     *
     * @param root the root node
     * @param parent the parent node; its depth plus one is used as the depth of the children
     * @param firstChild the first DetailAST, may be {@code null} in which case the result
     *     is empty
     * @return children list, in sibling order; each child's index is its position in the list
     */
    public static List<AbstractNode> createChildren(AbstractNode root, AbstractNode parent,
                                                    DetailAST firstChild) {
        final List<AbstractNode> children = new ArrayList<>();
        final int childDepth = parent.getDepth() + 1;
        for (DetailAST ast = firstChild; ast != null; ast = ast.getNextSibling()) {
            // The list size before insertion is the zero-based index of the new child.
            children.add(new ElementNode(root, parent, ast, childDepth, children.size()));
        }
        return children;
    }

    /**
     * Checks, if specified node can have {@code @text} attribute.
     *
     * @param ast {@code DetailAst} element
     * @return true if the element's token type is one of the types supporting the
     *     {@code @text} attribute, false otherwise
     */
    public static boolean supportsTextAttribute(DetailAST ast) {
        final int tokenType = ast.getType();
        return TOKEN_TYPES_WITH_TEXT_ATTRIBUTE.get(tokenType);
    }

    /**
     * Returns content of the text attribute of the ast element.
     * For {@code STRING_LITERAL} tokens the first and the last character of the text are
     * dropped. Carriage return and line feed characters are rendered as the literal
     * two-character sequences backslash + {@code r} and backslash + {@code n}.
     *
     * @param ast {@code DetailAst} element
     * @return text attribute of the ast element
     */
    public static String getTextAttributeValue(DetailAST ast) {
        final String rawText = ast.getText();
        final String unquoted;
        if (ast.getType() == TokenTypes.STRING_LITERAL) {
            unquoted = rawText.substring(1, rawText.length() - 1);
        }
        else {
            unquoted = rawText;
        }
        final String carriageReturnsEscaped =
            CARRIAGE_RETURN_TO_TAG.matcher(unquoted).replaceAll("\\\\r");
        return NEWLINE_TO_TAG.matcher(carriageReturnsEscaped).replaceAll("\\\\n");
    }

    /**
     * Returns xpath query results on file as string.
     * The file is parsed with comments, the query is evaluated against the resulting tree,
     * and the branch of every matching node is printed.
     *
     * @param xpath query to evaluate
     * @param file file to run on
     * @return all results as string separated by delimiter
     * @throws CheckstyleException if some parsing error happens, or if the xpath evaluation
     *     fails
     * @throws IOException if an error occurs
     */
    public static String printXpathBranch(String xpath, File file) throws CheckstyleException,
            IOException {
        try {
            final DetailAST tree = JavaParser.parseFile(file, JavaParser.Options.WITH_COMMENTS);
            final RootNode rootNode = new RootNode(tree);
            return getXpathItems(xpath, rootNode).stream()
                .map(ElementNode.class::cast)
                .map(ElementNode::getUnderlyingNode)
                .map(AstTreeStringPrinter::printBranch)
                .collect(Collectors.joining(DELIMITER));
        }
        catch (XPathException ex) {
            // Wrap the evaluation failure, keeping the original exception as the cause.
            throw new CheckstyleException(
                String.format(Locale.ROOT,
                    "Error during evaluation for xpath: %s, file: %s",
                    xpath, file.getCanonicalPath()),
                ex);
        }
    }

    /**
     * Returns list of nodes matching xpath expression given node context.
     *
     * @param xpath Xpath expression
     * @param rootNode {@code NodeInfo} node context
     * @return list of nodes matching xpath expression given node context
     * @throws XPathException if Xpath cannot be parsed
     */
    public static List<NodeInfo> getXpathItems(String xpath, AbstractNode rootNode)
            throws XPathException {
        final Configuration configuration = Configuration.newConfiguration();
        final XPathExpression expression =
            new XPathEvaluator(configuration).createExpression(xpath);
        final XPathDynamicContext context = expression.createDynamicContext(rootNode);
        final List<Item> matches = expression.evaluate(context);
        return UnmodifiableCollectionUtil.unmodifiableList(matches, NodeInfo.class);
    }

}