/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
19  package org.devacfr.maven.skins.reflow.snippet;
20  
21  import java.util.List;
22  import java.util.Map;
23  import java.util.regex.MatchResult;
24  import java.util.regex.Matcher;
25  import java.util.regex.Pattern;
26  
27  import com.google.common.base.Strings;
28  import com.google.common.collect.Lists;
29  import com.google.common.collect.Maps;
30  import org.apache.commons.lang3.StringEscapeUtils;
31  import org.devacfr.maven.skins.reflow.snippet.ComponentToken.Tag;
32  import org.devacfr.maven.skins.reflow.snippet.ComponentToken.Type;
33  import org.jsoup.nodes.Document;
34  import org.jsoup.nodes.Element;
35  import org.jsoup.nodes.Node;
36  import org.jsoup.select.Collector;
37  import org.jsoup.select.Elements;
38  import org.jsoup.select.Evaluator;
39  import org.jsoup.select.NodeTraversor;
40  import org.jsoup.select.NodeVisitor;
41  import org.jsoup.select.QueryParser;
42  import org.slf4j.Logger;
43  import org.slf4j.LoggerFactory;
44  
45  /**
46   * Resolve the type and tag type of component.
47   *
48   * @author Christophe Friederich
49   * @version 2.4
50   */
51  public class ComponentResolver {
52  
53      private static final Logger LOGGER = LoggerFactory.getLogger(ComponentResolver.class);
54  
55      /** **/
56      private static final Pattern RESOLVER_PATTERN = Pattern.compile(
57          "\\{\\{(<|%) (\\/?)([\\w\\-_]*)(\\s?(?:[\\w\\-_]*)(?:=[\\u201c|\"](?:[\\s\\w\\p{Punct}]*)[\\u201d|\"])?)* (\\/?)(>|%)\\}\\}",
58          Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS);
59  
60      private static final Pattern ATTRIBUTE_PATTERN = Pattern.compile("\\s?(\\w*)=(\\\")?(\\w*)\2\\s?",
61          Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS);
62  
63      /**
64       * Default constructor
65       */
66      public ComponentResolver() {
67          super();
68      }
69  
70      public static boolean isSnippet(final Node node) {
71          return node.hasAttr("shortcode") || node.hasAttr("webcomponent");
72      }
73  
74      /**
75       * Collects all (start,end,empty) Element corresponding to a snippet component.
76       *
77       * @param document
78       *            the Jsoup element to use
79       * @return Return a {@link Elements} representing all web components contained in Jsoup document.
80       */
81      public Elements collect(final Element document) {
82          return collect(document, RESOLVER_PATTERN);
83      }
84  
85      /**
86       * @param document
87       *            the Jsoup element to use
88       * @return
89       */
90      public static boolean hasIncludedSnippetComponent(final Element document) {
91          return Collector.findFirst(QueryParser.parse("[shortcode],[webcomponent]"), document) != null;
92      }
93  
94      /**
95       * Normalise the {@link Document} to enclose inline snippet in html element.
96       *
97       * @param document
98       *            the document to use
99       * @return Returns the same normalised {@link Document}.
100      */
101     public Document normalize(final Document document) {
102 
103         final Elements elements = collect(document);
104         if (LOGGER.isDebugEnabled()) {
105             LOGGER.debug("Snippet Collected");
106             LOGGER.debug(elements.toString());
107         }
108         // remove all section tags
109         if (!elements.isEmpty()) {
110             final Elements sections = Collector.collect(new Evaluator.Tag("section"), document);
111             sections.forEach(Element::unwrap);
112         }
113 
114         elements.forEach(element -> {
115             String text = StringEscapeUtils.unescapeHtml4(element.html());
116             final Matcher matcher = RESOLVER_PATTERN.matcher(text);
117 
118             final List<MatchResult> results = Lists.newArrayList();
119 
120             while (matcher.find()) {
121                 final MatchResult matchResult = matcher.toMatchResult();
122                 if (matchResult.start() > 0 || matchResult.end() < text.length()) {
123                     results.add(0, matcher.toMatchResult());
124                 }
125             }
126             if (!results.isEmpty()) {
127                 for (final MatchResult matchResult : results) {
128                     final String snippet = text.substring(matchResult.start(), matchResult.end());
129                     text = text.substring(0, matchResult.start()) + "<span>" + StringEscapeUtils.escapeHtml4(snippet)
130                             + "</span>" + text.substring(matchResult.end());
131                 }
132                 element.html(text);
133             }
134         });
135         return document;
136     }
137 
138     /**
139      * Create a {@link ComponentToken} corresponding to the element.
140      *
141      * @param element
142      *            the element to use.
143      * @return Return a new instance of {@link ComponentToken} representing the element.
144      */
145     public ComponentToken create(final Element element) {
146         if (isSnippet(element)) {
147             Type type = null;
148             if (element.hasAttr("shortcode")) {
149                 type = Type.shortcode;
150             } else if (element.hasAttr("webcomponent")) {
151                 type = Type.webComponent;
152             }
153             return new ComponentToken(element, element.tagName(), Tag.html, type);
154         } else {
155             final Matcher matcher = RESOLVER_PATTERN.matcher(element.ownText());
156 
157             if (matcher.matches()) {
158                 return createToken(element, matcher);
159             }
160         }
161         return null;
162     }
163 
164     private ComponentToken createToken(final Element element, final Matcher matcher) {
165         if (!Strings.isNullOrEmpty(matcher.group(2)) && !Strings.isNullOrEmpty(matcher.group(5))) {
166             // can not have same time empty and end identifier.
167             throw new RuntimeException("malformed component");
168         }
169         final Type type = "<".equals(matcher.group(1)) ? Type.shortcode : Type.webComponent;
170         Tag tag = Tag.start;
171         if ("/".equals(matcher.group(2))) {
172             tag = Tag.end;
173         } else if ("/".equals(matcher.group(5))) {
174             tag = Tag.empty;
175         }
176 
177         return new ComponentToken(element, matcher.group(3), tag, type);
178     }
179 
180     protected static Map<String, String> extractAttributes(final String text) {
181         final Map<String, String> attrs = Maps.newHashMap();
182         final Matcher matcher = ATTRIBUTE_PATTERN.matcher(text);
183         while (matcher.find()) {
184             attrs.put(matcher.group(1).toLowerCase(), matcher.group(3));
185         }
186         return attrs;
187     }
188 
189     public static Elements collect(final Element root, final Pattern searchPattern) {
190         final Elements elements = new Elements();
191         NodeTraversor.traverse(new Accumulator(root, elements, searchPattern), root);
192         return elements;
193     }
194 
195     private static class Accumulator implements NodeVisitor {
196 
197         /** */
198         private final Pattern searchPattern;
199 
200         private final Element root;
201 
202         private final Elements elements;
203 
204         Accumulator(final Element root, final Elements elements, final Pattern searchPattern) {
205             this.root = root;
206             this.elements = elements;
207             this.searchPattern = searchPattern;
208         }
209 
210         @Override
211         public void head(final Node node, final int depth) {
212             if (node instanceof Element) {
213                 final Element el = (Element) node;
214                 if (matches(root, el)) {
215                     elements.add(el);
216                 }
217             }
218         }
219 
220         public boolean matches(final Element root, final Element element) {
221             // exclude if in <pre> element, allowing highlight component in documentation
222             if ("pre".equals(element.tagName()) || "code".equals(element.tagName())
223                     || element.hasParent() && "pre".equals(element.parent().tagName())) {
224                 return false;
225             }
226             return searchPattern.matcher(element.ownText()).find();
227         }
228 
229         @Override
230         public void tail(final Node node, final int depth) {
231             if (node instanceof Element && isSnippet(node)) {
232                 elements.add((Element) node);
233             }
234         }
235     }
236 }