View Javadoc
1   /*
2   * Copyright 2012-2025 Christophe Friederich
3   *
4   * Licensed under the Apache License, Version 2.0 (the "License");
5   * you may not use this file except in compliance with the License.
6   * You may obtain a copy of the License at
7   *
8   * http://www.apache.org/licenses/LICENSE-2.0
9   *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16  package org.devacfr.maven.skins.reflow.snippet;
17  
18  import static java.util.Objects.requireNonNull;
19  
20  import com.google.common.base.Strings;
21  import com.google.common.collect.Lists;
22  import com.google.common.collect.Maps;
23  import java.util.List;
24  import java.util.Map;
25  import java.util.regex.MatchResult;
26  import java.util.regex.Matcher;
27  import java.util.regex.Pattern;
28  import org.apache.commons.text.StringEscapeUtils;
29  import org.devacfr.maven.skins.reflow.JsoupUtils;
30  import org.devacfr.maven.skins.reflow.snippet.ComponentToken.TagType;
31  import org.devacfr.maven.skins.reflow.snippet.SnippetComponent.Type;
32  import org.jsoup.nodes.Document;
33  import org.jsoup.nodes.Element;
34  import org.jsoup.nodes.Node;
35  import org.jsoup.select.Elements;
36  import org.jsoup.select.NodeTraversor;
37  import org.jsoup.select.NodeVisitor;
38  import org.slf4j.Logger;
39  import org.slf4j.LoggerFactory;
40  
41  /**
42   * Resolve the type and tag type of component.
43   *
44   * @author Christophe Friederich
45   * @version 2.4
46   */
47  public class ComponentResolver {
48  
49    private static final Logger LOGGER = LoggerFactory.getLogger(ComponentResolver.class);
50  
51    /** **/
52    private static final Pattern RESOLVER_PATTERN = Pattern.compile(
53      "\\{\\{(<|%) (\\/?)([\\w\\-_]*)(\\s?(?:[\\w\\-_]*)(?:=[\\u201c|\"](?:[\\s\\w\\p{Punct}]*)[\\u201d|\"])?)* (\\/?)(>|%)\\}\\}",
54      Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS);
55  
56    private static final Pattern ATTRIBUTE_PATTERN = Pattern.compile("\\s?(\\w*)=(\\\")?(\\w*)\2\\s?",
57      Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS);
58  
59    private final SnippetParser parser;
60  
61    /**
62     * Default constructor
63     */
64    public ComponentResolver() {
65      this(new SnippetParser());
66    }
67  
68    public ComponentResolver(final SnippetParser parser) {
69      super();
70      this.parser = requireNonNull(parser);
71    }
72  
73    public boolean isSnippet(final Node node) {
74      return parser.isSnippet(node);
75    }
76  
77    /**
78     * Collects all (start,end,empty) Element corresponding to a snippet component.
79     *
80     * @param document
81     *          the Jsoup element to use
82     * @return Return a {@link Elements} representing all web components contained in Jsoup document.
83     */
84    public Elements collect(final Element document) {
85      return collect(document, RESOLVER_PATTERN);
86    }
87  
88    /**
89     * Normalise the {@link Document} to enclose inline snippet in html element.
90     *
91     * @param document
92     *          the document to use
93     * @return Returns the same normalised {@link Document}.
94     */
95    public Element normalize(final Document document) {
96  
97      final Elements elements = collect(document);
98      if (LOGGER.isDebugEnabled()) {
99        LOGGER.debug("Snippet Collected");
100       LOGGER.debug(elements.toString());
101     }
102     // remove all section tags
103     if (!elements.isEmpty()) {
104       final Elements sections = document.getElementsByTag("section");
105       sections.forEach(Element::unwrap);
106     }
107 
108     elements.forEach(element -> {
109       String text = StringEscapeUtils.unescapeHtml4(element.html());
110       final Matcher matcher = RESOLVER_PATTERN.matcher(text);
111 
112       final List<MatchResult> results = Lists.newArrayList();
113 
114       while (matcher.find()) {
115         final MatchResult matchResult = matcher.toMatchResult();
116         // add only inner matches
117         if (matchResult.start() >= 0 || matchResult.end() <= text.length()) {
118           results.add(0, matcher.toMatchResult());
119         }
120       }
121       String convertedText = text;
122       // process from end to start
123       if (!results.isEmpty()) {
124         for (final MatchResult matchResult : results) {
125           // escape snippet tag
126           final String snippet = text.substring(matchResult.start(), matchResult.end());
127           // wrap snippet in span for display
128           text = text.substring(0, matchResult.start()) + "<span>" + StringEscapeUtils.escapeHtml4(snippet) + "</span>"
129               + text.substring(matchResult.end());
130           // convert snippet to html
131           final String convertedSnippet = convertSnippetTagsToHtml(
132             convertedText.substring(matchResult.start(), matchResult.end()));
133           // replace in text
134           convertedText = convertedText.substring(0, matchResult.start()) + convertedSnippet
135               + convertedText.substring(matchResult.end());
136         }
137         // convert to html
138         // remove all snippet tags to defirentiate inline snippet in paragraph to
139         // sequence of snippets.
140         Element body = JsoupUtils.createHtmlDocument(convertSnippetTagsToHtml(convertedText)).body();
141         body.children().forEach(e -> {
142           if (parser.isSnippet(e))
143             e.remove();
144         });
145         boolean containsOnlySnippets = !JsoupUtils.hasTextNode(body);
146         element.html(text);
147 
148         // remove <p> tag if not necessary, i.e. contains only snippet components
149         if (element.tagName().equals("p") && containsOnlySnippets) {
150           element.unwrap();
151         }
152       }
153     });
154     return document;
155   }
156 
157   /**
158    * Create a {@link ComponentToken} corresponding to the element.
159    *
160    * @param element
161    *          the element to use.
162    * @return Return a new instance of {@link ComponentToken} representing the element.
163    */
164   public ComponentToken create(final Element element) {
165     if (isSnippet(element)) {
166       Type type = Type.webComponent;
167       return new ComponentToken(element, element.tagName(), TagType.html, type);
168     } else {
169       final Matcher matcher = RESOLVER_PATTERN.matcher(element.ownText());
170 
171       if (matcher.matches()) {
172         return createToken(element, matcher);
173       }
174     }
175     return null;
176   }
177 
178   private ComponentToken createToken(final Element element, final Matcher matcher) {
179     if (!Strings.isNullOrEmpty(matcher.group(2)) && !Strings.isNullOrEmpty(matcher.group(5))) {
180       // can not have same time empty and end identifier.
181       throw new RuntimeException("malformed component");
182     }
183     TagType tag = TagType.start;
184     if ("/".equals(matcher.group(2))) {
185       tag = TagType.end;
186     } else if ("/".equals(matcher.group(5))) {
187       tag = TagType.empty;
188     }
189     final Type type = "<".equals(matcher.group(1)) ? Type.shortcode : Type.webComponent;
190 
191     return new ComponentToken(element, matcher.group(3), tag, type);
192   }
193 
194   protected static Map<String, String> extractAttributes(final String text) {
195     final Map<String, String> attrs = Maps.newHashMap();
196     final Matcher matcher = ATTRIBUTE_PATTERN.matcher(text);
197     while (matcher.find()) {
198       attrs.put(matcher.group(1).toLowerCase(), matcher.group(3));
199     }
200     return attrs;
201   }
202 
203   public Elements collect(final Element root, final Pattern searchPattern) {
204     final Elements elements = new Elements();
205     NodeTraversor.traverse(new Accumulator(parser, root, elements, searchPattern), root);
206     return elements;
207   }
208 
209   /**
210    * Converts the text of element to html format.
211    *
212    * @param element
213    *          the html element to use.
214    * @return Returns a {@link String} representing the snippet element in html format.
215    */
216   public static String convertElementTextToHtml(final Element element) {
217     return convertSnippetTagsToHtml(element.text());
218   }
219 
220   /**
221    * Converts the snippet tags to html format.
222    *
223    * @param html
224    *          the html to use.
225    * @return Returns a {@link String} representing the snippet html in html format.
226    */
227   public static String convertSnippetTagsToHtml(final String html) {
228     String text = StringEscapeUtils.unescapeHtml4(html);
229     return text.replace("{{< ", "<")
230         .replace(" />}}", "/>")
231         .replace(" /%}}", "/>")
232         .replace(" >}}", ">")
233         .replace("{{% ", "<")
234         .replace(" %}}", ">")
235         .replaceAll("\\u201c|\\u201d", "\"");
236   }
237 
238   private static class Accumulator implements NodeVisitor {
239 
240     /** */
241     private final Pattern searchPattern;
242 
243     private final Element root;
244 
245     private final Elements elements;
246 
247     private final SnippetParser parser;
248 
249     Accumulator(final SnippetParser parser, final Element root, final Elements elements, final Pattern searchPattern) {
250       this.root = root;
251       this.elements = elements;
252       this.searchPattern = searchPattern;
253       this.parser = parser;
254     }
255 
256     @Override
257     public void head(final Node node, final int depth) {
258       if (node instanceof Element) {
259         final Element el = (Element) node;
260         if (matches(root, el)) {
261           elements.add(el);
262         }
263       }
264     }
265 
266     public boolean matches(final Element root, final Element element) {
267       // exclude if in <pre> element, allowing highlight component in documentation
268       if ("pre".equals(element.tagName()) || "code".equals(element.tagName())
269           || element.hasParent() && "pre".equals(element.parent().tagName())) {
270         return false;
271       }
272       return searchPattern.matcher(element.ownText()).find();
273     }
274 
275     @Override
276     public void tail(final Node node, final int depth) {
277       if (node instanceof Element && parser.isSnippet(node)) {
278         elements.add((Element) node);
279       }
280     }
281   }
282 }