1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19 package org.devacfr.maven.skins.reflow.snippet;
20
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.commons.lang3.StringEscapeUtils;
import org.devacfr.maven.skins.reflow.snippet.ComponentToken.Tag;
import org.devacfr.maven.skins.reflow.snippet.ComponentToken.Type;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.select.Collector;
import org.jsoup.select.Elements;
import org.jsoup.select.Evaluator;
import org.jsoup.select.NodeTraversor;
import org.jsoup.select.NodeVisitor;
import org.jsoup.select.QueryParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
44
45
46
47
48
49
50
51 public class ComponentResolver {
52
53 private static final Logger LOGGER = LoggerFactory.getLogger(ComponentResolver.class);
54
55
56 private static final Pattern RESOLVER_PATTERN = Pattern.compile(
57 "\\{\\{(<|%) (\\/?)([\\w\\-_]*)(\\s?(?:[\\w\\-_]*)(?:=[\\u201c|\"](?:[\\s\\w\\p{Punct}]*)[\\u201d|\"])?)* (\\/?)(>|%)\\}\\}",
58 Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS);
59
60 private static final Pattern ATTRIBUTE_PATTERN = Pattern.compile("\\s?(\\w*)=(\\\")?(\\w*)\2\\s?",
61 Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CHARACTER_CLASS);
62
63
64
65
66 public ComponentResolver() {
67 super();
68 }
69
70 public static boolean isSnippet(final Node node) {
71 return node.hasAttr("shortcode") || node.hasAttr("webcomponent");
72 }
73
74
75
76
77
78
79
80
81 public Elements collect(final Element document) {
82 return collect(document, RESOLVER_PATTERN);
83 }
84
85
86
87
88
89
90 public static boolean hasIncludedSnippetComponent(final Element document) {
91 return Collector.findFirst(QueryParser.parse("[shortcode],[webcomponent]"), document) != null;
92 }
93
94
95
96
97
98
99
100
101 public Document normalize(final Document document) {
102
103 final Elements elements = collect(document);
104 if (LOGGER.isDebugEnabled()) {
105 LOGGER.debug("Snippet Collected");
106 LOGGER.debug(elements.toString());
107 }
108
109 if (!elements.isEmpty()) {
110 final Elements sections = Collector.collect(new Evaluator.Tag("section"), document);
111 sections.forEach(Element::unwrap);
112 }
113
114 elements.forEach(element -> {
115 String text = StringEscapeUtils.unescapeHtml4(element.html());
116 final Matcher matcher = RESOLVER_PATTERN.matcher(text);
117
118 final List<MatchResult> results = Lists.newArrayList();
119
120 while (matcher.find()) {
121 final MatchResult matchResult = matcher.toMatchResult();
122 if (matchResult.start() > 0 || matchResult.end() < text.length()) {
123 results.add(0, matcher.toMatchResult());
124 }
125 }
126 if (!results.isEmpty()) {
127 for (final MatchResult matchResult : results) {
128 final String snippet = text.substring(matchResult.start(), matchResult.end());
129 text = text.substring(0, matchResult.start()) + "<span>" + StringEscapeUtils.escapeHtml4(snippet)
130 + "</span>" + text.substring(matchResult.end());
131 }
132 element.html(text);
133 }
134 });
135 return document;
136 }
137
138
139
140
141
142
143
144
145 public ComponentToken create(final Element element) {
146 if (isSnippet(element)) {
147 Type type = null;
148 if (element.hasAttr("shortcode")) {
149 type = Type.shortcode;
150 } else if (element.hasAttr("webcomponent")) {
151 type = Type.webComponent;
152 }
153 return new ComponentToken(element, element.tagName(), Tag.html, type);
154 } else {
155 final Matcher matcher = RESOLVER_PATTERN.matcher(element.ownText());
156
157 if (matcher.matches()) {
158 return createToken(element, matcher);
159 }
160 }
161 return null;
162 }
163
164 private ComponentToken createToken(final Element element, final Matcher matcher) {
165 if (!Strings.isNullOrEmpty(matcher.group(2)) && !Strings.isNullOrEmpty(matcher.group(5))) {
166
167 throw new RuntimeException("malformed component");
168 }
169 final Type type = "<".equals(matcher.group(1)) ? Type.shortcode : Type.webComponent;
170 Tag tag = Tag.start;
171 if ("/".equals(matcher.group(2))) {
172 tag = Tag.end;
173 } else if ("/".equals(matcher.group(5))) {
174 tag = Tag.empty;
175 }
176
177 return new ComponentToken(element, matcher.group(3), tag, type);
178 }
179
180 protected static Map<String, String> extractAttributes(final String text) {
181 final Map<String, String> attrs = Maps.newHashMap();
182 final Matcher matcher = ATTRIBUTE_PATTERN.matcher(text);
183 while (matcher.find()) {
184 attrs.put(matcher.group(1).toLowerCase(), matcher.group(3));
185 }
186 return attrs;
187 }
188
189 public static Elements collect(final Element root, final Pattern searchPattern) {
190 final Elements elements = new Elements();
191 NodeTraversor.traverse(new Accumulator(root, elements, searchPattern), root);
192 return elements;
193 }
194
195 private static class Accumulator implements NodeVisitor {
196
197
198 private final Pattern searchPattern;
199
200 private final Element root;
201
202 private final Elements elements;
203
204 Accumulator(final Element root, final Elements elements, final Pattern searchPattern) {
205 this.root = root;
206 this.elements = elements;
207 this.searchPattern = searchPattern;
208 }
209
210 @Override
211 public void head(final Node node, final int depth) {
212 if (node instanceof Element) {
213 final Element el = (Element) node;
214 if (matches(root, el)) {
215 elements.add(el);
216 }
217 }
218 }
219
220 public boolean matches(final Element root, final Element element) {
221
222 if ("pre".equals(element.tagName()) || "code".equals(element.tagName())
223 || element.hasParent() && "pre".equals(element.parent().tagName())) {
224 return false;
225 }
226 return searchPattern.matcher(element.ownText()).find();
227 }
228
229 @Override
230 public void tail(final Node node, final int depth) {
231 if (node instanceof Element && isSnippet(node)) {
232 elements.add((Element) node);
233 }
234 }
235 }
236 }