1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package org.devacfr.maven.skins.reflow;
17
18 import static java.util.Collections.emptyList;
19 import static java.util.Objects.requireNonNull;
20
21 import com.google.common.base.Strings;
22 import com.google.common.collect.Lists;
23 import com.google.common.collect.Sets;
24 import java.text.Normalizer;
25 import java.text.Normalizer.Form;
26 import java.util.ArrayList;
27 import java.util.Arrays;
28 import java.util.Collection;
29 import java.util.Collections;
30 import java.util.HashSet;
31 import java.util.List;
32 import java.util.Locale;
33 import java.util.Map;
34 import java.util.Map.Entry;
35 import java.util.Set;
36 import java.util.Stack;
37 import java.util.regex.Pattern;
38 import javax.annotation.Nonnull;
39 import javax.annotation.Nullable;
40 import org.apache.commons.lang3.builder.ToStringBuilder;
41 import org.apache.velocity.tools.ToolContext;
42 import org.apache.velocity.tools.config.DefaultKey;
43 import org.apache.velocity.tools.generic.SafeConfig;
44 import org.apache.velocity.tools.generic.ValueParser;
45 import org.jsoup.Jsoup;
46 import org.jsoup.internal.StringUtil;
47 import org.jsoup.nodes.Document;
48 import org.jsoup.nodes.Element;
49 import org.jsoup.nodes.Node;
50 import org.jsoup.parser.Tag;
51 import org.slf4j.Logger;
52 import org.slf4j.LoggerFactory;
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68 @DefaultKey("htmlTool")
69 public class HtmlTool extends SafeConfig {
70
71
72 private static final Logger LOGGER = LoggerFactory.getLogger(HtmlTool.class);
73
74 private static final int SLUG_SIZE = 50;
75
76
77 public static final String DEFAULT_SLUG_SEPARATOR = "-";
78
79
80 private static final String SEPARATOR_TOC = "_toc_";
81
82
83 private static final List<String> HEADINGS = Collections
84 .unmodifiableList(Arrays.asList("h1", "h2", "h3", "h4", "h5", "h6"));
85
86
/**
 * Tells the {@code split} methods what to do with an element that matched the
 * separator selector.
 */
public enum JoinSeparator {

    /** The separator becomes the first element of the partition that follows it. */
    AFTER,

    /** The separator becomes the last element of the partition that precedes it. */
    BEFORE,

    /** The separator is dropped and belongs to no partition. */
    NO
}
99
100
101 private String outputEncoding = "UTF-8";
102
103 private boolean prettyPrint = true;
104
105
106
107
108
109
110 @Override
111 protected void configure(final ValueParser values) {
112
113
114 final Object velocityContext = values.get("velocityContext");
115
116 if (!(velocityContext instanceof ToolContext)) {
117 return;
118 }
119
120 final ToolContext ctxt = (ToolContext) velocityContext;
121
122
123 final Object outputEncodingObj = ctxt.get("outputEncoding");
124 if (outputEncodingObj instanceof String) {
125 this.outputEncoding = (String) outputEncodingObj;
126 }
127
128 final Object prettyPrint = ctxt.get("prettyPrint");
129 if (prettyPrint instanceof Boolean) {
130 this.prettyPrint = (Boolean) prettyPrint;
131 }
132 }
133
134
135
136
137
138
139
140
141
142 @Nullable public String normaliseWhitespace(@Nullable final String html) {
143 if (html == null) {
144 return null;
145 }
146 return StringUtil.normaliseWhitespace(html);
147 }
148
149
150
151
152
153
154
155
156
157
158
159
160
161 public List<String> split(@Nonnull final String content, @Nonnull final String separatorCssSelector) {
162 return split(content, separatorCssSelector, JoinSeparator.NO);
163 }
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182 public List<String> splitOnStarts(final @Nonnull String content, final @Nonnull String separatorCssSelector) {
183
184 final List<String> result = split(content, separatorCssSelector, JoinSeparator.AFTER);
185
186 if (result == null || result.size() <= 1) {
187
188 return result;
189 }
190
191
192
193
194 return result.subList(1, result.size());
195 }
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211 public List<String> split(final @Nonnull String content,
212 final @Nonnull String separatorCssSelector,
213 final String separatorStrategy) {
214
215 JoinSeparator sepStrategy;
216 if ("before".equals(separatorStrategy)) {
217 sepStrategy = JoinSeparator.BEFORE;
218 } else if ("after".equals(separatorStrategy)) {
219 sepStrategy = JoinSeparator.AFTER;
220 } else {
221 sepStrategy = JoinSeparator.NO;
222 }
223
224 return split(content, separatorCssSelector, sepStrategy);
225 }
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245 public List<String> split(@Nonnull final String content,
246 @Nonnull final String separatorCssSelector,
247 @Nonnull final JoinSeparator separatorStrategy) {
248
249 requireNonNull(separatorStrategy);
250 final Element body = parse(content).body();
251
252 final List<Element> separators = body.select(separatorCssSelector);
253 if (separators.size() > 0) {
254 final List<List<Element>> partitions = split(separators, separatorStrategy, body);
255
256 final List<String> sectionHtml = new ArrayList<>();
257
258 for (final List<Element> partition : partitions) {
259 final String html = outerHtml(partition);
260 if (!Strings.isNullOrEmpty(html)) {
261 sectionHtml.add(outerHtml(partition));
262 }
263 }
264
265 return sectionHtml;
266 } else {
267
268 return Collections.singletonList(content);
269 }
270 }
271
272
273
274
275
276
277
278
279
280
281
282
283 private static List<List<Element>> split(final Collection<Element> separators,
284 final JoinSeparator separatorStrategy,
285 final Element parent) {
286
287 final List<List<Element>> partitions = Lists.newLinkedList();
288
289 for (final Element child : parent.children()) {
290
291 if (separators.contains(child)) {
292
293
294
295
296 getLastPartition(partitions);
297
298 if (separatorStrategy == JoinSeparator.BEFORE) {
299
300 getLastPartition(partitions).add(child);
301 }
302
303
304 final List<Element> newPartition = Lists.newLinkedList();
305 partitions.add(newPartition);
306
307 if (separatorStrategy == JoinSeparator.AFTER) {
308
309 newPartition.add(child);
310 }
311
312 } else {
313
314 final List<List<Element>> childPartitions = split(separators, separatorStrategy, child);
315
316
317 getLastPartition(partitions).add(child);
318
319 if (childPartitions.size() > 1) {
320
321
322
323
324 final List<Element> allChildren = child.children();
325 final List<Element> firstPartition = childPartitions.get(0);
326
327 allChildren.removeAll(firstPartition);
328 for (final Element removeChild : allChildren) {
329 removeChild.remove();
330 }
331
332
333 for (final List<Element> nextPartition : childPartitions.subList(1, childPartitions.size())) {
334 partitions.add(nextPartition);
335 }
336 }
337 }
338 }
339
340 return partitions;
341 }
342
343
344
345
346
347
348
349 private static List<Element> getLastPartition(final List<List<Element>> partitions) {
350 if (partitions.isEmpty()) {
351 final List<Element> newPartition = Lists.newLinkedList();
352 partitions.add(newPartition);
353 return newPartition;
354 } else {
355 return partitions.get(partitions.size() - 1);
356 }
357 }
358
359
360
361
362
363
364
365 private static String outerHtml(final List<Element> elements) {
366
367 switch (elements.size()) {
368 case 0:
369 return "";
370
371 case 1:
372 return elements.get(0).outerHtml();
373
374 default:
375
376
377 final Element root = new Element(Tag.valueOf("div"), "");
378 for (final Element elem : elements) {
379 root.appendChild(elem);
380 }
381
382 return root.html();
383 }
384 }
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399 public String reorderToTop(final String content, final String selector, final int amount) {
400 return reorderToTop(content, selector, amount, null);
401 }
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418 public String reorderToTop(final String content,
419 final String selector,
420 final int amount,
421 final String wrapRemaining) {
422
423
424 final List<Element> extracted = extractElements(content, selector, amount);
425
426 if (extracted.size() > 1) {
427
428 final Element body = extracted.get(0);
429
430 if (wrapRemaining != null) {
431 wrapInner(body, wrapRemaining);
432 }
433
434 final List<Element> elements = extracted.subList(1, extracted.size());
435
436
437
438 for (int index = elements.size() - 1; index >= 0; index--) {
439 body.prependChild(elements.get(index));
440 }
441
442 return body.html();
443 } else {
444
445 return content;
446 }
447 }
448
449 private static Element wrapInner(final Element element, final String html) {
450
451
452
453 final Element topDiv = new Element(Tag.valueOf("div"), "");
454 for (final Element topElem : element.children()) {
455
456 topElem.remove();
457 topDiv.appendChild(topElem);
458 }
459
460
461 element.appendChild(topDiv);
462
463
464 topDiv.wrap(html);
465
466 topDiv.unwrap();
467
468 return element;
469 }
470
471
472
473
474
475
476
477
478
479
480 private List<Element> extractElements(final String content, final String selector, final int amount) {
481
482 final Element body = parse(content).body();
483
484 List<Element> elements = body.select(selector);
485 if (elements.size() > 0) {
486
487 elements = filterParents(elements);
488
489 if (amount >= 0) {
490
491 elements = elements.subList(0, Math.min(amount, elements.size()));
492 }
493
494
495 for (final Element element : elements) {
496 element.remove();
497 }
498 }
499
500 final List<Element> results = new ArrayList<>();
501
502 results.add(body);
503 results.addAll(elements);
504 return results;
505 }
506
507
508
509
510
511
512
513
514 private static List<Element> filterParents(final List<Element> elements) {
515 final List<Element> filtered = new ArrayList<>();
516 for (final Element element : elements) {
517
518 final List<Element> parentsInter = element.parents().asList();
519 parentsInter.retainAll(elements);
520 if (parentsInter.isEmpty()) {
521
522 filtered.add(element);
523 }
524 }
525
526 return filtered;
527 }
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544 @Nonnull
545 public ExtractResult extract(final String content, final String selector, final int amount) {
546
547 final List<Element> extracted = extractElements(content, selector, amount);
548
549 if (extracted.size() > 1) {
550
551
552 final Element body = extracted.get(0);
553 final List<Element> elements = extracted.subList(1, extracted.size());
554
555
556 final List<String> elementStr = new ArrayList<>();
557 for (final Element el : elements) {
558 elementStr.add(el.outerHtml());
559 }
560
561 return new DefaultExtractResult(elementStr, body.html());
562 } else {
563
564 return new DefaultExtractResult(Collections.<String> emptyList(), content);
565 }
566 }
567
568
569
570
571
572
573
574
/**
 * Result of an extraction: the extracted pieces and what is left of the content.
 */
public interface ExtractResult {

    /**
     * @return the HTML of each extracted element.
     */
    List<String> getExtracted();

    /**
     * @return the HTML of the content that remains after the extraction.
     */
    String getRemainder();
}
591
592
593
594
595 private static final class DefaultExtractResult implements ExtractResult {
596
597
598 private final List<String> extracted;
599
600
601 private final String remainder;
602
603 private DefaultExtractResult(final List<String> extracted, final String remainder) {
604 this.extracted = extracted;
605 this.remainder = remainder;
606 }
607
608 @Override
609 public List<String> getExtracted() {
610 return Collections.unmodifiableList(extracted);
611 }
612
613 @Override
614 public String getRemainder() {
615 return remainder;
616 }
617 }
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633 public String setAttr(final String content, final String selector, final String attributeKey, final String value) {
634
635 final Element body = parse(content).body();
636
637 final List<Element> elements = body.select(selector);
638 if (elements.size() > 0) {
639
640 for (final Element element : elements) {
641 element.attr(attributeKey, value);
642 }
643
644 return body.html();
645 } else {
646
647 return content;
648 }
649 }
650
651
652
653
654
655
656
657
658 public Document parse(@Nonnull final String content) {
659 final Document doc = Jsoup.parseBodyFragment(content);
660 doc.outputSettings().charset(outputEncoding).prettyPrint(prettyPrint);
661 return doc;
662 }
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677 public List<String> getAttr(final String content, final String selector, final String attributeKey) {
678
679 final Element body = parse(content).body();
680
681 final List<Element> elements = body.select(selector);
682 final List<String> attrs = new ArrayList<>();
683
684 for (final Element element : elements) {
685 final String attrValue = element.attr(attributeKey);
686 attrs.add(attrValue);
687 }
688
689 return attrs;
690 }
691
692
693
694
695
696
697
698
699
700
701 @Nonnull
702 public String addClasses(@Nonnull String baseClass, @Nonnull String additionalClasses) {
703 return addClasses(baseClass, additionalClasses == null ? new String[] {} : additionalClasses.split(" "));
704 }
705
706
707
708
709
710
711
712
713
714
715 @Nonnull
716 public String addClasses(@Nonnull String baseClass, @Nonnull String... additionalClasses) {
717 StringBuilder sb = new StringBuilder();
718 Set<String> uniqueClasses = Sets.newHashSet();
719 uniqueClasses.addAll(Arrays.asList(baseClass.split(" ")));
720 uniqueClasses.addAll(Arrays.asList(additionalClasses));
721 for (String cl : uniqueClasses) {
722 if (!Strings.isNullOrEmpty(cl)) {
723 if (sb.length() > 0) {
724 sb.append(" ");
725 }
726 sb.append(cl);
727 }
728 }
729 return sb.toString();
730 }
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746 public String addClass(final String content, final String selector, final List<String> classNames, final int amount) {
747
748 final Element body = parse(content).body();
749
750 List<Element> elements = body.select(selector);
751 if (amount >= 0) {
752
753 elements = elements.subList(0, Math.min(amount, elements.size()));
754 }
755
756 if (elements.size() > 0) {
757
758 for (final Element element : elements) {
759 for (final String className : classNames) {
760 element.addClass(className);
761 }
762 }
763
764 return body.html();
765 } else {
766
767 return content;
768 }
769 }
770
771
772
773
774
775
776
777
778
779
780
781
782
783 public String addClass(final String content, final String selector, final List<String> classNames) {
784 return addClass(content, selector, classNames, -1);
785 }
786
787
788
789
790
791
792
793
794
795
796
797
798
799 public String addClass(final String content, final String selector, final String className) {
800 return addClass(content, selector, Collections.singletonList(className));
801 }
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817 public String wrap(final String content, final String selector, final String wrapHtml, final int amount) {
818
819 final Element body = parse(content).body();
820
821 List<Element> elements = body.select(selector);
822 if (amount >= 0) {
823
824 elements = elements.subList(0, Math.min(amount, elements.size()));
825 }
826
827 if (elements.size() > 0) {
828
829 for (final Element element : elements) {
830 element.wrap(wrapHtml);
831 }
832
833 return body.html();
834 } else {
835
836 return content;
837 }
838 }
839
840
841
842
843
844
845
846
847
848
849
850 public String remove(final String content, final String selector) {
851
852 final Element body = parse(content).body();
853
854 final List<Element> elements = body.select(selector);
855 if (elements.size() > 0) {
856 for (final Element element : elements) {
857 element.remove();
858 }
859
860 return body.html();
861 } else {
862
863 return content;
864 }
865 }
866
867
868
869
870
871
872
873
874
875
876
877
878
879 public String replace(final String content, final String selector, final String replacement) {
880 return replaceAll(content, Collections.singletonMap(selector, replacement));
881 }
882
883
884
885
886
887
888
889
890
891
892
893
894 public String replaceAll(final String content, final Map<String, String> replacements) {
895
896 final Element body = parse(content).body();
897
898 boolean modified = false;
899 for (final Entry<String, String> replacementEntry : replacements.entrySet()) {
900 final String selector = replacementEntry.getKey();
901 final String replacement = replacementEntry.getValue();
902
903 final List<Element> elements = body.select(selector);
904 if (elements.size() > 0) {
905
906
907 final Element replacementElem = parse(replacement).body().child(0);
908
909 if (replacementElem != null) {
910 for (final Element element : elements) {
911 element.replaceWith(replacementElem.clone());
912 }
913
914 modified = true;
915 }
916 }
917 }
918
919 if (modified) {
920 return body.html();
921 } else {
922
923 return content;
924 }
925 }
926
927
928
929
930
931
932
933
934
935
936
937
938
939 public String replaceWith(final String content, final String selector, final String newElement) {
940
941 final Element body = parse(content).body();
942
943 boolean modified = false;
944 final List<Element> elements = body.select(selector);
945 if (elements.size() > 0) {
946
947
948 final Element replacementElem = parse(newElement).body().child(0);
949
950 if (replacementElem != null) {
951 for (final Element element : elements) {
952 final List<Node> children = element.childNodes();
953 final Element el = replacementElem.clone();
954 for (final Node child : children) {
955 el.appendChild(child.clone());
956 }
957 element.replaceWith(el);
958 }
959
960 modified = true;
961 }
962 }
963
964 if (modified) {
965 return body.html();
966 } else {
967
968 return content;
969 }
970 }
971
972
973
974
975
976
977
978
979
980
981
982
983 @SuppressWarnings("null")
984 public List<String> text(@Nullable final String content, @Nonnull final String selector) {
985 if (Strings.isNullOrEmpty(content)) {
986 return emptyList();
987 }
988 final Element body = parse(content).body();
989
990 final List<Element> elements = body.select(selector);
991 final List<String> texts = new ArrayList<>();
992
993 for (final Element element : elements) {
994 texts.add(element.text());
995 }
996
997 return texts;
998 }
999
1000 public String link(ISkinConfig config, String href, String name, String target, String className) {
1001 return link(config, href, name, target, null, null, className);
1002 }
1003
1004 public String link(ISkinConfig config,
1005 String href,
1006 String name,
1007 String target,
1008 String img,
1009 String icon,
1010 String className) {
1011
1012 final Document doc = parse("");
1013 String css = Strings.isNullOrEmpty(className) ? "" : className;
1014 if (config.isExternalLink(href)) {
1015 css = "external-link " + className;
1016 }
1017 return JsoupUtils.link(doc, href, name, target, config.relativeLink(img), icon, css).outerHtml();
1018 }
1019
1020 public String image(ISkinConfig config, String src, String alt, String border, String width, String height) {
1021 final Document doc = parse("");
1022 return JsoupUtils.image(doc, config.relativeLink(src), alt, border, width, height).outerHtml();
1023 }
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047 public String headingAnchorToId(final String content) {
1048
1049 final Element body = parse(content).body();
1050
1051
1052 final List<String> headNoIds = concat(HEADINGS, ":not([id])", true);
1053
1054
1055 final String nameA = "a[name]:not([href])";
1056
1057
1058 final List<Element> headingsInnerA = body.select(String.join(", ", concat(headNoIds, ":has(" + nameA + ")", true)));
1059
1060 boolean modified = false;
1061 for (final Element heading : headingsInnerA) {
1062 final List<Element> anchors = heading.select(nameA);
1063
1064 if (!anchors.isEmpty()) {
1065 anchorToId(heading, anchors.get(0));
1066 modified = true;
1067 }
1068 }
1069
1070
1071 final List<Element> headingsPreA = body.select(String.join(", ", concat(headNoIds, nameA + " + ", false)));
1072
1073 for (final Element heading : headingsPreA) {
1074 final Element anchor = heading.previousElementSibling();
1075 if (anchor != null) {
1076 anchorToId(heading, anchor);
1077 modified = true;
1078 }
1079 }
1080
1081
1082
1083
1084 final List<Element> anchorsPreH = body.select(String.join(", ", concat(headNoIds, " + " + nameA, true)));
1085
1086 for (final Element anchor : anchorsPreH) {
1087 final Element heading = anchor.previousElementSibling();
1088 if (heading != null) {
1089 anchorToId(heading, anchor);
1090 modified = true;
1091 }
1092 }
1093
1094 if (modified) {
1095 return body.html();
1096 } else {
1097
1098 return content;
1099 }
1100 }
1101
1102
1103
1104
1105
1106
1107
1108 private static void anchorToId(final Element heading, final Element anchor) {
1109
1110 if ("a".equals(anchor.tagName()) && heading.id().isEmpty()) {
1111 final String aName = anchor.attr("name");
1112 if (!aName.isEmpty()) {
1113
1114 heading.attr("id", aName);
1115
1116
1117 anchor.remove();
1118 }
1119 }
1120 }
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134 public static List<String> concat(final List<String> elements, final String text, final boolean append) {
1135 final List<String> concats = new ArrayList<>();
1136
1137 for (final String element : elements) {
1138 concats.add(append ? element + text : text + element);
1139 }
1140
1141 return concats;
1142 }
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167 public String ensureHeadingIds(final String pageType,
1168 final String currentPage,
1169 final String content,
1170 final String idSeparator) {
1171 final List<String> excludedPages = Arrays.asList("checkstyle-aggregate", "checkstyle");
1172
1173 final Element body = parse(content).body();
1174
1175
1176 if (excludedPages.contains(currentPage)) {
1177 return content;
1178 }
1179
1180
1181 final List<Element> idElems = body.select("*[id]");
1182
1183 final Set<String> ids = new HashSet<>();
1184 boolean modified = false;
1185 for (final Element idElem : idElems) {
1186
1187
1188 final String id = idElem.id();
1189 idElem.attr("id", slug(id, idSeparator, false));
1190 modified = true;
1191
1192 ids.add(idElem.id());
1193 }
1194
1195
1196 final List<String> headIds = concat(HEADINGS, "[id]", true);
1197
1198 final List<Element> headingIds = body.select(String.join(", ", headIds));
1199
1200 for (final Element heading : headingIds) {
1201 final String headingText = heading.text();
1202 String headingSlug = slug(headingText, idSeparator, true);
1203
1204 if (headingSlug.length() > SLUG_SIZE) {
1205 headingSlug = headingSlug.substring(0, SLUG_SIZE);
1206 }
1207 final String headingId = generateUniqueId(pageType, currentPage, ids, headingSlug);
1208
1209 heading.attr("id", headingId);
1210 }
1211
1212 final List<String> headNoIds = concat(HEADINGS, ":not([id], .no-anchor)", true);
1213
1214
1215 final List<Element> headingsNoId = body.select(String.join(", ", headNoIds));
1216
1217 if (!headingsNoId.isEmpty() || modified) {
1218 for (final Element heading : headingsNoId) {
1219
1220 final String headingText = heading.text();
1221 String headingSlug = slug(headingText, idSeparator, true);
1222
1223 if (headingSlug.length() > SLUG_SIZE) {
1224 headingSlug = headingSlug.substring(0, SLUG_SIZE);
1225 }
1226 final String headingId = generateUniqueId(pageType, currentPage, ids, headingSlug);
1227
1228 heading.attr("id", headingId);
1229 }
1230 }
1231
1232 return body.html();
1233 }
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248 private static String generateUniqueId(final String pageType,
1249 final String currentPage,
1250 final Set<String> ids,
1251 final String idBase) {
1252 String id = idBase;
1253 int counter = 1;
1254 while (ids.contains(id)) {
1255 id = idBase + String.valueOf(counter++);
1256 }
1257
1258
1259 ids.add(id);
1260 if ("frame".equals(pageType)) {
1261 id = currentPage + SEPARATOR_TOC + id;
1262 }
1263 return id;
1264 }
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278 public String fixTableHeads(final String content) {
1279
1280 final Element body = parse(content).body();
1281
1282 final List<Element> tables = body.select("table");
1283
1284 for (final Element table : tables) {
1285
1286 final List<Element> tableHeadRows = table.select("tbody > tr:has(th)");
1287
1288 if (tableHeadRows.size() == 1) {
1289
1290 for (final Element row : tableHeadRows) {
1291
1292
1293 row.remove();
1294
1295
1296 final Element thead = new Element(Tag.valueOf("thead"), "");
1297 thead.appendChild(row);
1298
1299 table.prependChild(thead);
1300 }
1301 }
1302 }
1303 return body.html();
1304 }
1305
1306
1307 private static final Pattern NONLATIN = Pattern.compile("[^\\w-]");
1308
1309
1310 private static final Pattern WHITESPACE = Pattern.compile("[\\s]");
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321 public static String slug(final String input) {
1322 return slug(input, DEFAULT_SLUG_SEPARATOR, true);
1323 }
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337 private static String slug(final String input, final String separator, boolean lowercase) {
1338 final String nowhitespace = WHITESPACE.matcher(input).replaceAll(separator);
1339 final String normalized = Normalizer.normalize(nowhitespace, Form.NFD);
1340 String slug = NONLATIN.matcher(normalized).replaceAll("");
1341 if (lowercase) {
1342 return slug.toLowerCase(Locale.ENGLISH);
1343 } else {
1344 return slug;
1345 }
1346 }
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364 public List<? extends IdElement> headingTree(final String content, final List<String> sections) {
1365
1366 final List<String> sectionContents = this.split(content, "hr");
1367 final List<String> headIds = concat(HEADINGS, "[id]:not(.no-anchor)", true);
1368 final List<HeadingItem> headingItems = new ArrayList<>();
1369
1370 int index = 0;
1371 for (final String sectionContent : sectionContents) {
1372 final String sectionType = index < sections.size() ? sections.get(index++) : "";
1373
1374
1375 if ("carousel".equals(sectionType)) {
1376 continue;
1377 }
1378 final Element body = parse(sectionContent).body();
1379
1380 final List<Element> headings = body.select(String.join(", ", headIds));
1381 for (final Element heading : headings) {
1382 if (LOGGER.isTraceEnabled()) {
1383 LOGGER.trace("Found heading: {} - {}", heading.id(), heading.text());
1384 }
1385 headingItems.add(new HeadingItem(heading.id(), heading.nodeName(), heading.text(), headingIndex(heading)));
1386 }
1387 }
1388
1389 final List<HeadingItem> topHeadings = new ArrayList<>();
1390 final Stack<HeadingItem> parentHeadings = new Stack<>();
1391
1392 for (final HeadingItem heading : headingItems) {
1393
1394 while (!parentHeadings.isEmpty() && parentHeadings.peek().headingLevel >= heading.headingLevel) {
1395 parentHeadings.pop();
1396 }
1397
1398 if (parentHeadings.isEmpty()) {
1399
1400 topHeadings.add(heading);
1401 } else {
1402
1403 parentHeadings.peek().children.add(heading);
1404 }
1405
1406
1407 parentHeadings.push(heading);
1408 }
1409
1410 return topHeadings;
1411 }
1412
1413
1414
1415
1416
1417
1418
1419 private static int headingIndex(final Element element) {
1420 final String tagName = element.tagName();
1421 if (tagName.startsWith("h")) {
1422 try {
1423 return Integer.parseInt(tagName.substring(1));
1424 } catch (final Exception ex) {
1425 throw new IllegalArgumentException("Must be a header tag: " + tagName, ex);
1426 }
1427 } else {
1428 throw new IllegalArgumentException("Must be a header tag: " + tagName);
1429 }
1430 }
1431
1432
1433
1434
1435 private static final class HeadingItem implements IdElement {
1436
1437
1438 private final String id;
1439
1440
1441 private final String tagName;
1442
1443
1444 private final String text;
1445
1446
1447 private final int headingLevel;
1448
1449
1450 private final List<HeadingItem> children = new ArrayList<>();
1451
1452 private HeadingItem(final String id, final String tagName, final String text, final int headingLevel) {
1453 this.id = id;
1454 this.tagName = tagName;
1455 this.text = text;
1456 this.headingLevel = headingLevel;
1457 }
1458
1459 @Override
1460 public String getId() {
1461 return id;
1462 }
1463
1464 @Override
1465 public String getTagName() {
1466 return tagName;
1467 }
1468
1469 @Override
1470 public String getText() {
1471 return text;
1472 }
1473
1474 @Override
1475 public List<HeadingItem> getItems() {
1476 return Collections.unmodifiableList(children);
1477 }
1478
1479 @Override
1480 public int getHeadingLevel() {
1481 return headingLevel;
1482 }
1483
1484 @Override
1485 public String toString() {
1486 return ToStringBuilder.reflectionToString(this);
1487 }
1488 }
1489
1490
1491
1492
1493
1494
1495
/**
 * An HTML element identified by an id, able to hold nested items of the same
 * kind.
 */
public interface IdElement {

    /**
     * @return the id of the element.
     */
    String getId();

    /**
     * @return the tag name of the element.
     */
    String getTagName();

    /**
     * @return the text of the element.
     */
    String getText();

    /**
     * @return the heading level of the element.
     */
    int getHeadingLevel();

    /**
     * @return the items nested under this element.
     */
    List<? extends IdElement> getItems();
}
1529 }