11package org .nibor .autolink ;
22
3- import org .nibor .autolink .internal .EmailScanner ;
3+ import org .nibor .autolink .internal .* ;
44import org .nibor .autolink .internal .Scanner ;
5- import org .nibor .autolink .internal .UrlScanner ;
6- import org .nibor .autolink .internal .WwwScanner ;
75
86import java .util .*;
97
@@ -33,6 +31,7 @@ public static Builder builder() {
3331 *
3432 * @param input the input text, must not be null
3533 * @return a lazy iterable for the links in order that they appear in the input, never null
34+ * @see #extractSpans(CharSequence) extractSpans to also get spans for the plain text pieces of the input
3635 */
3736 public Iterable <LinkSpan > extractLinks (final CharSequence input ) {
3837 if (input == null ) {
@@ -46,6 +45,28 @@ public Iterator<LinkSpan> iterator() {
4645 };
4746 }
4847
48+ /**
49+ * Extract spans from the input text. A span is a substring of the input and represents either a link
50+ * (see {@link LinkSpan}) or plain text outside a link.
51+ * <p>
52+ * Using this is more convenient than {@link #extractLinks} if you want to transform the whole input text to
53+ * a different format.
54+ *
55+ * @param input the input text, must not be null
56+ * @return a lazy iterable for the spans in order that they appear in the input, never null
57+ */
58+ public Iterable <Span > extractSpans (final CharSequence input ) {
59+ if (input == null ) {
60+ throw new NullPointerException ("input must not be null" );
61+ }
62+ return new Iterable <Span >() {
63+ @ Override
64+ public Iterator <Span > iterator () {
65+ return new SpanIterator (input , new LinkIterator (input ));
66+ }
67+ };
68+ }
69+
4970 private Scanner trigger (char c ) {
5071 switch (c ) {
5172 case ':' :
@@ -83,7 +104,7 @@ public Builder linkTypes(Set<LinkType> linkTypes) {
83104
84105 /**
85106 * @param emailDomainMustHaveDot true if the domain in an email address is required to have more than one part,
86- * false if it can also just have single part (e.g. foo@com); true by default
107+ * false if it can also just have single part (e.g. foo@com); true by default
87108 * @return this builder
88109 */
89110 public Builder emailDomainMustHaveDot (boolean emailDomainMustHaveDot ) {
@@ -160,4 +181,60 @@ private void setNext() {
160181 }
161182 }
162183 }
184+
185+ private class SpanIterator implements Iterator <Span > {
186+
187+ private final CharSequence input ;
188+ private final LinkIterator linkIterator ;
189+
190+ private int index = 0 ;
191+ private LinkSpan nextLink = null ;
192+
193+ public SpanIterator (CharSequence input , LinkIterator linkIterator ) {
194+ this .input = input ;
195+ this .linkIterator = linkIterator ;
196+ }
197+
198+ @ Override
199+ public boolean hasNext () {
200+ return index < input .length ();
201+ }
202+
203+ private Span nextTextSpan (int endIndex ) {
204+ Span span = new SpanImpl (index , endIndex );
205+ index = endIndex ;
206+ return span ;
207+ }
208+
209+ @ Override
210+ public Span next () {
211+ if (!hasNext ()) {
212+ throw new NoSuchElementException ();
213+ }
214+
215+ if (nextLink == null ) {
216+ if (linkIterator .hasNext ()) {
217+ nextLink = linkIterator .next ();
218+ } else {
219+ return nextTextSpan (input .length ());
220+ }
221+ }
222+
223+ if (index < nextLink .getBeginIndex ()) {
224+ // text before link, return plain
225+ return nextTextSpan (nextLink .getBeginIndex ());
226+ } else {
227+ // at link, return it and make sure we continue after it next time
228+ Span span = nextLink ;
229+ index = nextLink .getEndIndex ();
230+ nextLink = null ;
231+ return span ;
232+ }
233+ }
234+
235+ @ Override
236+ public void remove () {
237+ throw new UnsupportedOperationException ("remove" );
238+ }
239+ }
163240}
0 commit comments