View Javadoc
1   /**
2    * Diff Match and Patch
3    *
4    * Copyright 2006 Google Inc.
5    * http://code.google.com/p/google-diff-match-patch/
6    *
7    * Licensed under the Apache License, Version 2.0 (the "License");
8    * you may not use this file except in compliance with the License.
9    * You may obtain a copy of the License at
10   *
11   *   http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  package com.jsql.model.injection.strategy.blind.patch;
20  
21  import com.jsql.util.LogLevelUtil;
22  import org.apache.commons.lang3.StringUtils;
23  import org.apache.logging.log4j.LogManager;
24  import org.apache.logging.log4j.Logger;
25  
26  import java.net.URLDecoder;
27  import java.net.URLEncoder;
28  import java.nio.charset.StandardCharsets;
29  import java.util.*;
30  import java.util.regex.Matcher;
31  import java.util.regex.Pattern;
32  
33  /*
34   * Functions for diff, match and patch.
35   * Computes the difference between two texts to create a patch.
36   * Applies the patch onto another text, allowing for errors.
37   *
38   * @author fraser@google.com (Neil Fraser)
39   */
40  
41  /**
42   * Class containing the diff, match and patch methods.
43   * Also contains the behaviour settings.
44   */
45  public class DiffMatchPatch {
46  
47      /**
48       * Log4j logger sent to view.
49       */
50      private static final Logger LOGGER = LogManager.getRootLogger();
51  
52      // Defaults.
53      // Set these on your diff_match_patch instance to override the defaults.
54  
55      /**
56       * Number of seconds to map a diff before giving up (0 for infinity).
57       */
58      public static final float DIFF_TIMEOUT = 1.0f;
59  
60      /**
61       * Cost of an empty edit operation in terms of edit characters.
62       */
63      public static final short DIFF_EDIT_COST = 4;
64  
65      /**
66       * At what point is no match declared (0.0 = perfection, 1.0 = very loose).
67       */
68      public static final float MATCH_THRESHOLD = 0.5f;
69  
70      /**
71       * How far to search for a match (0 = exact location, 1000+ = broad match).
72       * A match this many characters away from the expected location will add
73       * 1.0 to the score (0.0 is a perfect match).
74       */
75      public static final int MATCH_DISTANCE = 1000;
76  
77      /**
78       * When deleting a large block of text (over ~64 characters), how close do
79       * the contents have to be to match the expected contents. (0.0 = perfection,
80       * 1.0 = very loose).  Note that Match_Threshold controls how closely the
81       * end points of a delete need to match.
82       */
83      public static final float PATCH_DELETE_THRESHOLD = 0.5f;
84  
85      /**
86       * Chunk size for context length.
87       */
88      public static final short PATCH_MARGIN = 4;
89  
90      /**
91       * The number of bits in an int.
92       */
93      private static final short MATCH_MAX_BITS = 32;
94  
95      // Define some regex patterns for matching boundaries.
96      private static final Pattern BLANK_LINE_END = Pattern.compile("\\n\\r?\\n\\Z", Pattern.DOTALL);
97      private static final Pattern BLANK_LINE_START = Pattern.compile("\\A\\r?\\n\\r?\\n", Pattern.DOTALL);
98  
99      /**
100      * Internal class for returning results from diff_linesToChars().
101      * Other less paranoid languages just use a three-element array.
102      */
103     protected static class LinesToCharsResult {
104         protected final String chars1;
105         protected final String chars2;
106         protected final List<String> lineArray;
107 
108         protected LinesToCharsResult(String chars1, String chars2,
109                                      List<String> lineArray) {
110             this.chars1 = chars1;
111             this.chars2 = chars2;
112             this.lineArray = lineArray;
113         }
114     }
115 
116     //  DIFF FUNCTIONS
117 
118     /**
119      * The data structure representing a diff is a Linked list of Diff objects:
120      * {Diff(Operation.DELETE, "Hello"), Diff(Operation.INSERT, "Goodbye"),
121      *  Diff(Operation.EQUAL, " world.")}
122      * which means: delete "Hello", add "Goodbye" and keep " world."
123      */
124     public enum Operation {
125         DELETE, INSERT, EQUAL
126     }
127 
128     /**
129      * Find the differences between two texts.
130      * Run a faster, slightly less optimal diff.
131      * This method allows the 'checklines' of diff_main() to be optional.
132      * Most of the time checklines is wanted, so default to true.
133      * @param text1 Old string to be diffed.
134      * @param text2 New string to be diffed.
135      * @return Linked List of Diff objects.
136      */
137     public List<Diff> diffMain(String text1, String text2) {
138         return this.diffMain(text1, text2, true);
139     }
140 
141     /**
142      * Find the differences between two texts.
143      * @param text1 Old string to be diffed.
144      * @param text2 New string to be diffed.
145      * @param checklines Speedup flag.  If false, then don't run a
146      *     line-level diff first to identify the changed areas.
147      *     If true, then run a faster slightly less optimal diff.
148      * @return Linked List of Diff objects.
149      */
150     public LinkedList<Diff> diffMain(String text1, String text2, boolean checklines) {
151         // Set a deadline by which time the diff must be complete.
152         long deadline = System.currentTimeMillis() + (long) (DiffMatchPatch.DIFF_TIMEOUT * 1000);
153         return this.diffMain(text1, text2, checklines, deadline);
154     }
155 
156     /**
157      * Find the differences between two texts.  Simplifies the problem by
158      * stripping any common prefix or suffix off the texts before diffing.
159      * @param valueText1 Old string to be diffed.
160      * @param valueText2 New string to be diffed.
161      * @param checklines Speedup flag.  If false, then don't run a
162      *     line-level diff first to identify the changed areas.
163      *     If true, then run a faster slightly less optimal diff.
164      * @param deadline Time when the diff should be complete by.  Used
165      *     internally for recursive calls.  Users should set DiffTimeout instead.
166      * @return Linked List of Diff objects.
167      */
168     private LinkedList<Diff> diffMain(String valueText1, String valueText2, boolean checklines, long deadline) {
169 
170         String text1 = valueText1;
171         String text2 = valueText2;
172 
173         // Check for null inputs.
174         if (text1 == null || text2 == null) {
175             throw new IllegalArgumentException("Null inputs. (diff_main)");
176         }
177 
178         // Check for equality (speedup).
179         LinkedList<Diff> diffs;
180         if (text1.equals(text2)) {
181             diffs = new LinkedList<>();
182             if (!text1.isEmpty()) {
183                 diffs.add(new Diff(Operation.EQUAL, text1));
184             }
185             return diffs;
186         }
187 
188         // Trim off common prefix (speedup).
189         int commonlength = this.diffCommonPrefix(text1, text2);
190         String commonprefix = text1.substring(0, commonlength);
191         text1 = text1.substring(commonlength);
192         text2 = text2.substring(commonlength);
193 
194         // Trim off common suffix (speedup).
195         commonlength = this.diffCommonSuffix(text1, text2);
196         String commonsuffix = text1.substring(text1.length() - commonlength);
197         text1 = text1.substring(0, text1.length() - commonlength);
198         text2 = text2.substring(0, text2.length() - commonlength);
199 
200         // Compute the diff on the middle block.
201         diffs = this.diffCompute(text1, text2, checklines, deadline);
202 
203         // Restore the prefix and suffix.
204         if (!commonprefix.isEmpty()) {
205             diffs.addFirst(new Diff(Operation.EQUAL, commonprefix));
206         }
207         if (!commonsuffix.isEmpty()) {
208             diffs.addLast(new Diff(Operation.EQUAL, commonsuffix));
209         }
210 
211         this.diffCleanupMerge(diffs);
212         return diffs;
213     }
214 
215     /**
216      * Find the differences between two texts.  Assumes that the texts do not
217      * have any common prefix or suffix.
218      * @param text1 Old string to be diffed.
219      * @param text2 New string to be diffed.
220      * @param checklines Speedup flag.  If false, then don't run a
221      *     line-level diff first to identify the changed areas.
222      *     If true, then run a faster slightly less optimal diff.
223      * @param deadline Time when the diff should be complete by.
224      * @return Linked List of Diff objects.
225      */
226     private LinkedList<Diff> diffCompute(String text1, String text2, boolean checklines, long deadline) {
227 
228         LinkedList<Diff> diffs = new LinkedList<>();
229 
230         if (text1.isEmpty()) {
231             // Just add some text (speedup).
232             diffs.add(new Diff(Operation.INSERT, text2));
233             return diffs;
234         }
235 
236         if (text2.isEmpty()) {
237             // Just delete some text (speedup).
238             diffs.add(new Diff(Operation.DELETE, text1));
239             return diffs;
240         }
241 
242         {
243             // New scope to garbage collect longtext and shorttext.
244             String longtext = text1.length() > text2.length() ? text1 : text2;
245             String shorttext = text1.length() > text2.length() ? text2 : text1;
246             int i = longtext.indexOf(shorttext);
247             if (i != -1) {
248                 // Shorter text is inside the longer text (speedup).
249                 Operation op = (text1.length() > text2.length()) ?
250                         Operation.DELETE : Operation.INSERT;
251                 diffs.add(new Diff(op, longtext.substring(0, i)));
252                 diffs.add(new Diff(Operation.EQUAL, shorttext));
253                 diffs.add(new Diff(op, longtext.substring(i + shorttext.length())));
254                 return diffs;
255             }
256 
257             if (shorttext.length() == 1) {
258                 // Single character string.
259                 // After the previous speedup, the character can't be an equality.
260                 diffs.add(new Diff(Operation.DELETE, text1));
261                 diffs.add(new Diff(Operation.INSERT, text2));
262                 return diffs;
263             }
264         }
265 
266         // Check to see if the problem can be split in two.
267         String[] hm = this.diffHalfMatch(text1, text2);
268         if (hm != null) {
269             // A half-match was found, sort out the return data.
270             String text1A = hm[0];
271             String text1B = hm[1];
272             String text2A = hm[2];
273             String text2B = hm[3];
274             String midCommon = hm[4];
275             // Send both pairs off for separate processing.
276             LinkedList<Diff> diffsA = this.diffMain(text1A, text2A, checklines, deadline);
277             List<Diff> diffsB = this.diffMain(text1B, text2B, checklines, deadline);
278             // Merge the results.
279             diffs = diffsA;
280             diffs.add(new Diff(Operation.EQUAL, midCommon));
281             diffs.addAll(diffsB);
282             return diffs;
283         }
284 
285         if (checklines && text1.length() > 100 && text2.length() > 100) {
286             return this.diffLineMode(text1, text2, deadline);
287         }
288 
289         return this.diffBisect(text1, text2, deadline);
290     }
291 
292     /**
293      * Do a quick line-level diff on both strings, then rediff the parts for
294      * greater accuracy.
295      * This speedup can produce non-minimal diffs.
296      * @param valueText1 Old string to be diffed.
297      * @param valueText2 New string to be diffed.
298      * @param deadline Time when the diff should be complete by.
299      * @return Linked List of Diff objects.
300      */
301     private LinkedList<Diff> diffLineMode(String valueText1, String valueText2, long deadline) {
302 
303         // Scan the text on a line-by-line basis first.
304         LinesToCharsResult b = this.diffLinesToChars(valueText1, valueText2);
305         String text1 = b.chars1;
306         String text2 = b.chars2;
307         List<String> linearray = b.lineArray;
308 
309         LinkedList<Diff> diffs = this.diffMain(text1, text2, false, deadline);
310 
311         // Convert the diff back to original text.
312         this.diffCharsToLines(diffs, linearray);
313         // Eliminate freak matches (e.g. blank lines)
314         this.diffCleanupSemantic(diffs);
315 
316         // Rediff any replacement blocks, this time character-by-character.
317         // Add a dummy entry at the end.
318         diffs.add(new Diff(Operation.EQUAL, StringUtils.EMPTY));
319         int countDelete = 0;
320         int countInsert = 0;
321         StringBuilder textDelete = new StringBuilder();
322         StringBuilder textInsert = new StringBuilder();
323         ListIterator<Diff> pointer = diffs.listIterator();
324         Diff thisDiff = pointer.next();
325 
326         while (thisDiff != null) {
327             switch (thisDiff.getOperation()) {
328                 case INSERT:
329                     countInsert++;
330                     textInsert.append(thisDiff.getText());
331                     break;
332                 case DELETE:
333                     countDelete++;
334                     textDelete.append(thisDiff.getText());
335                     break;
336                 case EQUAL:
337                     // Upon reaching an equality, check for prior redundancies.
338                     if (countDelete >= 1 && countInsert >= 1) {
339                         // Delete the offending records and add the merged ones.
340                         pointer.previous();
341                         for (int j = 0; j < countDelete + countInsert; j++) {
342                             pointer.previous();
343                             pointer.remove();
344                         }
345                         for (Diff newDiff : this.diffMain(textDelete.toString(), textInsert.toString(), false, deadline)) {
346                             pointer.add(newDiff);
347                         }
348                     }
349                     countInsert = 0;
350                     countDelete = 0;
351                     textDelete.setLength(0);
352                     textInsert.setLength(0);
353                     break;
354             }
355             thisDiff = pointer.hasNext() ? pointer.next() : null;
356         }
357         diffs.removeLast();  // Remove the dummy entry at the end.
358 
359         return diffs;
360     }
361 
362     /**
363      * Find the 'middle snake' of a diff, split the problem in two
364      * and return the recursively constructed diff.
365      * See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
366      * @param text1 Old string to be diffed.
367      * @param text2 New string to be diffed.
368      * @param deadline Time at which to bail if not yet complete.
369      * @return LinkedList of Diff objects.
370      */
371     protected LinkedList<Diff> diffBisect(String text1, String text2, long deadline) {
372 
373         // Cache the text lengths to prevent multiple calls.
374         int text1Length = text1.length();
375         int text2Length = text2.length();
376         int maxD = (text1Length + text2Length + 1) / 2;
377         int vLength = 2 * maxD;
378         int[] v1 = new int[vLength];
379         int[] v2 = new int[vLength];
380         for (int x = 0; x < vLength; x++) {
381             v1[x] = -1;
382             v2[x] = -1;
383         }
384         v1[maxD + 1] = 0;
385         v2[maxD + 1] = 0;
386         int delta = text1Length - text2Length;
387         // If the total number of characters is odd, then the front path will
388         // collide with the reverse path.
389         boolean front = delta % 2 != 0;
390         // Offsets for start and end of k loop.
391         // Prevents mapping of space beyond the grid.
392         int k1start = 0;
393         int k1end = 0;
394         int k2start = 0;
395         int k2end = 0;
396 
397         for (int d = 0; d < maxD; d++) {
398             // Bail out if deadline is reached.
399             if (System.currentTimeMillis() > deadline) {
400                 break;
401             }
402 
403             // Walk the front path one step.
404             for (int k1 = -d + k1start; k1 <= d - k1end; k1 += 2) {
405                 int k1Offset = maxD + k1;
406                 int x1;
407                 if (k1 == -d || (k1 != d && v1[k1Offset - 1] < v1[k1Offset + 1])) {
408                     x1 = v1[k1Offset + 1];
409                 } else {
410                     x1 = v1[k1Offset - 1] + 1;
411                 }
412                 int y1 = x1 - k1;
413                 while (x1 < text1Length && y1 < text2Length
414                         && text1.charAt(x1) == text2.charAt(y1)) {
415                     x1++;
416                     y1++;
417                 }
418                 v1[k1Offset] = x1;
419                 if (x1 > text1Length) {
420                     // Ran off the right of the graph.
421                     k1end += 2;
422                 } else if (y1 > text2Length) {
423                     // Ran off the bottom of the graph.
424                     k1start += 2;
425                 } else if (front) {
426                     int k2Offset = maxD + delta - k1;
427                     if (k2Offset >= 0 && k2Offset < vLength && v2[k2Offset] != -1) {
428                         // Mirror x2 onto top-left coordinate system.
429                         int x2 = text1Length - v2[k2Offset];
430                         if (x1 >= x2) {
431                             // Overlap detected.
432                             return this.diffBisectSplit(text1, text2, x1, y1, deadline);
433                         }
434                     }
435                 }
436             }
437 
438             // Walk the reverse path one step.
439             for (int k2 = -d + k2start; k2 <= d - k2end; k2 += 2) {
440                 int k2Offset = maxD + k2;
441                 int x2;
442                 if (k2 == -d || (k2 != d && v2[k2Offset - 1] < v2[k2Offset + 1])) {
443                     x2 = v2[k2Offset + 1];
444                 } else {
445                     x2 = v2[k2Offset - 1] + 1;
446                 }
447                 int y2 = x2 - k2;
448                 while (x2 < text1Length && y2 < text2Length
449                         && text1.charAt(text1Length - x2 - 1)
450                         == text2.charAt(text2Length - y2 - 1)) {
451                     x2++;
452                     y2++;
453                 }
454                 v2[k2Offset] = x2;
455                 if (x2 > text1Length) {
456                     // Ran off the left of the graph.
457                     k2end += 2;
458                 } else if (y2 > text2Length) {
459                     // Ran off the top of the graph.
460                     k2start += 2;
461                 } else if (!front) {
462                     int k1Offset = maxD + delta - k2;
463                     if (k1Offset >= 0 && k1Offset < vLength && v1[k1Offset] != -1) {
464                         int x1 = v1[k1Offset];
465                         int y1 = maxD + x1 - k1Offset;
466                         // Mirror x2 onto top-left coordinate system.
467                         x2 = text1Length - x2;
468                         if (x1 >= x2) {
469                             // Overlap detected.
470                             return this.diffBisectSplit(text1, text2, x1, y1, deadline);
471                         }
472                     }
473                 }
474             }
475         }
476         // Diff took too long and hit the deadline or
477         // number of diffs equals number of characters, no commonality at all.
478         LinkedList<Diff> diffs = new LinkedList<>();
479         diffs.add(new Diff(Operation.DELETE, text1));
480         diffs.add(new Diff(Operation.INSERT, text2));
481         return diffs;
482     }
483 
484     /**
485      * Given the location of the 'middle snake', split the diff in two parts
486      * and recurse.
487      * @param text1 Old string to be diffed.
488      * @param text2 New string to be diffed.
489      * @param x Index of split point in text1.
490      * @param y Index of split point in text2.
491      * @param deadline Time at which to bail if not yet complete.
492      * @return LinkedList of Diff objects.
493      */
494     private LinkedList<Diff> diffBisectSplit(String text1, String text2, int x, int y, long deadline) {
495         String text1a = text1.substring(0, x);
496         String text2a = text2.substring(0, y);
497         String text1b = text1.substring(x);
498         String text2b = text2.substring(y);
499 
500         // Compute both diffs serially.
501         LinkedList<Diff> diffs = this.diffMain(text1a, text2a, false, deadline);
502         List<Diff> diffsb = this.diffMain(text1b, text2b, false, deadline);
503 
504         diffs.addAll(diffsb);
505         return diffs;
506     }
507 
508     /**
509      * Split two texts into a list of strings.  Reduce the texts to a string of
510      * hashes where each Unicode character represents one line.
511      * @param text1 First string.
512      * @param text2 Second string.
513      * @return An object containing the encoded text1, the encoded text2 and
514      *     the List of unique strings.  The zeroth element of the List of
515      *     unique strings is intentionally blank.
516      */
517     protected LinesToCharsResult diffLinesToChars(String text1, String text2) {
518         List<String> lineArray = new ArrayList<>();
519         Map<String, Integer> lineHash = new HashMap<>();
520         // e.g. linearray[4] == "Hello\n"
521         // e.g. linehash.get("Hello\n") == 4
522 
523         // "\x00" is a valid character, but various debuggers don't like it.
524         // So we'll insert a junk entry to avoid generating a null character.
525         lineArray.add(StringUtils.EMPTY);
526 
527         String chars1 = this.diffLinesToCharsMunge(text1, lineArray, lineHash);
528         String chars2 = this.diffLinesToCharsMunge(text2, lineArray, lineHash);
529         return new LinesToCharsResult(chars1, chars2, lineArray);
530     }
531 
532     /**
533      * Split a text into a list of strings.  Reduce the texts to a string of
534      * hashes where each Unicode character represents one line.
535      * @param text String to encode.
536      * @param lineArray List of unique strings.
537      * @param lineHash Map of strings to indices.
538      * @return Encoded string.
539      */
540     private String diffLinesToCharsMunge(String text, List<String> lineArray,
541                                          Map<String, Integer> lineHash) {
542 
543         int lineStart = 0;
544         int lineEnd = -1;
545         String line;
546         StringBuilder chars = new StringBuilder();
547         // Walk the text, pulling out a substring for each line.
548         // text.split('\n') would would temporarily double our memory footprint.
549         // Modifying text would create many large strings to garbage collect.
550         while (lineEnd < text.length() - 1) {
551             lineEnd = text.indexOf('\n', lineStart);
552             if (lineEnd == -1) {
553                 lineEnd = text.length() - 1;
554             }
555             line = text.substring(lineStart, lineEnd + 1);
556             lineStart = lineEnd + 1;
557 
558             if (lineHash.containsKey(line)) {
559                 chars.append((char) (int) lineHash.get(line));
560             } else {
561                 lineArray.add(line);
562                 lineHash.put(line, lineArray.size() - 1);
563                 chars.append((char) (lineArray.size() - 1));
564             }
565         }
566         return chars.toString();
567     }
568 
569     /**
570      * Rehydrate the text in a diff from a string of line hashes to real lines of
571      * text.
572      * @param diffs LinkedList of Diff objects.
573      * @param lineArray List of unique strings.
574      */
575     protected void diffCharsToLines(List<Diff> diffs, List<String> lineArray) {
576         StringBuilder text;
577         for (Diff diff : diffs) {
578             text = new StringBuilder();
579             for (int y = 0; y < diff.getText().length(); y++) {
580                 text.append(lineArray.get(diff.getText().charAt(y)));
581             }
582             diff.setText(text.toString());
583         }
584     }
585 
586     /**
587      * Determine the common prefix of two strings
588      * @param text1 First string.
589      * @param text2 Second string.
590      * @return The number of characters common to the start of each string.
591      */
592     public int diffCommonPrefix(String text1, String text2) {
593         // Performance analysis: http://neil.fraser.name/news/2007/10/09/
594         int n = Math.min(text1.length(), text2.length());
595         for (int i = 0; i < n; i++) {
596             if (text1.charAt(i) != text2.charAt(i)) {
597                 return i;
598             }
599         }
600         return n;
601     }
602 
603     /**
604      * Determine the common suffix of two strings
605      * @param text1 First string.
606      * @param text2 Second string.
607      * @return The number of characters common to the end of each string.
608      */
609     public int diffCommonSuffix(String text1, String text2) {
610         // Performance analysis: http://neil.fraser.name/news/2007/10/09/
611         int text1Length = text1.length();
612         int text2Length = text2.length();
613         int n = Math.min(text1Length, text2Length);
614         for (int i = 1; i <= n; i++) {
615             if (text1.charAt(text1Length - i) != text2.charAt(text2Length - i)) {
616                 return i - 1;
617             }
618         }
619         return n;
620     }
621 
622     /**
623      * Determine if the suffix of one string is the prefix of another.
624      * @param valueText1 First string.
625      * @param valueText2 Second string.
626      * @return The number of characters common to the end of the first
627      *     string and the start of the second string.
628      */
629     protected int diffCommonOverlap(String valueText1, String valueText2) {
630 
631         String text1 = valueText1;
632         String text2 = valueText2;
633 
634         // Cache the text lengths to prevent multiple calls.
635         int text1Length = text1.length();
636         int text2Length = text2.length();
637         // Eliminate the null case.
638         if (text1Length == 0 || text2Length == 0) {
639             return 0;
640         }
641         // Truncate the longer string.
642         if (text1Length > text2Length) {
643             text1 = text1.substring(text1Length - text2Length);
644         } else if (text1Length < text2Length) {
645             text2 = text2.substring(0, text1Length);
646         }
647         int textLength = Math.min(text1Length, text2Length);
648         // Quick check for the worst case.
649         if (text1.equals(text2)) {
650             return textLength;
651         }
652 
653         // Start by looking for a single character match
654         // and increase length until no match is found.
655         // Performance analysis: http://neil.fraser.name/news/2010/11/04/
656         int best = 0;
657         int length = 1;
658         while (true) {
659             String pattern = text1.substring(textLength - length);
660             int found = text2.indexOf(pattern);
661             if (found == -1) {
662                 return best;
663             }
664             length += found;
665             if (found == 0 || text1.substring(textLength - length).equals(
666                     text2.substring(0, length))) {
667                 best = length;
668                 length++;
669             }
670         }
671     }
672 
673     /**
674      * Do the two texts share a substring which is at least half the length of
675      * the longer text?
676      * This speedup can produce non-minimal diffs.
677      * @param text1 First string.
678      * @param text2 Second string.
679      * @return Five element String array, containing the prefix of text1, the
680      *     suffix of text1, the prefix of text2, the suffix of text2 and the
681      *     common middle.  Or null if there was no match.
682      */
683     protected String[] diffHalfMatch(String text1, String text2) {
684 
685         String longtext = text1.length() > text2.length() ? text1 : text2;
686         String shorttext = text1.length() > text2.length() ? text2 : text1;
687         if (longtext.length() < 4 || shorttext.length() * 2 < longtext.length()) {
688             return null;  // Pointless.
689         }
690 
691         // First check if the second quarter is the seed for a half-match.
692         String[] hm1 = this.diffHalfMatchI(longtext, shorttext, (longtext.length() + 3) / 4);
693         // Check again based on the third quarter.
694         String[] hm2 = this.diffHalfMatchI(longtext, shorttext, (longtext.length() + 1) / 2);
695         String[] hm;
696         if (hm1 == null && hm2 == null) {
697             return null;
698         } else if (hm2 == null) {
699             hm = hm1;
700         } else if (hm1 == null) {
701             hm = hm2;
702         } else {
703             // Both matched.  Select the longest.
704             hm = hm1[4].length() > hm2[4].length() ? hm1 : hm2;
705         }
706 
707         // A half-match was found, sort out the return data.
708         if (text1.length() > text2.length()) {
709             return hm;
710         } else {
711             return new String[]{hm[2], hm[3], hm[0], hm[1], hm[4]};
712         }
713     }
714 
715     /**
716      * Does a substring of shorttext exist within longtext such that the
717      * substring is at least half the length of longtext?
718      * @param longtext Longer string.
719      * @param shorttext Shorter string.
720      * @param i Start index of quarter length substring within longtext.
721      * @return Five element String array, containing the prefix of longtext, the
722      *     suffix of longtext, the prefix of shorttext, the suffix of shorttext
723      *     and the common middle.  Or null if there was no match.
724      */
725     private String[] diffHalfMatchI(String longtext, String shorttext, int i) {
726 
727         // Start with a 1/4 length substring at position i as a seed.
728         String seed = longtext.substring(i, i + longtext.length() / 4);
729         int j = -1;
730         String bestCommon = StringUtils.EMPTY;
731         String bestLongtextA = StringUtils.EMPTY;
732         String bestLongtextB = StringUtils.EMPTY;
733         String bestShorttextA = StringUtils.EMPTY;
734         String bestShorttextB = StringUtils.EMPTY;
735         while ((j = shorttext.indexOf(seed, j + 1)) != -1) {
736             int prefixLength = this.diffCommonPrefix(longtext.substring(i),
737                     shorttext.substring(j));
738             int suffixLength = this.diffCommonSuffix(longtext.substring(0, i),
739                     shorttext.substring(0, j));
740             if (bestCommon.length() < suffixLength + prefixLength) {
741                 bestCommon = shorttext.substring(j - suffixLength, j)
742                         + shorttext.substring(j, j + prefixLength);
743                 bestLongtextA = longtext.substring(0, i - suffixLength);
744                 bestLongtextB = longtext.substring(i + prefixLength);
745                 bestShorttextA = shorttext.substring(0, j - suffixLength);
746                 bestShorttextB = shorttext.substring(j + prefixLength);
747             }
748         }
749         if (bestCommon.length() * 2 >= longtext.length()) {
750             return new String[]{bestLongtextA, bestLongtextB,
751                     bestShorttextA, bestShorttextB, bestCommon};
752         } else {
753             return null;
754         }
755     }
756 
757     /**
758      * Reduce the number of edits by eliminating semantically trivial equalities.
759      * @param diffs LinkedList of Diff objects.
760      */
761     public void diffCleanupSemantic(LinkedList<Diff> diffs) {
762 
763         if (diffs.isEmpty()) {
764             return;
765         }
766         boolean changes = false;
767         // Synchronized Stack to avoid Exception
768         Stack<Diff> equalities = new Stack<>();  // Stack of qualities.
769         String lastequality = null; // Always equal to equalities.lastElement().text
770         ListIterator<Diff> pointer = diffs.listIterator();
771         // Number of characters that changed prior to the equality.
772         int lengthInsertions1 = 0;
773         int lengthDeletions1 = 0;
774         // Number of characters that changed after the equality.
775         int lengthInsertions2 = 0;
776         int lengthDeletions2 = 0;
777         Diff thisDiff = pointer.next();
778 
779         while (thisDiff != null) {
780             if (thisDiff.getOperation() == Operation.EQUAL) {
781                 // Equality found.
782                 equalities.push(thisDiff);
783                 lengthInsertions1 = lengthInsertions2;
784                 lengthDeletions1 = lengthDeletions2;
785                 lengthInsertions2 = 0;
786                 lengthDeletions2 = 0;
787                 lastequality = thisDiff.getText();
788             } else {
789                 // An insertion or deletion.
790                 if (thisDiff.getOperation() == Operation.INSERT) {
791                     lengthInsertions2 += thisDiff.getText().length();
792                 } else {
793                     lengthDeletions2 += thisDiff.getText().length();
794                 }
795                 // Eliminate an equality that is smaller or equal to the edits on both
796                 // sides of it.
797                 if (
798                         lastequality != null
799                                 && lastequality.length() <= Math.max(lengthInsertions1, lengthDeletions1)
800                                 && lastequality.length() <= Math.max(lengthInsertions2, lengthDeletions2)
801                 ) {
802                     // Walk back to offending equality.
803                     while (thisDiff != equalities.lastElement()) {
804                         thisDiff = pointer.previous();
805                     }
806                     pointer.next();
807 
808                     // Replace equality with a delete.
809                     pointer.set(new Diff(Operation.DELETE, lastequality));
810                     // Insert a corresponding an insert.
811                     pointer.add(new Diff(Operation.INSERT, lastequality));
812 
813                     equalities.pop();  // Throw away the equality we just deleted.
814                     if (!equalities.empty()) {
815                         // Throw away the previous equality (it needs to be reevaluated).
816                         equalities.pop();
817                     }
818                     if (equalities.empty()) {
819                         // There are no previous equalities, walk back to the start.
820                         while (pointer.hasPrevious()) {
821                             pointer.previous();
822                         }
823                     } else {
824                         // There is a safe equality we can fall back to.
825                         thisDiff = equalities.lastElement();
826                         while (thisDiff != pointer.previous()) {
827                             // Intentionally empty loop.
828                         }
829                     }
830 
831                     lengthInsertions1 = 0;  // Reset the counters.
832                     lengthInsertions2 = 0;
833                     lengthDeletions1 = 0;
834                     lengthDeletions2 = 0;
835                     lastequality = null;
836                     changes = true;
837                 }
838             }
839             thisDiff = pointer.hasNext() ? pointer.next() : null;
840         }
841 
842         // Normalize the diff.
843         if (changes) {
844             this.diffCleanupMerge(diffs);
845         }
846         this.diffCleanupSemanticLossless(diffs);
847 
848         // Find any overlaps between deletions and insertions.
849         // e.g: <del>abcxxx</del><ins>xxxdef</ins>
850         //   -> <del>abc</del>xxx<ins>def</ins>
851         // e.g: <del>xxxabc</del><ins>defxxx</ins>
852         //   -> <ins>def</ins>xxx<del>abc</del>
853         // Only extract an overlap if it is as big as the edit ahead or behind it.
854         pointer = diffs.listIterator();
855         Diff prevDiff = null;
856         thisDiff = null;
857         if (pointer.hasNext()) {
858             prevDiff = pointer.next();
859             if (pointer.hasNext()) {
860                 thisDiff = pointer.next();
861             }
862         }
863 
864         while (thisDiff != null) {
865             if (prevDiff.getOperation() == Operation.DELETE &&
866                     thisDiff.getOperation() == Operation.INSERT) {
867                 String deletion = prevDiff.getText();
868                 String insertion = thisDiff.getText();
869                 int overlapLength1 = this.diffCommonOverlap(deletion, insertion);
870                 int overlapLength2 = this.diffCommonOverlap(insertion, deletion);
871                 if (overlapLength1 >= overlapLength2) {
872                     if (overlapLength1 >= deletion.length() / 2.0 ||
873                             overlapLength1 >= insertion.length() / 2.0) {
874                         // Overlap found.  Insert an equality and trim the surrounding edits.
875                         pointer.previous();
876                         pointer.add(new Diff(Operation.EQUAL,
877                                 insertion.substring(0, overlapLength1)));
878                         prevDiff.setText(deletion.substring(0, deletion.length() - overlapLength1));
879                         thisDiff.setText(insertion.substring(overlapLength1));
880                         // pointer.add inserts the element before the cursor, so there is
881                         // no need to step past the new element.
882                     }
883                 } else {
884                     if (overlapLength2 >= deletion.length() / 2.0 ||
885                             overlapLength2 >= insertion.length() / 2.0) {
886                         // Reverse overlap found.
887                         // Insert an equality and swap and trim the surrounding edits.
888                         pointer.previous();
889                         pointer.add(new Diff(Operation.EQUAL,
890                                 deletion.substring(0, overlapLength2)));
891                         prevDiff.setOperation(Operation.INSERT);
892                         prevDiff.setText(insertion.substring(0, insertion.length() - overlapLength2));
893                         thisDiff.setOperation(Operation.DELETE);
894                         thisDiff.setText(deletion.substring(overlapLength2));
895                         // pointer.add inserts the element before the cursor, so there is
896                         // no need to step past the new element.
897                     }
898                 }
899                 thisDiff = pointer.hasNext() ? pointer.next() : null;
900             }
901             prevDiff = thisDiff;
902             thisDiff = pointer.hasNext() ? pointer.next() : null;
903         }
904     }
905 
906     /**
907      * Look for single edits surrounded on both sides by equalities
908      * which can be shifted sideways to align the edit to a word boundary.
909      * e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
910      * @param diffs LinkedList of Diff objects.
911      */
912     public void diffCleanupSemanticLossless(List<Diff> diffs) {
913 
914         StringBuilder equality1 = new StringBuilder();
915         String edit;
916         StringBuilder equality2 = new StringBuilder();
917         String commonString;
918         int commonOffset;
919         int score;
920         int bestScore;
921         String bestEquality1;
922         String bestEdit;
923         String bestEquality2;
924         // Create a new iterator at the start.
925         ListIterator<Diff> pointer = diffs.listIterator();
926         Diff prevDiff = pointer.hasNext() ? pointer.next() : null;
927         Diff thisDiff = pointer.hasNext() ? pointer.next() : null;
928         Diff nextDiff = pointer.hasNext() ? pointer.next() : null;
929 
930         // Intentionally ignore the first and last element (don't need checking).
931         while (nextDiff != null) {
932             if (prevDiff.getOperation() == Operation.EQUAL &&
933                     nextDiff.getOperation() == Operation.EQUAL) {
934                 // This is a single edit surrounded by equalities.
935                 equality1.setLength(0);
936                 equality1.append(prevDiff.getText());
937                 edit = thisDiff.getText();
938                 equality2.setLength(0);
939                 equality2.append(nextDiff.getText());
940 
941                 // First, shift the edit as far left as possible.
942                 commonOffset = this.diffCommonSuffix(equality1.toString(), edit);
943                 if (commonOffset != 0) {
944                     commonString = edit.substring(edit.length() - commonOffset);
945                     String substring = equality1.substring(0, equality1.length() - commonOffset);
946                     equality1.setLength(0);
947                     equality1.append(substring);
948                     edit = commonString + edit.substring(0, edit.length() - commonOffset);
949                     equality2.insert(0, commonString);
950                 }
951 
952                 // Second, step character by character right, looking for the best fit.
953                 bestEquality1 = equality1.toString();
954                 bestEdit = edit;
955                 bestEquality2 = equality2.toString();
956                 bestScore = this.diffCleanupSemanticScore(equality1.toString(), edit)
957                         + this.diffCleanupSemanticScore(edit, equality2.toString());
958                 while (!edit.isEmpty() && equality2.length() != 0
959                         && edit.charAt(0) == equality2.charAt(0)) {
960                     equality1.append(edit.charAt(0));
961                     edit = edit.substring(1) + equality2.charAt(0);
962                     String substring = equality2.substring(1);
963                     equality2.setLength(0);
964                     equality2.append(substring);
965                     score = this.diffCleanupSemanticScore(equality1.toString(), edit)
966                             + this.diffCleanupSemanticScore(edit, equality2.toString());
967                     // The >= encourages trailing rather than leading whitespace on edits.
968                     if (score >= bestScore) {
969                         bestScore = score;
970                         bestEquality1 = equality1.toString();
971                         bestEdit = edit;
972                         bestEquality2 = equality2.toString();
973                     }
974                 }
975 
976                 if (!prevDiff.getText().equals(bestEquality1)) {
977                     // We have an improvement, save it back to the diff.
978                     if (!bestEquality1.isEmpty()) {
979                         prevDiff.setText(bestEquality1);
980                     } else {
981                         pointer.previous(); // Walk past nextDiff.
982                         pointer.previous(); // Walk past thisDiff.
983                         pointer.previous(); // Walk past prevDiff.
984                         pointer.remove(); // Delete prevDiff.
985                         pointer.next(); // Walk past thisDiff.
986                         pointer.next(); // Walk past nextDiff.
987                     }
988                     thisDiff.setText(bestEdit);
989                     if (!bestEquality2.isEmpty()) {
990                         nextDiff.setText(bestEquality2);
991                     } else {
992                         pointer.remove(); // Delete nextDiff.
993                         nextDiff = thisDiff;
994                         thisDiff = prevDiff;
995                     }
996                 }
997             }
998             prevDiff = thisDiff;
999             thisDiff = nextDiff;
1000             nextDiff = pointer.hasNext() ? pointer.next() : null;
1001         }
1002     }
1003 
1004     /**
1005      * Given two strings, compute a score representing whether the internal
1006      * boundary falls on logical boundaries.
1007      * Scores range from 6 (best) to 0 (worst).
1008      * @param one First string.
1009      * @param two Second string.
1010      * @return The score.
1011      */
1012     private int diffCleanupSemanticScore(String one, String two) {
1013 
1014         if (one.isEmpty() || two.isEmpty()) {
1015             // Edges are the best.
1016             return 6;
1017         }
1018 
1019         // Each port of this function behaves slightly differently due to
1020         // subtle differences in each language's definition of things like
1021         // 'whitespace'.  Since this function's purpose is largely cosmetic,
1022         // the choice has been made to use each language's native features
1023         // rather than force total conformity.
1024         char char1 = one.charAt(one.length() - 1);
1025         char char2 = two.charAt(0);
1026         boolean nonAlphaNumeric1 = !Character.isLetterOrDigit(char1);
1027         boolean nonAlphaNumeric2 = !Character.isLetterOrDigit(char2);
1028         boolean whitespace1 = nonAlphaNumeric1 && Character.isWhitespace(char1);
1029         boolean whitespace2 = nonAlphaNumeric2 && Character.isWhitespace(char2);
1030         boolean lineBreak1 = whitespace1
1031                 && Character.getType(char1) == Character.CONTROL;
1032         boolean lineBreak2 = whitespace2
1033                 && Character.getType(char2) == Character.CONTROL;
1034         boolean blankLine1 = lineBreak1 && DiffMatchPatch.BLANK_LINE_END.matcher(one).find();
1035         boolean blankLine2 = lineBreak2 && DiffMatchPatch.BLANK_LINE_START.matcher(two).find();
1036 
1037         if (blankLine1 || blankLine2) {
1038             // Five points for blank lines.
1039             return 5;
1040         } else if (lineBreak1 || lineBreak2) {
1041             // Four points for line breaks.
1042             return 4;
1043         } else if (nonAlphaNumeric1 && !whitespace1 && whitespace2) {
1044             // Three points for end of sentences.
1045             return 3;
1046         } else if (whitespace1 || whitespace2) {
1047             // Two points for whitespace.
1048             return 2;
1049         } else if (nonAlphaNumeric1 || nonAlphaNumeric2) {
1050             // One point for non-alphanumeric.
1051             return 1;
1052         }
1053         return 0;
1054     }
1055 
1056     /**
1057      * Reduce the number of edits by eliminating operationally trivial equalities.
1058      * @param diffs LinkedList of Diff objects.
1059      */
1060     public void diffCleanupEfficiency(LinkedList<Diff> diffs) {
1061 
1062         if (diffs.isEmpty()) {
1063             return;
1064         }
1065         boolean changes = false;
1066         // Synchronized Stack to avoid Exception
1067         Stack<Diff> equalities = new Stack<>();  // Stack of equalities.
1068         String lastequality = null; // Always equal to equalities.lastElement().text
1069         ListIterator<Diff> pointer = diffs.listIterator();
1070         // Is there an insertion operation before the last equality.
1071         boolean preIns = false;
1072         // Is there a deletion operation before the last equality.
1073         boolean preDel = false;
1074         // Is there an insertion operation after the last equality.
1075         boolean postIns = false;
1076         // Is there a deletion operation after the last equality.
1077         boolean postDel = false;
1078         Diff thisDiff = pointer.next();
1079         Diff safeDiff = thisDiff;  // The last Diff that is known to be unsplitable.
1080         while (thisDiff != null) {
1081 
1082             if (thisDiff.getOperation() == Operation.EQUAL) {
1083 
1084                 // Equality found.
1085                 if (thisDiff.getText().length() < DiffMatchPatch.DIFF_EDIT_COST && (postIns || postDel)) {
1086                     // Candidate found.
1087                     equalities.push(thisDiff);
1088                     preIns = postIns;
1089                     preDel = postDel;
1090                     lastequality = thisDiff.getText();
1091                 } else {
1092                     // Not a candidate, and can never become one.
1093                     equalities.clear();
1094                     lastequality = null;
1095                     safeDiff = thisDiff;
1096                 }
1097                 postIns = postDel = false;
1098             } else {
1099                 // An insertion or deletion.
1100                 if (thisDiff.getOperation() == Operation.DELETE) {
1101                     postDel = true;
1102                 } else {
1103                     postIns = true;
1104                 }
1105 
1106                 /*
1107                  * Five types to be split:
1108                  * <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
1109                  * <ins>A</ins>X<ins>C</ins><del>D</del>
1110                  * <ins>A</ins><del>B</del>X<ins>C</ins>
1111                  * <ins>A</del>X<ins>C</ins><del>D</del>
1112                  * <ins>A</ins><del>B</del>X<del>C</del>
1113                  */
1114                 if (
1115                         lastequality != null
1116                                 && (
1117                                 (preIns && preDel && postIns && postDel)
1118                                         || (
1119                                         (lastequality.length() < DiffMatchPatch.DIFF_EDIT_COST / 2)
1120                                                 && ((preIns ? 1 : 0) + (preDel ? 1 : 0) + (postIns ? 1 : 0) + (postDel ? 1 : 0)) == 3
1121                                 )
1122                         )
1123                 ) {
1124                     // Walk back to offending equality.
1125                     while (thisDiff != equalities.lastElement()) {
1126                         thisDiff = pointer.previous();
1127                     }
1128                     pointer.next();
1129 
1130                     // Replace equality with a delete.
1131                     pointer.set(new Diff(Operation.DELETE, lastequality));
1132                     // Insert a corresponding an insert.
1133                     thisDiff = new Diff(Operation.INSERT, lastequality);
1134                     pointer.add(thisDiff);
1135 
1136                     equalities.pop();  // Throw away the equality we just deleted.
1137                     lastequality = null;
1138                     if (preIns && preDel) {
1139                         // No changes made which could affect previous entry, keep going.
1140                         postIns = postDel = true;
1141                         equalities.clear();
1142                         safeDiff = thisDiff;
1143                     } else {
1144                         if (!equalities.empty()) {
1145                             // Throw away the previous equality (it needs to be reevaluated).
1146                             equalities.pop();
1147                         }
1148                         if (equalities.empty()) {
1149                             // There are no previous questionable equalities,
1150                             // walk back to the last known safe diff.
1151                             thisDiff = safeDiff;
1152                         } else {
1153                             // There is an equality we can fall back to.
1154                             thisDiff = equalities.lastElement();
1155                         }
1156                         while (thisDiff != pointer.previous()) {
1157                             // Intentionally empty loop.
1158                         }
1159                         postIns = postDel = false;
1160                     }
1161 
1162                     changes = true;
1163                 }
1164             }
1165             thisDiff = pointer.hasNext() ? pointer.next() : null;
1166         }
1167 
1168         if (changes) {
1169             this.diffCleanupMerge(diffs);
1170         }
1171     }
1172 
1173     /**
1174      * Reorder and merge like edit sections.  Merge equalities.
1175      * Any edit section can move as long as it doesn't cross an equality.
1176      * @param diffs LinkedList of Diff objects.
1177      */
1178     public void diffCleanupMerge(LinkedList<Diff> diffs) {
1179 
1180         diffs.add(new Diff(Operation.EQUAL, StringUtils.EMPTY));  // Add a dummy entry at the end.
1181         ListIterator<Diff> pointer = diffs.listIterator();
1182         int countDelete = 0;
1183         int countInsert = 0;
1184         StringBuilder textDelete = new StringBuilder();
1185         StringBuilder textInsert = new StringBuilder();
1186         Diff thisDiff = pointer.next();
1187         Diff prevEqual = null;
1188         int commonlength;
1189         while (thisDiff != null) {
1190             switch (thisDiff.getOperation()) {
1191                 case INSERT:
1192                     countInsert++;
1193                     textInsert.append(thisDiff.getText());
1194                     prevEqual = null;
1195                     break;
1196                 case DELETE:
1197                     countDelete++;
1198                     textDelete.append(thisDiff.getText());
1199                     prevEqual = null;
1200                     break;
1201                 case EQUAL:
1202                     if (countDelete + countInsert > 1) {
1203 
1204                         boolean bothTypes = countDelete != 0 && countInsert != 0;
1205                         // Delete the offending records.
1206                         pointer.previous();  // Reverse direction.
1207                         while (countDelete-- > 0) {
1208                             pointer.previous();
1209                             pointer.remove();
1210                         }
1211                         while (countInsert-- > 0) {
1212                             pointer.previous();
1213                             pointer.remove();
1214                         }
1215 
1216                         if (bothTypes) {
1217                             // Factor out any common prefixies.
1218                             commonlength = this.diffCommonPrefix(textInsert.toString(), textDelete.toString());
1219                             if (commonlength != 0) {
1220                                 if (pointer.hasPrevious()) {
1221                                     thisDiff = pointer.previous();
1222                                     // Previous diff should have been an equality: thisDiff.getOperation() == Operation.EQUAL")
1223                                     thisDiff.setText(thisDiff.getText() + textInsert.substring(0, commonlength));
1224                                     pointer.next();
1225                                 } else {
1226                                     pointer.add(new Diff(Operation.EQUAL,
1227                                             textInsert.substring(0, commonlength)));
1228                                 }
1229                                 String substringIns = textInsert.substring(commonlength);
1230                                 textInsert.setLength(0);
1231                                 textInsert.append(substringIns);
1232                                 String substringDel = textDelete.substring(commonlength);
1233                                 textDelete.setLength(0);
1234                                 textDelete.append(substringDel);
1235                             }
1236                             // Factor out any common suffixies.
1237                             commonlength = this.diffCommonSuffix(textInsert.toString(), textDelete.toString());
1238                             if (commonlength != 0) {
1239                                 thisDiff = pointer.next();
1240                                 thisDiff.setText(textInsert.substring(textInsert.length() - commonlength) + thisDiff.getText());
1241                                 String substringIns = textInsert.substring(0, textInsert.length() - commonlength);
1242                                 textInsert.setLength(0);
1243                                 textInsert.append(substringIns);
1244                                 String substringDel = textDelete.substring(0, textDelete.length() - commonlength);
1245                                 textDelete.setLength(0);
1246                                 textDelete.append(substringDel);
1247                                 pointer.previous();
1248                             }
1249                         }
1250                         // Insert the merged records.
1251                         if (textDelete.length() != 0) {
1252                             pointer.add(new Diff(Operation.DELETE, textDelete.toString()));
1253                         }
1254                         if (textInsert.length() != 0) {
1255                             pointer.add(new Diff(Operation.INSERT, textInsert.toString()));
1256                         }
1257                         // Step forward to the equality.
1258                         thisDiff = pointer.hasNext() ? pointer.next() : null;
1259                     } else if (prevEqual != null) {
1260                         // Merge this equality with the previous one.
1261                         prevEqual.setText(prevEqual.getText() + thisDiff.getText());
1262                         pointer.remove();
1263                         thisDiff = pointer.previous();
1264                         pointer.next();  // Forward direction
1265                     }
1266                     countInsert = 0;
1267                     countDelete = 0;
1268                     textDelete.setLength(0);
1269                     textInsert.setLength(0);
1270                     prevEqual = thisDiff;
1271                     break;
1272             }
1273             thisDiff = pointer.hasNext() ? pointer.next() : null;
1274         }
1275         if (diffs.getLast().getText().isEmpty()) {
1276             diffs.removeLast();  // Remove the dummy entry at the end.
1277         }
1278 
1279         /*
1280          * Second pass: look for single edits surrounded on both sides by equalities
1281          * which can be shifted sideways to eliminate an equality.
1282          * e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
1283          */
1284         boolean changes = false;
1285         // Create a new iterator at the start.
1286         // (As opposed to walking the current one back.)
1287         pointer = diffs.listIterator();
1288         Diff prevDiff = pointer.hasNext() ? pointer.next() : null;
1289         thisDiff = pointer.hasNext() ? pointer.next() : null;
1290         Diff nextDiff = pointer.hasNext() ? pointer.next() : null;
1291 
1292         // Intentionally ignore the first and last element (don't need checking).
1293         while (nextDiff != null) {
1294             if (prevDiff.getOperation() == Operation.EQUAL &&
1295                     nextDiff.getOperation() == Operation.EQUAL) {
1296                 // This is a single edit surrounded by equalities.
1297                 if (thisDiff.getText().endsWith(prevDiff.getText())) {
1298                     // Shift the edit over the previous equality.
1299                     thisDiff.setText(prevDiff.getText()
1300                             + thisDiff.getText().substring(0, thisDiff.getText().length()
1301                             - prevDiff.getText().length()));
1302                     nextDiff.setText(prevDiff.getText() + nextDiff.getText());
1303                     pointer.previous(); // Walk past nextDiff.
1304                     pointer.previous(); // Walk past thisDiff.
1305                     pointer.previous(); // Walk past prevDiff.
1306                     pointer.remove(); // Delete prevDiff.
1307                     pointer.next(); // Walk past thisDiff.
1308                     thisDiff = pointer.next(); // Walk past nextDiff.
1309                     nextDiff = pointer.hasNext() ? pointer.next() : null;
1310                     changes = true;
1311                 } else if (thisDiff.getText().startsWith(nextDiff.getText())) {
1312                     // Shift the edit over the next equality.
1313                     prevDiff.setText(prevDiff.getText() + nextDiff.getText());
1314                     thisDiff.setText(thisDiff.getText().substring(nextDiff.getText().length())
1315                             + nextDiff.getText());
1316                     pointer.remove(); // Delete nextDiff.
1317                     nextDiff = pointer.hasNext() ? pointer.next() : null;
1318                     changes = true;
1319                 }
1320             }
1321             prevDiff = thisDiff;
1322             thisDiff = nextDiff;
1323             nextDiff = pointer.hasNext() ? pointer.next() : null;
1324         }
1325         // If shifts were made, the diff needs reordering and another shift sweep.
1326         if (changes) {
1327             this.diffCleanupMerge(diffs);
1328         }
1329     }
1330 
1331     /**
1332      * loc is a location in text1, compute and return the equivalent location in
1333      * text2.
1334      * e.g. "The cat" vs "The big cat", 1->1, 5->8
1335      * @param diffs List of Diff objects.
1336      * @param loc Location within text1.
1337      * @return Location within text2.
1338      */
1339     public int diffXIndex(List<Diff> diffs, int loc) {
1340         int chars1 = 0;
1341         int chars2 = 0;
1342         int lastChars1 = 0;
1343         int lastChars2 = 0;
1344         Diff lastDiff = null;
1345         for (Diff aDiff : diffs) {
1346             if (aDiff.getOperation() != Operation.INSERT) {
1347                 // Equality or deletion.
1348                 chars1 += aDiff.getText().length();
1349             }
1350             if (aDiff.getOperation() != Operation.DELETE) {
1351                 // Equality or insertion.
1352                 chars2 += aDiff.getText().length();
1353             }
1354             if (chars1 > loc) {
1355                 // Overshot the location.
1356                 lastDiff = aDiff;
1357                 break;
1358             }
1359             lastChars1 = chars1;
1360             lastChars2 = chars2;
1361         }
1362         if (lastDiff != null && lastDiff.getOperation() == Operation.DELETE) {
1363             // The location was deleted.
1364             return lastChars2;
1365         }
1366         // Add the remaining character length.
1367         return lastChars2 + loc - lastChars1;
1368     }
1369 
1370     /**
1371      * Convert a Diff list into a pretty HTML report.
1372      * @param diffs List of Diff objects.
1373      * @return HTML representation.
1374      */
1375     public String diffPrettyHtml(List<Diff> diffs) {
1376         StringBuilder html = new StringBuilder();
1377         for (Diff aDiff : diffs) {
1378             String text = aDiff.getText().replace("&", "&amp;").replace("<", "&lt;")
1379                     .replace(">", "&gt;").replace("\n", "&para;<br>");
1380             switch (aDiff.getOperation()) {
1381                 case INSERT:
1382                     html.append("<ins style=\"background:#e6ffe6;\">").append(text)
1383                             .append("</ins>");
1384                     break;
1385                 case DELETE:
1386                     html.append("<del style=\"background:#ffe6e6;\">").append(text)
1387                             .append("</del>");
1388                     break;
1389                 case EQUAL:
1390                     html.append("<span>").append(text).append("</span>");
1391                     break;
1392             }
1393         }
1394         return html.toString();
1395     }
1396 
1397     /**
1398      * Compute and return the source text (all equalities and deletions).
1399      * @param diffs List of Diff objects.
1400      * @return Source text.
1401      */
1402     public String diffText1(List<Diff> diffs) {
1403         StringBuilder text = new StringBuilder();
1404         for (Diff aDiff : diffs) {
1405             if (aDiff.getOperation() != Operation.INSERT) {
1406                 text.append(aDiff.getText());
1407             }
1408         }
1409         return text.toString();
1410     }
1411 
1412     /**
1413      * Compute and return the destination text (all equalities and insertions).
1414      * @param diffs List of Diff objects.
1415      * @return Destination text.
1416      */
1417     public String diffText2(List<Diff> diffs) {
1418         StringBuilder text = new StringBuilder();
1419         for (Diff aDiff : diffs) {
1420             if (aDiff.getOperation() != Operation.DELETE) {
1421                 text.append(aDiff.getText());
1422             }
1423         }
1424         return text.toString();
1425     }
1426 
1427     /**
1428      * Compute the Levenshtein distance; the number of inserted, deleted or
1429      * substituted characters.
1430      * @param diffs List of Diff objects.
1431      * @return Number of changes.
1432      */
1433     public int diffLevenshtein(List<Diff> diffs) {
1434         int levenshtein = 0;
1435         int insertions = 0;
1436         int deletions = 0;
1437         for (Diff aDiff : diffs) {
1438             switch (aDiff.getOperation()) {
1439                 case INSERT:
1440                     insertions += aDiff.getText().length();
1441                     break;
1442                 case DELETE:
1443                     deletions += aDiff.getText().length();
1444                     break;
1445                 case EQUAL:
1446                     // A deletion and an insertion is one substitution.
1447                     levenshtein += Math.max(insertions, deletions);
1448                     insertions = 0;
1449                     deletions = 0;
1450                     break;
1451             }
1452         }
1453         levenshtein += Math.max(insertions, deletions);
1454         return levenshtein;
1455     }
1456 
1457     /**
1458      * Crush the diff into an encoded string which describes the operations
1459      * required to transform text1 into text2.
1460      * E.g. =3\t-2\t+ing  -> Keep 3 chars, delete 2 chars, insert 'ing'.
1461      * Operations are tab-separated.  Inserted text is escaped using %xx notation.
1462      * @param diffs Array of Diff objects.
1463      * @return Delta text.
1464      */
1465     public String diffToDelta(List<Diff> diffs) {
1466 
1467         StringBuilder text = new StringBuilder();
1468         for (Diff aDiff : diffs) {
1469             switch (aDiff.getOperation()) {
1470                 case INSERT:
1471                     text.append("+").append(URLEncoder.encode(aDiff.getText(), StandardCharsets.UTF_8)
1472                             .replace('+', ' ')).append("\t");
1473                     break;
1474                 case DELETE:
1475                     text.append("-").append(aDiff.getText().length()).append("\t");
1476                     break;
1477                 case EQUAL:
1478                     text.append("=").append(aDiff.getText().length()).append("\t");
1479                     break;
1480             }
1481         }
1482         String delta = text.toString();
1483         if (!delta.isEmpty()) {
1484             // Strip off trailing tab character.
1485             delta = delta.substring(0, delta.length() - 1);
1486             delta = Patch.unescapeForEncodeUriCompatability(delta);
1487         }
1488         return delta;
1489     }
1490 
1491     /**
1492      * Given the original text1, and an encoded string which describes the
1493      * operations required to transform text1 into text2, compute the full diff.
1494      * @param text1 Source string for the diff.
1495      * @param delta Delta text.
1496      * @return list of Diff objects or null if invalid.
1497      */
1498     public List<Diff> diffFromDelta(String text1, String delta) {
1499 
1500         List<Diff> diffs = new LinkedList<>();
1501         int pointer = 0;  // Cursor in text1
1502         String[] tokens = delta.split("\t");
1503 
1504         for (String token : tokens) {
1505             if (token.isEmpty()) {
1506                 // Blank tokens are ok (from a trailing \t).
1507                 continue;
1508             }
1509             // Each token begins with a one character parameter which specifies the
1510             // operation of this token (delete, insert, equality).
1511             String param = token.substring(1);
1512             switch (token.charAt(0)) {
1513                 case '+':
1514                     // decode would change all "+" to " "
1515                     param = param.replace("+", "%2B");
1516                     try {
1517                         param = URLDecoder.decode(param, StandardCharsets.UTF_8);
1518                     } catch (IllegalArgumentException e) {
1519                         // Malformed URI sequence.
1520                         throw new IllegalArgumentException(
1521                                 "Illegal escape in diff_fromDelta: " + param, e);
1522                     }
1523                     diffs.add(new Diff(Operation.INSERT, param));
1524                     break;
1525                 case '-':
1526                     // Fall through.
1527                 case '=':
1528                     int n;
1529                     try {
1530                         n = Integer.parseInt(param);
1531                     } catch (NumberFormatException e) {
1532                         throw new IllegalArgumentException(
1533                                 "Invalid number in diff_fromDelta: " + param, e);
1534                     }
1535                     if (n < 0) {
1536                         throw new IllegalArgumentException(
1537                                 "Negative number in diff_fromDelta: " + param);
1538                     }
1539                     String text;
1540                     try {
1541                         int p1 = pointer;
1542                         pointer += n;
1543                         int p2 = pointer;
1544                         text = text1.substring(p1, p2);
1545                     } catch (StringIndexOutOfBoundsException e) {
1546                         throw new IllegalArgumentException("Delta length (" + pointer
1547                                 + ") larger than source text length (" + text1.length()
1548                                 + ").", e);
1549                     }
1550                     if (token.charAt(0) == '=') {
1551                         diffs.add(new Diff(Operation.EQUAL, text));
1552                     } else {
1553                         diffs.add(new Diff(Operation.DELETE, text));
1554                     }
1555                     break;
1556                 default:
1557                     // Anything else is an error.
1558                     throw new IllegalArgumentException(
1559                             "Invalid diff operation in diff_fromDelta: " + token.charAt(0));
1560             }
1561         }
1562         if (pointer != text1.length()) {
1563             throw new IllegalArgumentException("Delta length (" + pointer
1564                     + ") smaller than source text length (" + text1.length() + ").");
1565         }
1566         return diffs;
1567     }
1568 
1569 
1570     //  MATCH FUNCTIONS
1571 
1572     /**
1573      * Locate the best instance of 'pattern' in 'text' near 'loc'.
1574      * Returns -1 if no match found.
1575      * @param text The text to search.
1576      * @param pattern The pattern to search for.
1577      * @param valueLoc The location to search around.
1578      * @return Best match index or -1.
1579      */
1580     public int matchMain(String text, String pattern, int valueLoc) {
1581         // Check for null inputs.
1582         if (text == null || pattern == null) {
1583             throw new IllegalArgumentException("Null inputs. (match_main)");
1584         }
1585 
1586         int loc = Math.max(0, Math.min(valueLoc, text.length()));
1587         if (text.equals(pattern)) {
1588             // Shortcut (potentially not guaranteed by the algorithm)
1589             return 0;
1590         } else if (text.isEmpty()) {
1591             // Nothing to match.
1592             return -1;
1593         } else if (loc + pattern.length() <= text.length()
1594                 && text.substring(loc, loc + pattern.length()).equals(pattern)) {
1595             // Perfect match at the perfect spot!  (Includes case of null pattern)
1596             return loc;
1597         } else {
1598             // Do a fuzzy compare.
1599             return this.matchBitap(text, pattern, loc);
1600         }
1601     }
1602 
1603     /**
1604      * Locate the best instance of 'pattern' in 'text' near 'loc' using the
1605      * Bitap algorithm.  Returns -1 if no match found.
1606      * @param text The text to search.
1607      * @param pattern The pattern to search for.
1608      * @param loc The location to search around.
1609      * @return Best match index or -1.
1610      */
1611     protected int matchBitap(String text, String pattern, int loc) {
1612 
1613         // Initialize the alphabet.
1614         Map<Character, Integer> s = this.matchAlphabet(pattern);
1615 
1616         // Highest score beyond which we give up.
1617         double scoreThreshold = DiffMatchPatch.MATCH_THRESHOLD;
1618         // Is there a nearby exact match? (speedup)
1619         int bestLoc = text.indexOf(pattern, loc);
1620         if (bestLoc != -1) {
1621             scoreThreshold = Math.min(this.matchBitapScore(0, bestLoc, loc, pattern),
1622                     scoreThreshold);
1623             // What about in the other direction? (speedup)
1624             bestLoc = text.lastIndexOf(pattern, loc + pattern.length());
1625             if (bestLoc != -1) {
1626                 scoreThreshold = Math.min(this.matchBitapScore(0, bestLoc, loc, pattern),
1627                         scoreThreshold);
1628             }
1629         }
1630 
1631         // Initialize the bit arrays.
1632         int matchmask = 1 << (pattern.length() - 1);
1633         bestLoc = -1;
1634 
1635         int binMin;
1636         int binMid;
1637         int binMax = pattern.length() + text.length();
1638         // Empty initialization added to appease Java compiler.
1639         int[] lastRd = new int[0];
1640         for (int d = 0; d < pattern.length(); d++) {
1641 
1642             // Scan for the best match; each iteration allows for one more error.
1643             // Run a binary search to determine how far from 'loc' we can stray at
1644             // this error level.
1645             binMin = 0;
1646             binMid = binMax;
1647             while (binMin < binMid) {
1648                 if (this.matchBitapScore(d, loc + binMid, loc, pattern)
1649                         <= scoreThreshold) {
1650                     binMin = binMid;
1651                 } else {
1652                     binMax = binMid;
1653                 }
1654                 binMid = (binMax - binMin) / 2 + binMin;
1655             }
1656             // Use the result from this iteration as the maximum for the next.
1657             binMax = binMid;
1658             int start = Math.max(1, loc - binMid + 1);
1659             int finish = Math.min(loc + binMid, text.length()) + pattern.length();
1660 
1661             int[] rd = new int[finish + 2];
1662             rd[finish + 1] = (1 << d) - 1;
1663             for (int j = finish; j >= start; j--) {
1664 
1665                 int charMatch;
1666                 if (text.length() <= j - 1 || !s.containsKey(text.charAt(j - 1))) {
1667                     // Out of range.
1668                     charMatch = 0;
1669                 } else {
1670                     charMatch = s.get(text.charAt(j - 1));
1671                 }
1672                 if (d == 0) {
1673                     // First pass: exact match.
1674                     rd[j] = ((rd[j + 1] << 1) | 1) & charMatch;
1675                 } else {
1676                     // Subsequent passes : fuzzy match.
1677                     rd[j] = (((rd[j + 1] << 1) | 1) & charMatch)
1678                             | (((lastRd[j + 1] | lastRd[j]) << 1) | 1) | lastRd[j + 1];
1679                 }
1680 
1681                 if ((rd[j] & matchmask) != 0) {
1682                     double score = this.matchBitapScore(d, j - 1, loc, pattern);
1683                     // This match will almost certainly be better than any existing
1684                     // match.  But check anyway.
1685                     if (score <= scoreThreshold) {
1686                         // Told you so.
1687                         scoreThreshold = score;
1688                         bestLoc = j - 1;
1689                         if (bestLoc > loc) {
1690                             // When passing loc, don't exceed our current distance from loc.
1691                             start = Math.max(1, 2 * loc - bestLoc);
1692                         } else {
1693                             // Already passed loc, downhill from here on in.
1694                             break;
1695                         }
1696                     }
1697                 }
1698             }
1699             if (this.matchBitapScore(d + 1, loc, loc, pattern) > scoreThreshold) {
1700                 // No hope for a (better) match at greater error levels.
1701                 break;
1702             }
1703             lastRd = rd;
1704         }
1705         return bestLoc;
1706     }
1707 
1708     /**
1709      * Compute and return the score for a match with e errors and x location.
1710      * @param e Number of errors in match.
1711      * @param x Location of match.
1712      * @param loc Expected location of match.
1713      * @param pattern Pattern being sought.
1714      * @return Overall score for match (0.0 = good, 1.0 = bad).
1715      */
1716     private double matchBitapScore(int e, int x, int loc, String pattern) {
1717         float accuracy = (float) e / pattern.length();
1718         int proximity = Math.abs(loc - x);
1719         return accuracy + (proximity / (float) DiffMatchPatch.MATCH_DISTANCE);
1720     }
1721 
1722     /**
1723      * Initialize the alphabet for the Bitap algorithm.
1724      * @param pattern The text to encode.
1725      * @return Hash of character locations.
1726      */
1727     protected Map<Character, Integer> matchAlphabet(String pattern) {
1728         Map<Character, Integer> s = new HashMap<>();
1729         char[] charPattern = pattern.toCharArray();
1730         for (char c : charPattern) {
1731             s.put(c, 0);
1732         }
1733         int i = 0;
1734         for (char c : charPattern) {
1735             s.put(c, s.get(c) | (1 << (pattern.length() - i - 1)));
1736             i++;
1737         }
1738         return s;
1739     }
1740 
1741 
1742     //  PATCH FUNCTIONS
1743 
1744     /**
1745      * Increase the context until it is unique,
1746      * but don't let the pattern expand beyond Match_MaxBits.
1747      * @param patch The patch to grow.
1748      * @param text Source text.
1749      */
1750     protected void patchAddContext(Patch patch, String text) {
1751 
1752         if (text.isEmpty()) {
1753             return;
1754         }
1755         String pattern = text.substring(patch.getStart2(), patch.getStart2() + patch.getLength1());
1756         int padding = 0;
1757 
1758         // Look for the first and last matches of pattern in text.  If two different
1759         // matches are found, increase the pattern length.
1760         while (text.indexOf(pattern) != text.lastIndexOf(pattern)
1761                 && pattern.length() < DiffMatchPatch.MATCH_MAX_BITS - DiffMatchPatch.PATCH_MARGIN - DiffMatchPatch.PATCH_MARGIN) {
1762             padding += DiffMatchPatch.PATCH_MARGIN;
1763             pattern = text.substring(Math.max(0, patch.getStart2() - padding),
1764                     Math.min(text.length(), patch.getStart2() + patch.getLength1() + padding));
1765         }
1766         // Add one chunk for good luck.
1767         padding += DiffMatchPatch.PATCH_MARGIN;
1768 
1769         // Add the prefix.
1770         String prefix = text.substring(Math.max(0, patch.getStart2() - padding),
1771                 patch.getStart2());
1772         if (!prefix.isEmpty()) {
1773             patch.getDiffs().addFirst(new Diff(Operation.EQUAL, prefix));
1774         }
1775         // Add the suffix.
1776         String suffix = text.substring(patch.getStart2() + patch.getLength1(),
1777                 Math.min(text.length(), patch.getStart2() + patch.getLength1() + padding));
1778         if (!suffix.isEmpty()) {
1779             patch.getDiffs().addLast(new Diff(Operation.EQUAL, suffix));
1780         }
1781 
1782         // Roll back the start points.
1783         patch.setStart1(patch.getStart1() - prefix.length());
1784         patch.setStart2(patch.getStart2() - prefix.length());
1785         // Extend the lengths.
1786         patch.setLength1(patch.getLength1() + prefix.length() + suffix.length());
1787         patch.setLength2(patch.getLength2() + prefix.length() + suffix.length());
1788     }
1789 
1790     /**
1791      * Compute a list of patches to turn text1 into text2.
1792      * A set of diffs will be computed.
1793      * @param text1 Old text.
1794      * @param text2 New text.
1795      * @return LinkedList of Patch objects.
1796      */
1797     public List<Patch> patchMake(String text1, String text2) {
1798         if (text1 == null || text2 == null) {
1799             throw new IllegalArgumentException("Null inputs. (patch_make)");
1800         }
1801         // No diffs provided, compute our own.
1802         LinkedList<Diff> diffs = this.diffMain(text1, text2, true);
1803         if (diffs.size() > 2) {
1804             this.diffCleanupSemantic(diffs);
1805             this.diffCleanupEfficiency(diffs);
1806         }
1807         return this.patchMake(text1, diffs);
1808     }
1809 
1810     /**
1811      * Compute a list of patches to turn text1 into text2.
1812      * text1 will be derived from the provided diffs.
1813      * @param diffs Array of Diff objects for text1 to text2.
1814      * @return LinkedList of Patch objects.
1815      */
1816     public List<Patch> patchMake(LinkedList<Diff> diffs) {
1817         if (diffs == null) {
1818             throw new IllegalArgumentException("Null inputs. (patch_make)");
1819         }
1820         // No origin string provided, compute our own.
1821         String text1 = this.diffText1(diffs);
1822         return this.patchMake(text1, diffs);
1823     }
1824 
1825     /**
1826      * Compute a list of patches to turn text1 into text2.
1827      * text2 is not provided, diffs are the delta between text1 and text2.
1828      * @param text1 Old text.
1829      * @param diffs Array of Diff objects for text1 to text2.
1830      * @return Deque of Patch objects.
1831      */
1832     public List<Patch> patchMake(String text1, Deque<Diff> diffs) {
1833 
1834         if (text1 == null || diffs == null) {
1835             throw new IllegalArgumentException("Null inputs. (patch_make)");
1836         }
1837 
1838         List<Patch> patches = new LinkedList<>();
1839         if (diffs.isEmpty()) {
1840             return patches;  // Get rid of the null case.
1841         }
1842         Patch patch = new Patch();
1843         int charCount1 = 0;  // Number of characters into the text1 string.
1844         int charCount2 = 0;  // Number of characters into the text2 string.
1845         // Start with text1 (prepatch_text) and apply the diffs until we arrive at
1846         // text2 (postpatch_text). We recreate the patches one by one to determine
1847         // context info.
1848         String prepatchText = text1;
1849         String postpatchText = text1;
1850 
1851         for (Diff aDiff : diffs) {
1852             if (patch.getDiffs().isEmpty() && aDiff.getOperation() != Operation.EQUAL) {
1853                 // A new patch starts here.
1854                 patch.setStart1(charCount1);
1855                 patch.setStart2(charCount2);
1856             }
1857 
1858             switch (aDiff.getOperation()) {
1859                 case INSERT:
1860                     patch.getDiffs().add(aDiff);
1861                     patch.setLength2(patch.getLength2() + aDiff.getText().length());
1862                     postpatchText = postpatchText.substring(0, charCount2)
1863                             + aDiff.getText() + postpatchText.substring(charCount2);
1864                     break;
1865                 case DELETE:
1866                     patch.setLength1(patch.getLength1() + aDiff.getText().length());
1867                     patch.getDiffs().add(aDiff);
1868                     postpatchText = postpatchText.substring(0, charCount2)
1869                             + postpatchText.substring(charCount2 + aDiff.getText().length());
1870                     break;
1871                 case EQUAL:
1872                     if (
1873                             aDiff.getText().length() <= 2 * DiffMatchPatch.PATCH_MARGIN
1874                                     && !patch.getDiffs().isEmpty() && aDiff != diffs.getLast()
1875                     ) {
1876                         // Small equality inside a patch.
1877                         patch.getDiffs().add(aDiff);
1878                         patch.setLength1(patch.getLength1() + aDiff.getText().length());
1879                         patch.setLength2(patch.getLength2() + aDiff.getText().length());
1880                     }
1881 
1882                     if (
1883                             aDiff.getText().length() >= 2 * DiffMatchPatch.PATCH_MARGIN
1884                                     && !patch.getDiffs().isEmpty()
1885                     ) {
1886                         // Time for a new patch.
1887                         this.patchAddContext(patch, prepatchText);
1888                         patches.add(patch);
1889                         patch = new Patch();
1890                         // Unlike Unidiff, our patch lists have a rolling context.
1891                         // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff
1892                         // Update prepatch text & pos to reflect the application of the
1893                         // just completed patch.
1894                         prepatchText = postpatchText;
1895                         charCount1 = charCount2;
1896                     }
1897                     break;
1898             }
1899 
1900             // Update the current character count.
1901             if (aDiff.getOperation() != Operation.INSERT) {
1902                 charCount1 += aDiff.getText().length();
1903             }
1904             if (aDiff.getOperation() != Operation.DELETE) {
1905                 charCount2 += aDiff.getText().length();
1906             }
1907         }
1908         // Pick up the leftover patch if not empty.
1909         if (!patch.getDiffs().isEmpty()) {
1910             this.patchAddContext(patch, prepatchText);
1911             patches.add(patch);
1912         }
1913 
1914         return patches;
1915     }
1916 
1917     /**
1918      * Given an array of patches, return another array that is identical.
1919      * @param patches Array of Patch objects.
1920      * @return list of Patch objects.
1921      */
1922     public LinkedList<Patch> patchDeepCopy(List<Patch> patches) {
1923         LinkedList<Patch> patchesCopy = new LinkedList<>();
1924         for (Patch aPatch : patches) {
1925             Patch patchCopy = new Patch();
1926             for (Diff aDiff : aPatch.getDiffs()) {
1927                 Diff diffCopy = new Diff(aDiff.getOperation(), aDiff.getText());
1928                 patchCopy.getDiffs().add(diffCopy);
1929             }
1930             patchCopy.setStart1(aPatch.getStart1());
1931             patchCopy.setStart2(aPatch.getStart2());
1932             patchCopy.setLength1(aPatch.getLength1());
1933             patchCopy.setLength2(aPatch.getLength2());
1934             patchesCopy.add(patchCopy);
1935         }
1936         return patchesCopy;
1937     }
1938 
1939     /**
1940      * Merge a set of patches onto the text.  Return a patched text, as well
1941      * as an array of true/false values indicating which patches were applied.
1942      * @param valuePatches Array of Patch objects
1943      * @param valueText Old text.
1944      * @return Two element Object array, containing the new text and an array of
1945      *      boolean values.
1946      */
1947     public Object[] patchApply(LinkedList<Patch> valuePatches, String valueText) {
1948 
1949         if (valuePatches.isEmpty()) {
1950             return new Object[]{valueText, new boolean[0]};
1951         }
1952 
1953         // Deep copy the patches so that no changes are made to originals.
1954         LinkedList<Patch> patches = this.patchDeepCopy(valuePatches);
1955 
1956         String nullPadding = this.patchAddPadding(patches);
1957         String text = nullPadding + valueText + nullPadding;
1958         this.patchSplitMax(patches);
1959 
1960         int x = 0;
1961         // delta keeps track of the offset between the expected and actual location
1962         // of the previous patch.  If there are patches expected at positions 10 and
1963         // 20, but the first patch was found at 12, delta is 2 and the second patch
1964         // has an effective expected position of 22.
1965         int delta = 0;
1966         boolean[] results = new boolean[patches.size()];
1967         for (Patch aPatch : patches) {
1968 
1969             int expectedLoc = aPatch.getStart2() + delta;
1970             String text1 = this.diffText1(aPatch.getDiffs());
1971             int startLoc;
1972             int endLoc = -1;
1973 
1974             if (text1.length() > DiffMatchPatch.MATCH_MAX_BITS) {
1975                 // patch_splitMax will only provide an oversized pattern in the case of
1976                 // a monster delete.
1977                 startLoc = this.matchMain(text,
1978                         text1.substring(0, DiffMatchPatch.MATCH_MAX_BITS), expectedLoc);
1979                 if (startLoc != -1) {
1980                     endLoc = this.matchMain(text,
1981                             text1.substring(text1.length() - DiffMatchPatch.MATCH_MAX_BITS),
1982                             expectedLoc + text1.length() - DiffMatchPatch.MATCH_MAX_BITS);
1983                     if (endLoc == -1 || startLoc >= endLoc) {
1984                         // Can't find valid trailing context.  Drop this patch.
1985                         startLoc = -1;
1986                     }
1987                 }
1988             } else {
1989                 startLoc = this.matchMain(text, text1, expectedLoc);
1990             }
1991 
1992             if (startLoc == -1) {
1993                 // No match found.  :(
1994                 results[x] = false;
1995                 // Subtract the delta for this failed patch from subsequent patches.
1996                 delta -= aPatch.getLength2() - aPatch.getLength1();
1997             } else {
1998                 // Found a match.  :)
1999                 results[x] = true;
2000                 delta = startLoc - expectedLoc;
2001                 String text2;
2002                 if (endLoc == -1) {
2003                     text2 = text.substring(startLoc,
2004                             Math.min(startLoc + text1.length(), text.length()));
2005                 } else {
2006                     text2 = text.substring(startLoc,
2007                             Math.min(endLoc + DiffMatchPatch.MATCH_MAX_BITS, text.length()));
2008                 }
2009 
2010                 if (text1.equals(text2)) {
2011                     // Perfect match, just shove the replacement text in.
2012                     text = text.substring(0, startLoc) + this.diffText2(aPatch.getDiffs())
2013                             + text.substring(startLoc + text1.length());
2014                 } else {
2015                     // Imperfect match.  Run a diff to get a framework of equivalent
2016                     // indices.
2017                     List<Diff> diffs = this.diffMain(text1, text2, false);
2018                     if (text1.length() > DiffMatchPatch.MATCH_MAX_BITS
2019                             && this.diffLevenshtein(diffs) / (float) text1.length()
2020                             > DiffMatchPatch.PATCH_DELETE_THRESHOLD) {
2021                         // The end points match, but the content is unacceptably bad.
2022                         results[x] = false;
2023                     } else {
2024                         this.diffCleanupSemanticLossless(diffs);
2025                         int index1 = 0;
2026                         for (Diff aDiff : aPatch.getDiffs()) {
2027                             if (aDiff.getOperation() != Operation.EQUAL) {
2028                                 int index2 = this.diffXIndex(diffs, index1);
2029                                 if (aDiff.getOperation() == Operation.INSERT) {
2030                                     // Insertion
2031                                     text = text.substring(0, startLoc + index2) + aDiff.getText()
2032                                             + text.substring(startLoc + index2);
2033                                 } else if (aDiff.getOperation() == Operation.DELETE) {
2034                                     // Deletion
2035                                     text = text.substring(0, startLoc + index2)
2036                                             + text.substring(startLoc + this.diffXIndex(diffs,
2037                                             index1 + aDiff.getText().length()));
2038                                 }
2039                             }
2040                             if (aDiff.getOperation() != Operation.DELETE) {
2041                                 index1 += aDiff.getText().length();
2042                             }
2043                         }
2044                     }
2045                 }
2046             }
2047             x++;
2048         }
2049         // Strip the padding off.
2050         text = text.substring(nullPadding.length(), text.length()
2051                 - nullPadding.length());
2052         return new Object[]{text, results};
2053     }
2054 
2055     /**
2056      * Add some padding on text start and end so that edges can match something.
2057      * Intended to be called only from within patch_apply.
2058      * @param patches Array of Patch objects.
2059      * @return The padding string added to each side.
2060      */
2061     public String patchAddPadding(Deque<Patch> patches) {
2062 
2063         short paddingLength = DiffMatchPatch.PATCH_MARGIN;
2064         StringBuilder nullPadding = new StringBuilder();
2065         for (short x = 1; x <= paddingLength; x++) {
2066             nullPadding.append((char) x);
2067         }
2068 
2069         // Bump all the patches forward.
2070         for (Patch aPatch : patches) {
2071             aPatch.setStart1(aPatch.getStart1() + paddingLength);
2072             aPatch.setStart2(aPatch.getStart2() + paddingLength);
2073         }
2074 
2075         // Add some padding on start of first diff.
2076         Patch patch = patches.getFirst();
2077         Deque<Diff> diffs = patch.getDiffs();
2078         if (diffs.isEmpty() || diffs.getFirst().getOperation() != Operation.EQUAL) {
2079             // Add nullPadding equality.
2080             diffs.addFirst(new Diff(Operation.EQUAL, nullPadding.toString()));
2081             patch.setStart1(patch.getStart1() - paddingLength);  // Should be 0.
2082             patch.setStart2(patch.getStart2() - paddingLength);  // Should be 0.
2083             patch.setLength1(patch.getLength1() + paddingLength);
2084             patch.setLength2(patch.getLength2() + paddingLength);
2085         } else if (paddingLength > diffs.getFirst().getText().length()) {
2086             // Grow first equality.
2087             Diff firstDiff = diffs.getFirst();
2088             int extraLength = paddingLength - firstDiff.getText().length();
2089             firstDiff.setText(nullPadding.substring(firstDiff.getText().length())
2090                     + firstDiff.getText());
2091             patch.setStart1(patch.getStart1() - extraLength);
2092             patch.setStart2(patch.getStart2() - extraLength);
2093             patch.setLength1(patch.getLength1() + extraLength);
2094             patch.setLength2(patch.getLength2() + extraLength);
2095         }
2096 
2097         // Add some padding on end of last diff.
2098         patch = patches.getLast();
2099         diffs = patch.getDiffs();
2100         if (diffs.isEmpty() || diffs.getLast().getOperation() != Operation.EQUAL) {
2101             // Add nullPadding equality.
2102             diffs.addLast(new Diff(Operation.EQUAL, nullPadding.toString()));
2103             patch.setLength1(patch.getLength1() + paddingLength);
2104             patch.setLength2(patch.getLength2() + paddingLength);
2105         } else if (paddingLength > diffs.getLast().getText().length()) {
2106             // Grow last equality.
2107             Diff lastDiff = diffs.getLast();
2108             int extraLength = paddingLength - lastDiff.getText().length();
2109             lastDiff.setText(lastDiff.getText() + nullPadding.substring(0, extraLength));
2110             patch.setLength1(patch.getLength1() + extraLength);
2111             patch.setLength2(patch.getLength2() + extraLength);
2112         }
2113 
2114         return nullPadding.toString();
2115     }
2116 
2117     /**
2118      * Look through the patches and break up any which are longer than the
2119      * maximum limit of the match algorithm.
2120      * Intended to be called only from within patch_apply.
2121      * @param patches List of Patch objects.
2122      */
2123     public void patchSplitMax(List<Patch> patches) {
2124 
2125         short patchSize = DiffMatchPatch.MATCH_MAX_BITS;
2126         String precontext;
2127         String postcontext;
2128         Patch patch;
2129         int start1;
2130         int start2;
2131         boolean empty;
2132         Operation diffType;
2133         String diffText;
2134         ListIterator<Patch> pointer = patches.listIterator();
2135         Patch bigpatch = pointer.hasNext() ? pointer.next() : null;
2136 
2137         while (bigpatch != null) {
2138             if (bigpatch.getLength1() <= DiffMatchPatch.MATCH_MAX_BITS) {
2139                 bigpatch = pointer.hasNext() ? pointer.next() : null;
2140                 continue;
2141             }
2142             // Remove the big old patch.
2143             pointer.remove();
2144             start1 = bigpatch.getStart1();
2145             start2 = bigpatch.getStart2();
2146             precontext = StringUtils.EMPTY;
2147 
2148             while (!bigpatch.getDiffs().isEmpty()) {
2149                 // Create one of several smaller patches.
2150                 patch = new Patch();
2151                 empty = true;
2152                 patch.setStart1(start1 - precontext.length());
2153                 patch.setStart2(start2 - precontext.length());
2154                 if (!precontext.isEmpty()) {
2155                     patch.setLength1(patch.setLength2(precontext.length()));
2156                     patch.getDiffs().add(new Diff(Operation.EQUAL, precontext));
2157                 }
2158                 while (!bigpatch.getDiffs().isEmpty()
2159                         && patch.getLength1() < patchSize - DiffMatchPatch.PATCH_MARGIN) {
2160                     diffType = bigpatch.getDiffs().getFirst().getOperation();
2161                     diffText = bigpatch.getDiffs().getFirst().getText();
2162 
2163                     if (diffType == Operation.INSERT) {
2164                         // Insertions are harmless.
2165                         patch.setLength2(patch.getLength2() + diffText.length());
2166                         start2 += diffText.length();
2167                         patch.getDiffs().addLast(bigpatch.getDiffs().removeFirst());
2168                         empty = false;
2169                     } else if (diffType == Operation.DELETE && patch.getDiffs().size() == 1
2170                             && patch.getDiffs().getFirst().getOperation() == Operation.EQUAL
2171                             && diffText.length() > 2 * patchSize) {
2172                         // This is a large deletion.  Let it pass in one chunk.
2173                         patch.setLength1(patch.getLength1() + diffText.length());
2174                         start1 += diffText.length();
2175                         empty = false;
2176                         patch.getDiffs().add(new Diff(diffType, diffText));
2177                         bigpatch.getDiffs().removeFirst();
2178                     } else {
2179                         // Deletion or equality.  Only take as much as we can stomach.
2180                         diffText = diffText.substring(0, Math.min(diffText.length(),
2181                                 patchSize - patch.getLength1() - DiffMatchPatch.PATCH_MARGIN));
2182                         patch.setLength1(patch.getLength1() + diffText.length());
2183                         start1 += diffText.length();
2184                         if (diffType == Operation.EQUAL) {
2185                             patch.setLength2(patch.getLength2() + diffText.length());
2186                             start2 += diffText.length();
2187                         } else {
2188                             empty = false;
2189                         }
2190                         patch.getDiffs().add(new Diff(diffType, diffText));
2191                         if (diffText.equals(bigpatch.getDiffs().getFirst().getText())) {
2192                             bigpatch.getDiffs().removeFirst();
2193                         } else {
2194                             bigpatch.getDiffs().getFirst().setText(bigpatch.getDiffs().getFirst().getText()
2195                                     .substring(diffText.length()));
2196                         }
2197                     }
2198                 }
2199                 // Compute the head context for the next patch.
2200                 precontext = this.diffText2(patch.getDiffs());
2201                 precontext = precontext.substring(Math.max(0, precontext.length()
2202                         - DiffMatchPatch.PATCH_MARGIN));
2203                 // Append the end context for this patch.
2204                 if (this.diffText1(bigpatch.getDiffs()).length() > DiffMatchPatch.PATCH_MARGIN) {
2205                     postcontext = this.diffText1(bigpatch.getDiffs()).substring(0, DiffMatchPatch.PATCH_MARGIN);
2206                 } else {
2207                     postcontext = this.diffText1(bigpatch.getDiffs());
2208                 }
2209 
2210                 if (!postcontext.isEmpty()) {
2211                     patch.setLength1(patch.getLength1() + postcontext.length());
2212                     patch.setLength2(patch.getLength2() + postcontext.length());
2213                     if (!patch.getDiffs().isEmpty()
2214                             && patch.getDiffs().getLast().getOperation() == Operation.EQUAL) {
2215                         patch.getDiffs().getLast().setText(patch.getDiffs().getLast().getText() + postcontext);
2216                     } else {
2217                         patch.getDiffs().add(new Diff(Operation.EQUAL, postcontext));
2218                     }
2219                 }
2220 
2221                 if (!empty) {
2222                     pointer.add(patch);
2223                 }
2224             }
2225             bigpatch = pointer.hasNext() ? pointer.next() : null;
2226         }
2227     }
2228 
2229     /**
2230      * Take a list of patches and return a textual representation.
2231      * @param patches List of Patch objects.
2232      * @return Text representation of patches.
2233      */
2234     public String patchToText(List<Patch> patches) {
2235         StringBuilder text = new StringBuilder();
2236         for (Patch aPatch : patches) {
2237             text.append(aPatch);
2238         }
2239         return text.toString();
2240     }
2241 
2242     /**
2243      * Parse a textual representation of patches and return a List of Patch
2244      * objects.
2245      * @param textline Text representation of patches.
2246      * @return List of Patch objects.
2247      */
2248     public List<Patch> patchFromText(String textline) {
2249 
2250         List<Patch> patches = new LinkedList<>();
2251         if (textline.isEmpty()) {
2252             return patches;
2253         }
2254         List<String> textList = Arrays.asList(textline.split("\n"));
2255         Deque<String> text = new LinkedList<>(textList);
2256         Patch patch;
2257         Pattern patchHeader = Pattern.compile("^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$");
2258         Matcher m;
2259         char sign;
2260         String line;
2261         while (!text.isEmpty()) {
2262 
2263             m = patchHeader.matcher(text.getFirst());
2264             if (!m.matches()) {
2265                 throw new IllegalArgumentException(
2266                         "Invalid patch string: " + text.getFirst());
2267             }
2268             patch = new Patch();
2269             patches.add(patch);
2270             patch.setStart1(Integer.parseInt(m.group(1)));
2271             if (m.group(2).isEmpty()) {
2272                 patch.setStart1(patch.getStart1() - 1);
2273                 patch.setLength1(1);
2274             } else if ("0".equals(m.group(2))) {
2275                 patch.setLength1(0);
2276             } else {
2277                 patch.setStart1(patch.getStart1() - 1);
2278                 patch.setLength1(Integer.parseInt(m.group(2)));
2279             }
2280 
2281             patch.setStart2(Integer.parseInt(m.group(3)));
2282             if (m.group(4).isEmpty()) {
2283                 patch.setStart2(patch.getStart2() - 1);
2284                 patch.setLength2(1);
2285             } else if ("0".equals(m.group(4))) {
2286                 patch.setLength2(0);
2287             } else {
2288                 patch.setStart2(patch.getStart2() - 1);
2289                 patch.setLength2(Integer.parseInt(m.group(4)));
2290             }
2291             text.removeFirst();
2292 
2293             while (!text.isEmpty()) {
2294 
2295                 try {
2296                     sign = text.getFirst().charAt(0);
2297                 } catch (IndexOutOfBoundsException e) {
2298                     LOGGER.log(LogLevelUtil.IGNORE, e);
2299 
2300                     // Blank line?  Whatever.
2301                     text.removeFirst();
2302                     continue;
2303                 }
2304 
2305                 line = text.getFirst().substring(1);
2306                 line = line.replace("+", "%2B");  // decode would change all "+" to " "
2307                 try {
2308                     line = URLDecoder.decode(line, StandardCharsets.UTF_8);
2309                 } catch (IllegalArgumentException e) {
2310                     // Malformed URI sequence.
2311                     throw new IllegalArgumentException(
2312                             "Illegal escape in patch_fromText: " + line, e);
2313                 }
2314 
2315                 if (sign == '-') {
2316                     // Deletion.
2317                     patch.getDiffs().add(new Diff(Operation.DELETE, line));
2318                 } else if (sign == '+') {
2319                     // Insertion.
2320                     patch.getDiffs().add(new Diff(Operation.INSERT, line));
2321                 } else if (sign == ' ') {
2322                     // Minor equality.
2323                     patch.getDiffs().add(new Diff(Operation.EQUAL, line));
2324                 } else if (sign == '@') {
2325                     // Start of next patch.
2326                     break;
2327                 } else {
2328                     // WTF?
2329                     throw new IllegalArgumentException(
2330                             "Invalid patch mode '" + sign + "' in: " + line);
2331                 }
2332                 text.removeFirst();
2333             }
2334         }
2335         return patches;
2336     }
2337 }