Fix error with redundant line breaks

This fixes an error with a repository, where an added file has \r\r\n
as line breaks. This interesting combination is handled as a single
line break by the diff operation, while Java's Scanner implementation
handles this as two lines (the first one delimited by the first \r,
the second one delimited by \r\n. This led to empty lines inside the
diff, where we only expect lines that contain at least one character
(' ', '+' or '-'), and this in turn led to an index out of bounds
exception.

Now we handle each combination of any kind of new line delimiter
characters as a single delimiter. This should be safe, because, as
mentioned earlier, we always expect at least one character in a line
for a diff output.
This commit is contained in:
René Pfeuffer
2020-05-04 15:21:57 +02:00
parent ffcc146f75
commit 52a09a96d2
3 changed files with 33 additions and 5 deletions

View File

@@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed
- Protocol URI for git commands under windows ([#1108](https://github.com/scm-manager/scm-manager/pull/1108))
- Fix usage of invalid cipher algorith on newer java versions ([#1110](https://github.com/scm-manager/scm-manager/issues/1110),[#1112](https://github.com/scm-manager/scm-manager/pull/1112))
- Handle obscure line breaks in diff viewer ([#1129](https://github.com/scm-manager/scm-manager/pull/1129))
## [2.0.0-rc7] - 2020-04-09
### Added

View File

@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package sonia.scm.repository.spi;
import sonia.scm.repository.api.DiffLine;
@@ -49,9 +49,9 @@ final class GitHunkParser {
public List<Hunk> parse(String content) {
List<Hunk> hunks = new ArrayList<>();
try (Scanner scanner = new Scanner(content)) {
while (scanner.hasNextLine()) {
String line = scanner.nextLine();
try (Scanner scanner = new Scanner(content).useDelimiter("[\n\r\u2028\u2029\u0085]+")) {
while (scanner.hasNext()) {
String line = scanner.next();
if (line.startsWith("@@")) {
parseHeader(hunks, line);
} else if (currentGitHunk != null) {

View File

@@ -21,7 +21,7 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package sonia.scm.repository.spi;
import org.junit.jupiter.api.Test;
@@ -103,6 +103,15 @@ class GitHunkParserTest {
"+added line\n" +
"\\ No newline at end of file\n";
private static final String MULTIPLE_LINE_BREAKS = "diff --git a/.editorconfig b/.editorconfig\n" +
"index ea2a3ba..2f02f32 100644\n" +
"--- a/.editorconfig\n" +
"+++ b/.editorconfig\n" +
"@@ -10,3 +10,4 @@\n" +
" indent_style = space\r\r\n" +
" indent_size = 2\r\r\n" +
" charset = utf-8\n";
@Test
void shouldParseHunks() {
List<Hunk> hunks = new GitHunkParser().parse(DIFF_001);
@@ -183,6 +192,24 @@ class GitHunkParserTest {
assertThat(lastLine.getContent()).isEqualTo("added line");
}
@Test
void shouldHandleMultipleLineBreaks() {
List<Hunk> hunks = new GitHunkParser().parse(MULTIPLE_LINE_BREAKS);
Hunk hunk = hunks.get(0);
Iterator<DiffLine> lines = hunk.iterator();
DiffLine line1 = lines.next();
assertThat(line1.getOldLineNumber()).hasValue(10);
assertThat(line1.getNewLineNumber()).hasValue(10);
assertThat(line1.getContent()).isEqualTo("indent_style = space");
lines.next();
lines.next();
assertThat(lines.hasNext()).isFalse();
}
private void assertHunk(Hunk hunk, int oldStart, int oldLineCount, int newStart, int newLineCount) {
assertThat(hunk.getOldStart()).isEqualTo(oldStart);
assertThat(hunk.getOldLineCount()).isEqualTo(oldLineCount);