| 1 | /* |
|---|---|
| 2 | * Copyright (C) 2007-2010 Júlio Vilmar Gesser. |
| 3 | * Copyright (C) 2011, 2013-2020 The JavaParser Team. |
| 4 | * |
| 5 | * This file is part of JavaParser. |
| 6 | * |
| 7 | * JavaParser can be used either under the terms of |
| 8 | * a) the GNU Lesser General Public License as published by |
| 9 | * the Free Software Foundation, either version 3 of the License, or |
| 10 | * (at your option) any later version. |
| 11 | * b) the terms of the Apache License |
| 12 | * |
| 13 | * You should have received a copy of both licenses in LICENCE.LGPL and |
| 14 | * LICENCE.APACHE. Please refer to those files for details. |
| 15 | * |
| 16 | * JavaParser is distributed in the hope that it will be useful, |
| 17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 19 | * GNU Lesser General Public License for more details. |
| 20 | */ |
| 21 | package com.github.javaparser; |
| 22 | |
| 23 | import com.github.javaparser.utils.LineSeparator; |
| 24 | |
| 25 | import java.io.IOException; |
| 26 | import java.util.HashMap; |
| 27 | import java.util.Map; |
| 28 | import java.util.Optional; |
| 29 | |
| 30 | /** |
| 31 | * {@link Provider} un-escaping unicode escape sequences in the input sequence. |
| 32 | */ |
| 33 | public class LineEndingProcessingProvider implements Provider { |
| 34 | |
| 35 | private static final int EOF = -1; |
| 36 | |
| 37 | private static final int DEFAULT_BUFFER_SIZE = 2048; |
| 38 | |
| 39 | /** |
| 40 | * The "other" provider which we are wrapping around / reading from. |
| 41 | */ |
| 42 | private final Provider _input; |
| 43 | |
| 44 | /** |
| 45 | * The buffer that we're storing data within. |
| 46 | */ |
| 47 | private final char[] _data; |
| 48 | |
| 49 | /** |
| 50 | * The number of characters in {@link #_data}. |
| 51 | */ |
| 52 | private int _len = 0; |
| 53 | |
| 54 | /** |
| 55 | * The position in {@link #_data} where to read the next source character from. |
| 56 | */ |
| 57 | private int _pos = 0; |
| 58 | |
| 59 | private final Map<LineSeparator, Integer> eolCounts = new HashMap<>(); |
| 60 | |
| 61 | public LineEndingProcessingProvider(Provider input) { |
| 62 | this(DEFAULT_BUFFER_SIZE, input); |
| 63 | } |
| 64 | |
| 65 | public LineEndingProcessingProvider(int bufferSize, Provider input) { |
| 66 | _input = input; |
| 67 | _data = new char[bufferSize]; |
| 68 | } |
| 69 | |
| 70 | @Override |
| 71 | public void close() throws IOException { |
| 72 | _input.close(); |
| 73 | } |
| 74 | |
| 75 | private int fillBuffer() throws IOException { |
| 76 | _pos = 0; |
| 77 | int direct = _input.read(_data, 0, _data.length); |
| 78 | if (direct != 0) { |
| 79 | _len = direct; |
| 80 | } |
| 81 | return direct; |
| 82 | } |
| 83 | |
| 84 | public LineSeparator getDetectedLineEnding() { |
| 85 | return LineSeparator.getLineEnding( |
| 86 | eolCounts.getOrDefault(LineSeparator.CR, 0), |
| 87 | eolCounts.getOrDefault(LineSeparator.LF, 0), |
| 88 | eolCounts.getOrDefault(LineSeparator.CRLF, 0) |
| 89 | ); |
| 90 | } |
| 91 | |
| 92 | private boolean isBufferEmpty() { |
| 93 | return _pos >= _len; |
| 94 | } |
| 95 | |
| 96 | /** |
| 97 | * Retrieves the next un-escaped character from the buffered {@link #_input}. |
| 98 | * |
| 99 | * @return The next character or {@code -1} if no more input is available. |
| 100 | */ |
| 101 | private int nextBufferedChar() throws IOException { |
| 102 | while (isBufferEmpty()) { |
| 103 | int direct = fillBuffer(); |
| 104 | if (direct < 0) { |
| 105 | return EOF; |
| 106 | } |
| 107 | } |
| 108 | return _data[_pos++]; |
| 109 | } |
| 110 | |
| 111 | @Override |
| 112 | public int read(char[] buffer, final int offset, int len) throws IOException { |
| 113 | int pos = offset; |
| 114 | int stop = offset + len; |
| 115 | LineSeparator previousLineSeparator = null; |
| 116 | while (pos < stop) { |
| 117 | int ch = nextBufferedChar(); |
| 118 | if (ch < 0) { |
| 119 | if (pos == offset) { |
| 120 | // Nothing read yet, this is the end of the stream. |
| 121 | return EOF; |
| 122 | } else { |
| 123 | break; |
| 124 | } |
| 125 | } else { |
| 126 | String str = String.valueOf((char) ch); |
| 127 | Optional<LineSeparator> lookup = LineSeparator.lookup(str); |
| 128 | |
| 129 | if (lookup.isPresent()) { |
| 130 | LineSeparator lineSeparator = lookup.get(); |
| 131 | |
| 132 | // Track the number of times this character is found.. |
| 133 | eolCounts.putIfAbsent(lineSeparator, 0); |
| 134 | eolCounts.put(lineSeparator, eolCounts.get(lineSeparator) + 1); |
| 135 | |
| 136 | // Handle line separators of length two (specifically CRLF) |
| 137 | // TODO: Make this more generic than just CRLF (e.g. track the previous char rather than the previous line separator |
| 138 | if (lineSeparator == LineSeparator.LF) { |
| 139 | if (previousLineSeparator == LineSeparator.CR) { |
| 140 | eolCounts.putIfAbsent(LineSeparator.CRLF, 0); |
| 141 | eolCounts.put(LineSeparator.CRLF, eolCounts.get(LineSeparator.CRLF) + 1); |
| 142 | } |
| 143 | } |
| 144 | |
| 145 | // If "this" (current) char <strong>is</strong> a line separator, set the next loop's "previous" to this |
| 146 | previousLineSeparator = lineSeparator; |
| 147 | } else { |
| 148 | // If "this" (current) char <strong>is not</strong> a line separator, set the next loop's "previous" to null |
| 149 | previousLineSeparator = null; |
| 150 | } |
| 151 | |
| 152 | // Move to next character |
| 153 | buffer[pos++] = (char) ch; |
| 154 | } |
| 155 | } |
| 156 | return pos - offset; |
| 157 | } |
| 158 | |
| 159 | } |
| 160 |
Members