1 /*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements. See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17 package org.apache.commons.fileupload2.core;
18
19 import java.io.UnsupportedEncodingException;
20 import java.util.HashMap;
21 import java.util.Locale;
22 import java.util.Map;
23
24 /**
25 * A simple parser intended to parse sequences of name/value pairs.
26 * <p>
27 * Parameter values are expected to be enclosed in quotes if they contain unsafe characters, such as '=' characters or separators. Parameter values are optional
28 * and can be omitted.
29 * </p>
30 * <p>
31 * {@code param1 = value; param2 = "anything goes; really"; param3}
32 * </p>
33 */
34 public class ParameterParser {
35
36 /**
37 * String to be parsed.
38 */
39 private char[] chars;
40
41 /**
42 * Current position in the string.
43 */
44 private int pos;
45
46 /**
47 * Maximum position in the string.
48 */
49 private int len;
50
51 /**
52 * Start of a token.
53 */
54 private int i1;
55
56 /**
57 * End of a token.
58 */
59 private int i2;
60
61 /**
62 * Whether names stored in the map should be converted to lower case.
63 */
64 private boolean lowerCaseNames;
65
66 /**
67 * Default ParameterParser constructor.
68 */
69 public ParameterParser() {
70 }
71
72 /**
73 * A helper method to process the parsed token. This method removes leading and trailing blanks as well as enclosing quotation marks, when necessary.
74 *
75 * @param quoted {@code true} if quotation marks are expected, {@code false} otherwise.
76 * @return the token
77 */
78 private String getToken(final boolean quoted) {
79 // Trim leading white spaces
80 while (i1 < i2 && Character.isWhitespace(chars[i1])) {
81 i1++;
82 }
83 // Trim trailing white spaces
84 while (i2 > i1 && Character.isWhitespace(chars[i2 - 1])) {
85 i2--;
86 }
87 // Strip away quotation marks if necessary
88 if (quoted && i2 - i1 >= 2 && chars[i1] == '"' && chars[i2 - 1] == '"') {
89 i1++;
90 i2--;
91 }
92 String result = null;
93 if (i2 > i1) {
94 result = new String(chars, i1, i2 - i1);
95 }
96 return result;
97 }
98
99 /**
100 * Tests if there any characters left to parse.
101 *
102 * @return {@code true} if there are unparsed characters, {@code false} otherwise.
103 */
104 private boolean hasChar() {
105 return this.pos < this.len;
106 }
107
108 /**
109 * Tests {@code true} if parameter names are to be converted to lower case when name/value pairs are parsed.
110 *
111 * @return {@code true} if parameter names are to be converted to lower case when name/value pairs are parsed. Otherwise returns {@code false}
112 */
113 public boolean isLowerCaseNames() {
114 return this.lowerCaseNames;
115 }
116
117 /**
118 * Tests if the given character is present in the array of characters.
119 *
120 * @param ch the character to test for presence in the array of characters
121 * @param charray the array of characters to test against
122 * @return {@code true} if the character is present in the array of characters, {@code false} otherwise.
123 */
124 private boolean isOneOf(final char ch, final char[] charray) {
125 var result = false;
126 for (final char element : charray) {
127 if (ch == element) {
128 result = true;
129 break;
130 }
131 }
132 return result;
133 }
134
135 /**
136 * Parses a map of name/value pairs from the given array of characters. Names are expected to be unique.
137 *
138 * @param charArray the array of characters that contains a sequence of name/value pairs
139 * @param separator the name/value pairs separator
140 * @return a map of name/value pairs
141 */
142 public Map<String, String> parse(final char[] charArray, final char separator) {
143 if (charArray == null) {
144 return new HashMap<>();
145 }
146 return parse(charArray, 0, charArray.length, separator);
147 }
148
149 /**
150 * Parses a map of name/value pairs from the given array of characters. Names are expected to be unique.
151 *
152 * @param charArray the array of characters that contains a sequence of name/value pairs
153 * @param offset - the initial offset.
154 * @param length - the length.
155 * @param separator the name/value pairs separator
156 * @return a map of name/value pairs
157 */
158 public Map<String, String> parse(final char[] charArray, final int offset, final int length, final char separator) {
159
160 if (charArray == null) {
161 return new HashMap<>();
162 }
163 final var params = new HashMap<String, String>();
164 this.chars = charArray.clone();
165 this.pos = offset;
166 this.len = length;
167
168 String paramName;
169 String paramValue;
170 while (hasChar()) {
171 paramName = parseToken(new char[] { '=', separator });
172 paramValue = null;
173 if (hasChar() && charArray[pos] == '=') {
174 pos++; // skip '='
175 paramValue = parseQuotedToken(new char[] { separator });
176
177 if (paramValue != null) {
178 try {
179 paramValue = RFC2231Utils.hasEncodedValue(paramName) ? RFC2231Utils.decodeText(paramValue) : MimeUtils.decodeText(paramValue);
180 } catch (final UnsupportedEncodingException ignored) {
181 // let's keep the original value in this case
182 }
183 }
184 }
185 if (hasChar() && charArray[pos] == separator) {
186 pos++; // skip separator
187 }
188 if (paramName != null && !paramName.isEmpty()) {
189 paramName = RFC2231Utils.stripDelimiter(paramName);
190 if (this.lowerCaseNames) {
191 paramName = paramName.toLowerCase(Locale.ENGLISH);
192 }
193 params.put(paramName, paramValue);
194 }
195 }
196 return params;
197 }
198
199 /**
200 * Parses a map of name/value pairs from the given string. Names are expected to be unique.
201 *
202 * @param str the string that contains a sequence of name/value pairs
203 * @param separator the name/value pairs separator
204 * @return a map of name/value pairs
205 */
206 public Map<String, String> parse(final String str, final char separator) {
207 if (str == null) {
208 return new HashMap<>();
209 }
210 return parse(str.toCharArray(), separator);
211 }
212
213 /**
214 * Parses a map of name/value pairs from the given string. Names are expected to be unique. Multiple separators may be specified and the earliest found in
215 * the input string is used.
216 *
217 * @param str the string that contains a sequence of name/value pairs
218 * @param separators the name/value pairs separators
219 * @return a map of name/value pairs
220 */
221 public Map<String, String> parse(final String str, final char[] separators) {
222 if (separators == null || separators.length == 0) {
223 return new HashMap<>();
224 }
225 var separator = separators[0];
226 if (str != null) {
227 var idx = str.length();
228 for (final char separator2 : separators) {
229 final var tmp = str.indexOf(separator2);
230 if (tmp != -1 && tmp < idx) {
231 idx = tmp;
232 separator = separator2;
233 }
234 }
235 }
236 return parse(str, separator);
237 }
238
239 /**
240 * Parses out a token until any of the given terminators is encountered outside the quotation marks.
241 *
242 * @param terminators the array of terminating characters. Any of these characters when encountered outside the quotation marks signify the end of the token
243 * @return the token
244 */
245 private String parseQuotedToken(final char[] terminators) {
246 char ch;
247 i1 = pos;
248 i2 = pos;
249 var quoted = false;
250 var charEscaped = false;
251 while (hasChar()) {
252 ch = chars[pos];
253 if (!quoted && isOneOf(ch, terminators)) {
254 break;
255 }
256 if (!charEscaped && ch == '"') {
257 quoted = !quoted;
258 }
259 charEscaped = !charEscaped && ch == '\\';
260 i2++;
261 pos++;
262
263 }
264 return getToken(true);
265 }
266
267 /**
268 * Parses out a token until any of the given terminators is encountered.
269 *
270 * @param terminators the array of terminating characters. Any of these characters when encountered signify the end of the token
271 * @return the token
272 */
273 private String parseToken(final char[] terminators) {
274 char ch;
275 i1 = pos;
276 i2 = pos;
277 while (hasChar()) {
278 ch = chars[pos];
279 if (isOneOf(ch, terminators)) {
280 break;
281 }
282 i2++;
283 pos++;
284 }
285 return getToken(false);
286 }
287
288 /**
289 * Sets the flag if parameter names are to be converted to lower case when name/value pairs are parsed.
290 *
291 * @param lowerCaseNames {@code true} if parameter names are to be converted to lower case when name/value pairs are parsed. {@code false} otherwise.
292 */
293 public void setLowerCaseNames(final boolean lowerCaseNames) {
294 this.lowerCaseNames = lowerCaseNames;
295 }
296
297 }