Options.java
// Copyright (c) ZeroC, Inc.
package com.zeroc.Ice;
import java.util.ArrayList;
import java.util.List;
/**
 * Splits a command line into individual arguments using a subset of bash's quoting rules.
 *
 * @hidden Public because it's used by IceBox.
 */
public final class Options {
    /** Characters that separate arguments when they appear outside of quotes. */
    private static final String IFS = " \t\n";

    /** Digits accepted by the {@code \nnn} escape inside {@code $'...'}. */
    private static final String OCTAL_DIGITS = "01234567";

    /** Digits accepted by the {@code \xHH} escape inside {@code $'...'}. */
    private static final String HEX_DIGITS = "0123456789abcdefABCDEF";

    /** Quoting context the scanner in {@link #split(String)} is currently in. */
    private enum State {
        NORMAL,
        DOUBLE_QUOTE,
        SINGLE_QUOTE,
        ANSI_QUOTE
    }

    /**
     * Splits {@code line} into arguments.
     *
     * <p>The line is trimmed first. Outside of quotes, arguments are separated by runs of space,
     * tab, or newline. The following quoting forms are recognized:
     *
     * <ul>
     *   <li>{@code '...'}: every character up to the closing quote is taken literally.
     *   <li>{@code "..."}: a backslash is removed only when it precedes a double quote, a
     *       backslash, or a newline; otherwise both the backslash and the following character are
     *       kept.
     *   <li>{@code $'...'}: ANSI-C style escapes ({@code \n}, {@code \t}, octal, {@code \xHH},
     *       {@code \cX}, ...) are decoded.
     *   <li>Unquoted backslash: see the comment in the implementation.
     * </ul>
     *
     * @param line the command line to split; must not be {@code null}
     * @return the arguments in order of appearance; an empty array if {@code line} is empty after
     *     trimming
     * @throws ParseException if a single, double, or {@code $'} quote is not closed
     */
    public static String[] split(String line) throws ParseException {
        line = line.trim();
        if (line.isEmpty()) {
            return new String[0];
        }

        State state = State.NORMAL;
        StringBuilder arg = new StringBuilder(128);
        List<String> vec = new ArrayList<>();

        for (int i = 0; i < line.length(); i++) {
            char c = line.charAt(i);
            switch (state) {
                case NORMAL -> {
                    switch (c) {
                        case '\\' -> {
                            // Ignore a backslash at the end of the string, and strip
                            // backslash-newline pairs.
                            //
                            // If a backslash comes before a space, dollar sign, backslash, or
                            // double quote we drop the backslash, but still write the following
                            // character. This is necessary to allow quotes to be escaped.
                            // Dropping the backslash preceding a space deviates from bash quoting
                            // rules, but is necessary so we don't drop backslashes from Windows
                            // path names.
                            //
                            // NOTE(review): earlier documentation listed the single quote (not
                            // the backslash) among the characters above. The code has never
                            // dropped the backslash before a single quote; that behavior is kept
                            // here so existing callers see the same results. TODO: confirm which
                            // one is intended.
                            if (i < line.length() - 1 && line.charAt(++i) != '\n') {
                                char nextChar = line.charAt(i);
                                boolean dropBackslash =
                                    nextChar == ' '
                                        || nextChar == '$'
                                        || nextChar == '\\'
                                        || nextChar == '"';
                                if (!dropBackslash) {
                                    arg.append('\\');
                                }
                                arg.append(nextChar);
                            }
                        }
                        case '\'' -> state = State.SINGLE_QUOTE;
                        case '"' -> state = State.DOUBLE_QUOTE;
                        case '$' -> {
                            if (i < line.length() - 1 && line.charAt(i + 1) == '\'') {
                                // Bash uses $'<text>' to allow ANSI escape sequences within
                                // <text>. Skip the opening quote as well.
                                state = State.ANSI_QUOTE;
                                ++i;
                            } else {
                                arg.append('$');
                            }
                        }
                        default -> {
                            if (IFS.indexOf(c) != -1) {
                                // A separator terminates the current argument.
                                vec.add(arg.toString());
                                arg.setLength(0);
                                // Move to the start of the next argument; step back one so the
                                // loop increment lands on its first character.
                                while (++i < line.length() && IFS.indexOf(line.charAt(i)) != -1) {
                                    // Skip consecutive separators.
                                }
                                --i;
                            } else {
                                arg.append(c);
                            }
                        }
                    }
                }
                case DOUBLE_QUOTE -> {
                    // Within double quotes, only backslash retains its special meaning, and only
                    // if followed by a double quote, backslash, or newline. Both the backslash
                    // and the character are preserved for any other character.
                    if (c == '\\' && i < line.length() - 1) {
                        c = line.charAt(++i);
                        if (c != '"' && c != '\\' && c != '\n') {
                            arg.append('\\');
                        }
                        arg.append(c);
                    } else if (c == '"') {
                        // End of double-quote mode.
                        state = State.NORMAL;
                    } else {
                        // Everything else is taken literally.
                        arg.append(c);
                    }
                }
                case SINGLE_QUOTE -> {
                    if (c == '\'') {
                        // End of single-quote mode.
                        state = State.NORMAL;
                    } else {
                        // Everything else is taken literally.
                        arg.append(c);
                    }
                }
                case ANSI_QUOTE -> {
                    if (c == '\\') {
                        i = appendAnsiEscape(line, i, arg);
                    } else if (c == '\'') {
                        // End of ANSI-quote mode.
                        state = State.NORMAL;
                    } else {
                        // Everything else is taken literally.
                        arg.append(c);
                    }
                }
            }
        }

        switch (state) {
            case NORMAL -> vec.add(arg.toString());
            case SINGLE_QUOTE -> throw new ParseException("missing closing single quote");
            case DOUBLE_QUOTE -> throw new ParseException("missing closing double quote");
            case ANSI_QUOTE -> throw new ParseException("unterminated $' quote");
        }
        return vec.toArray(new String[0]);
    }

    /**
     * Decodes the backslash escape that starts at {@code backslashIndex} inside a {@code $'...'}
     * quote and appends the result to {@code arg}.
     *
     * @param line the line being split
     * @param backslashIndex index of the backslash that introduces the escape
     * @param arg the argument under construction
     * @return the index of the last character consumed by the escape
     */
    private static int appendAnsiEscape(String line, int backslashIndex, StringBuilder arg) {
        int i = backslashIndex;
        if (i == line.length() - 1) {
            // A trailing backslash is ignored; the caller reports the unterminated quote.
            return i;
        }

        char c = line.charAt(++i);
        switch (c) {
            // Single-letter escape sequences.
            case 'a' -> arg.append('\007');
            case 'b' -> arg.append('\b');
            case 'f' -> arg.append('\f');
            case 'n' -> arg.append('\n');
            case 'r' -> arg.append('\r');
            case 't' -> arg.append('\t');
            case 'v' -> arg.append('\013');
            case '\\' -> arg.append('\\');
            case '\'' -> arg.append('\'');
            // Not ANSI-C, but used by bash.
            case 'e' -> arg.append('\033');
            // Process up to three octal digits, starting with the one just read.
            case '0', '1', '2', '3', '4', '5', '6', '7' -> {
                int value = 0;
                int j = i;
                while (j < i + 3
                    && j < line.length()
                    && OCTAL_DIGITS.indexOf(line.charAt(j)) != -1) {
                    value = value * 8 + (line.charAt(j) - '0');
                    j++;
                }
                i = j - 1;
                arg.append((char) value);
            }
            // Process up to two hex digits.
            case 'x' -> {
                if (i < line.length() - 1 && HEX_DIGITS.indexOf(line.charAt(i + 1)) == -1) {
                    // "\x" not followed by a hex digit is preserved unchanged.
                    arg.append('\\');
                    arg.append('x');
                } else {
                    int value = 0;
                    int j = i + 1;
                    while (j < i + 3
                        && j < line.length()
                        && HEX_DIGITS.indexOf(line.charAt(j)) != -1) {
                        value = value * 16 + Character.digit(line.charAt(j), 16);
                        j++;
                    }
                    i = j - 1;
                    arg.append((char) value);
                }
            }
            // Process control-chars.
            case 'c' -> {
                if (i == line.length() - 1) {
                    // "\c" is the last thing on the line: there is no character to combine it
                    // with, so preserve the escape unchanged instead of reading past the end.
                    // The caller reports the unterminated quote.
                    arg.append('\\');
                    arg.append('c');
                } else {
                    char ctrl = line.charAt(++i);
                    // Only ASCII letters and '@', '[', '\', ']', '^', '_' name a control
                    // character. The ranges are spelled out so that non-ASCII characters whose
                    // upper-case form happens to be an ASCII letter are not accepted.
                    boolean isControlName =
                        (ctrl >= 'a' && ctrl <= 'z') || (ctrl >= '@' && ctrl <= '_');
                    if (isControlName) {
                        arg.append((char) (Character.toUpperCase(ctrl) - '@'));
                    } else {
                        // Bash does not define what should happen if a \c is not followed by a
                        // recognized control character. We simply treat this case like other
                        // unrecognized escape sequences, that is, we preserve the escape
                        // sequence unchanged.
                        arg.append('\\');
                        arg.append('c');
                        arg.append(ctrl);
                    }
                }
            }
            // If inside an ANSI-quoted string, a backslash isn't followed by one of the
            // recognized characters, both the backslash and the character are preserved.
            default -> {
                arg.append('\\');
                arg.append(c);
            }
        }
        return i;
    }

    private Options() {}
}