echo: Support octal, hexadecimal and unicode escape sequences

This commit is contained in:
TheFightingCatfish 2021-08-25 23:57:02 +08:00 committed by Andreas Kling
parent c2f62a03ff
commit c9b384da92
Notes: sideshowbarker 2024-07-18 05:00:10 +09:00
2 changed files with 117 additions and 47 deletions

View file

@ -5,21 +5,51 @@ echo - print the given text
## Synopsis
```**sh
$ echo [-n] text...
$ echo [-ne] [text...]
```
## Description
Print the given *text*, which is passed as argv, to the standard output,
separating arguments with a space character.
Print the given `text` to the standard output. If multiple `text`s are provided, they will be joined with a space character. If no `text` is provided, an empty line will be printed.
Character escape sequences and their meanings are as follows:
`\\a` - `<alert>`
`\\b` - `<backspace>`
`\\c` - Suppress the output of all remaining characters, including the trailing newline.
`\\e` - The escape character (`\\033`).
`\\f` - `<form-feed>`
`\\n` - `<newline>`
`\\r` - `<carriage-return>`
`\\t` - `<tab>`
`\\v` - `<vertical-tab>`
`\\\\` - The backslash character (`\\`).
`\\0ooo` - A byte whose value is a zero, one, two, or three-digit octal number.
`\\xHH` - A byte whose value is a two-digit hexadecimal number.
`\\uHHHH` - A Unicode code point whose value is a four-digit hexadecimal number.
## Options
* `-n`: Do not output a trailing newline
* `-e`: Interpret backslash escapes
## Examples
```sh
$ echo hello friends!
hello friends!
$ echo -ne '\x68\x65\x6c\x6c\x6f' 'friends\041\n'
hello friends!
```

View file

@ -4,58 +4,92 @@
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/String.h>
#include <AK/CharacterTypes.h>
#include <AK/GenericLexer.h>
#include <LibCore/ArgsParser.h>
#include <stdio.h>
#include <unistd.h>
static char backslash_escaped_char(char c)
// Parses up to three octal digits from `lexer` and returns the byte they encode.
// Parsing stops at the first character that is not an octal digit, so zero digits
// yield 0. Three octal digits can encode values up to 0777 (511); anything above
// 255 is clamped to 255 so the result fits in a u8.
static u8 parse_octal_number(GenericLexer& lexer)
{
switch (c) {
case '\\':
return c;
// `\"` produces `"` with printf(1), but `\"` with echo(1)
case 'a':
return '\a';
case 'b':
return '\b';
case 'e':
return '\e';
case 'f':
return '\f';
case 'n':
return '\n';
case 'r':
return '\r';
case 't':
return '\t';
case 'v':
return '\v';
default:
return c;
u32 value = 0;
for (size_t count = 0; count < 3; ++count) {
auto c = lexer.peek();
if (!(c >= '0' && c <= '7'))
break;
value = value * 8 + (c - '0');
lexer.consume();
}
// The clamped value must actually be used: calling clamp() as a bare statement
// discards its result, and the implicit u32 -> u8 conversion on return would then
// wrap around instead (e.g. \0400 would become 0).
return static_cast<u8>(clamp(value, 0, 255));
}
static String interpret_backslash_escapes(String s)
// Reads exactly two hexadecimal digits from `lexer` and returns the byte they encode.
// Returns an empty Optional as soon as the next character is not a hex digit; a first
// digit that was already consumed at that point stays consumed.
static Optional<u8> parse_hex_number(GenericLexer& lexer)
{
    constexpr size_t required_digits = 2;

    u8 byte = 0;
    size_t digits_read = 0;
    while (digits_read < required_digits) {
        auto digit = lexer.peek();
        if (!is_ascii_hex_digit(digit))
            return {};

        lexer.consume();
        byte = byte * 16 + parse_ascii_hex_digit(digit);
        ++digits_read;
    }
    return byte;
}
// Expands the backslash escape sequences found in `string` and returns the resulting text.
// `no_trailing_newline` is set to true when a `\c` sequence is encountered; processing
// stops at that point and nothing after the `\c` is emitted.
static String interpret_backslash_escapes(StringView string, bool& no_trailing_newline)
{
// Alternating escape letter and the character it stands for; passed to
// consume_escaped_character() below.
static constexpr auto escape_map = "a\ab\be\ef\fn\nr\rt\tv\v"sv;
// Characters that are emitted without a leading backslash when
// consume_escaped_character() returns them.
static constexpr auto unescaped_chars = "\a\b\e\f\n\r\t\v\\"sv;
StringBuilder builder;
GenericLexer lexer { string };
for (size_t i = 0; i < s.length();) {
if (char c = s[i++]; c != '\\') {
builder.append(c);
continue;
while (!lexer.is_eof()) {
// Remember where this character started so that a malformed escape can be
// copied to the output unchanged.
auto this_index = lexer.tell();
auto this_char = lexer.consume();
if (this_char == '\\') {
// A backslash at the very end of the input is emitted as-is.
if (lexer.is_eof()) {
builder.append('\\');
break;
}
auto next_char = lexer.peek();
// `\c`: request no trailing newline and stop processing the input.
if (next_char == 'c') {
no_trailing_newline = true;
break;
}
if (next_char == '0') {
// `\0ooo`: skip the '0', then append the byte returned by parse_octal_number().
lexer.consume();
auto octal_number = parse_octal_number(lexer);
builder.append(octal_number);
} else if (next_char == 'x') {
// `\xHH`: skip the 'x', then append the byte returned by parse_hex_number().
lexer.consume();
auto maybe_hex_number = parse_hex_number(lexer);
if (!maybe_hex_number.has_value()) {
// Not a valid hex escape: emit everything consumed since the backslash verbatim.
auto bad_substring = string.substring_view(this_index, lexer.tell() - this_index);
builder.append(bad_substring);
} else {
builder.append(maybe_hex_number.release_value());
}
} else if (next_char == 'u') {
// NOTE(review): retreat() presumably steps back onto the backslash so that
// consume_escaped_code_point() sees the whole `\u...` sequence -- confirm.
lexer.retreat();
auto maybe_code_point = lexer.consume_escaped_code_point();
if (maybe_code_point.is_error()) {
// Not a valid code point escape: emit everything consumed since the backslash verbatim.
auto bad_substring = string.substring_view(this_index, lexer.tell() - this_index);
builder.append(bad_substring);
} else {
builder.append_code_point(maybe_code_point.release_value());
}
} else {
// Any other escape goes through escape_map. If the returned character is not
// listed in unescaped_chars, a backslash is emitted in front of it.
lexer.retreat();
auto consumed_char = lexer.consume_escaped_character('\\', escape_map);
if (!unescaped_chars.contains(consumed_char))
builder.append('\\');
builder.append(consumed_char);
}
} else {
// Ordinary character: copy it through unchanged.
builder.append(this_char);
}
if (i == s.length()) {
// Last character of string is '\' -- output it verbatim.
builder.append('\\');
}
char c = s[i++];
if (c == 'c') // `\c` suppresses further output.
break;
// FIXME: \0ooo, \xHH, \uHHHH, \UHHHHHHHH should produce characters if followed by
// enough digits.
builder.append(backslash_escaped_char(c));
}
return builder.build();
@ -68,19 +102,25 @@ int main(int argc, char** argv)
return 1;
}
Vector<const char*> values;
Vector<const char*> text;
bool no_trailing_newline = false;
bool should_interpret_backslash_escapes = false;
Core::ArgsParser args_parser;
args_parser.add_option(no_trailing_newline, "Do not output a trailing newline", nullptr, 'n');
args_parser.add_option(should_interpret_backslash_escapes, "Interpret backslash escapes", nullptr, 'e');
args_parser.add_positional_argument(values, "Values to print out", "string", Core::ArgsParser::Required::No);
args_parser.add_positional_argument(text, "Text to print out", "text", Core::ArgsParser::Required::No);
args_parser.set_stop_on_first_non_option(true);
args_parser.parse(argc, argv);
String output = String::join(' ', values);
if (text.is_empty()) {
outln();
return 0;
}
auto output = String::join(' ', text);
if (should_interpret_backslash_escapes)
output = interpret_backslash_escapes(move(output));
output = interpret_backslash_escapes(output, no_trailing_newline);
out("{}", output);
if (!no_trailing_newline)
outln();