From 93548551e10d3220f390434b9e461de07a86c4d8 Mon Sep 17 00:00:00 2001 From: PranavRJoshi Date: Wed, 6 May 2026 12:40:32 +0545 Subject: [PATCH 1/7] feat: introduce e substitution for sed When the s command is requested for sed, the e substitution flag enables execution of the result of substitution as a shell command. It matches the GNU sed behavior. In src/sed/command.rs, add 'execute' field to 'Substitution' structure. In src/sed/compiler.rs, handle 'e' in 'compile_subst_flags' and reject it if --posix or --sandbox flag is used in sed. In src/sed/processor.rs, spawn /bin/sh with -c flag for a successful match and replace the pattern space with the command's standard output stream. Signed-off-by: PranavRJoshi --- src/sed/command.rs | 1 + src/sed/compiler.rs | 64 ++++++++++++++++++++++++++++++++++++-------- src/sed/processor.rs | 23 ++++++++++++++++ 3 files changed, 77 insertions(+), 11 deletions(-) diff --git a/src/sed/command.rs b/src/sed/command.rs index 250bcbc5..ea40e6a1 100644 --- a/src/sed/command.rs +++ b/src/sed/command.rs @@ -202,6 +202,7 @@ pub struct Substitution { pub occurrence: usize, // Which occurrence to substitute pub print_flag: bool, // True if 'p' flag pub ignore_case: bool, // True if 'I' flag + pub execute: bool, // True if 'e' flag (GNU extension) pub write_file: Option>>, // Writer to file if 'w' flag is used } diff --git a/src/sed/compiler.rs b/src/sed/compiler.rs index bcfff315..e460234d 100644 --- a/src/sed/compiler.rs +++ b/src/sed/compiler.rs @@ -791,7 +791,7 @@ fn compile_subst_command( let mut subst = Box::new(Substitution::default()); subst.replacement = compile_replacement(lines, line)?; - compile_subst_flags(lines, line, &mut subst)?; + compile_subst_flags(lines, line, &mut subst, context.posix, context.sandbox)?; if pattern.is_empty() && subst.ignore_case { return compilation_error( @@ -865,12 +865,15 @@ pub fn compile_subst_flags( lines: &ScriptLineProvider, line: &mut ScriptCharProvider, subst: &mut Substitution, + posix: bool, 
+ sandbox: bool, ) -> UResult<()> { let mut seen_g_or_n = false; subst.occurrence = 1; // default subst.print_flag = false; subst.ignore_case = false; + subst.execute = false; subst.write_file = None; loop { @@ -903,6 +906,18 @@ pub fn compile_subst_flags( line.advance(); } + 'e' => { + if posix || sandbox { + return compilation_error( + lines, + line, + "the 'e' substitute flag is not allowed with --posix or --sandbox", + ); + } + subst.execute = true; + line.advance(); + } + _c @ '1'..='9' => { if seen_g_or_n { return compilation_error( @@ -2033,7 +2048,7 @@ mod tests { let (lines, mut chars) = make_providers("g"); let mut subst = Substitution::default(); - compile_subst_flags(&lines, &mut chars, &mut subst).unwrap(); + compile_subst_flags(&lines, &mut chars, &mut subst, false, false).unwrap(); assert_eq!(subst.occurrence, 0); // 'g' means all occurrences } @@ -2042,7 +2057,7 @@ mod tests { let (lines, mut chars) = make_providers("p"); let mut subst = Substitution::default(); - compile_subst_flags(&lines, &mut chars, &mut subst).unwrap(); + compile_subst_flags(&lines, &mut chars, &mut subst, false, false).unwrap(); assert!(subst.print_flag); } @@ -2051,7 +2066,7 @@ mod tests { let (lines, mut chars) = make_providers("I"); let mut subst = Substitution::default(); - compile_subst_flags(&lines, &mut chars, &mut subst).unwrap(); + compile_subst_flags(&lines, &mut chars, &mut subst, false, false).unwrap(); assert!(subst.ignore_case); } @@ -2060,7 +2075,7 @@ mod tests { let (lines, mut chars) = make_providers("i"); let mut subst = Substitution::default(); - compile_subst_flags(&lines, &mut chars, &mut subst).unwrap(); + compile_subst_flags(&lines, &mut chars, &mut subst, false, false).unwrap(); assert!(subst.ignore_case); } @@ -2069,7 +2084,7 @@ mod tests { let (lines, mut chars) = make_providers("3"); let mut subst = Substitution::default(); - compile_subst_flags(&lines, &mut chars, &mut subst).unwrap(); + compile_subst_flags(&lines, &mut chars, &mut subst, false, 
false).unwrap(); assert_eq!(subst.occurrence, 3); } @@ -2078,7 +2093,7 @@ mod tests { let (lines, mut chars) = make_providers("g3"); let mut subst = Substitution::default(); - let err = compile_subst_flags(&lines, &mut chars, &mut subst).unwrap_err(); + let err = compile_subst_flags(&lines, &mut chars, &mut subst, false, false).unwrap_err(); assert!( err.to_string() .contains("multiple 'g' or numeric flags in substitute command") @@ -2090,7 +2105,7 @@ mod tests { let (lines, mut chars) = make_providers("2g"); let mut subst = Substitution::default(); - let err = compile_subst_flags(&lines, &mut chars, &mut subst).unwrap_err(); + let err = compile_subst_flags(&lines, &mut chars, &mut subst, false, false).unwrap_err(); assert!( err.to_string() .contains("multiple 'g' or numeric flags in substitute command") @@ -2102,7 +2117,7 @@ mod tests { let (lines, mut chars) = make_providers("w "); let mut subst = Substitution::default(); - let err = compile_subst_flags(&lines, &mut chars, &mut subst).unwrap_err(); + let err = compile_subst_flags(&lines, &mut chars, &mut subst, false, false).unwrap_err(); assert!(err.to_string().contains("missing file path")); } @@ -2113,19 +2128,46 @@ mod tests { let (lines, mut chars) = make_providers(&format!("w {}", out.display())); let mut subst = Substitution::default(); - compile_subst_flags(&lines, &mut chars, &mut subst).unwrap(); + compile_subst_flags(&lines, &mut chars, &mut subst, false, false).unwrap(); assert_eq!( subst.write_file.as_ref().map(|w| w.borrow().path.clone()), Some(out) ); } + #[test] + fn test_compile_subst_flag_e() { + let (lines, mut chars) = make_providers("e"); + let mut subst = Substitution::default(); + + compile_subst_flags(&lines, &mut chars, &mut subst, false, false).unwrap(); + assert!(subst.execute); + } + + #[test] + fn test_compile_subst_flag_e_rejected_under_posix() { + let (lines, mut chars) = make_providers("e"); + let mut subst = Substitution::default(); + + let err = compile_subst_flags(&lines, &mut 
chars, &mut subst, true, false).unwrap_err(); + assert!(err.to_string().contains("not allowed with --posix or --sandbox")); + } + + #[test] + fn test_compile_subst_flag_e_rejected_under_sandbox() { + let (lines, mut chars) = make_providers("e"); + let mut subst = Substitution::default(); + + let err = compile_subst_flags(&lines, &mut chars, &mut subst, false, true).unwrap_err(); + assert!(err.to_string().contains("not allowed with --posix or --sandbox")); + } + #[test] fn test_compile_subst_flag_invalid_flag() { let (lines, mut chars) = make_providers("z"); let mut subst = Substitution::default(); - let err = compile_subst_flags(&lines, &mut chars, &mut subst).unwrap_err(); + let err = compile_subst_flags(&lines, &mut chars, &mut subst, false, false).unwrap_err(); assert!(err.to_string().contains("invalid substitute flag")); } diff --git a/src/sed/processor.rs b/src/sed/processor.rs index 45d6b266..e6315e7b 100644 --- a/src/sed/processor.rs +++ b/src/sed/processor.rs @@ -306,6 +306,29 @@ fn substitute( pattern.set_to_string(result, pattern.is_newline_terminated()); + // Execute the pattern space as a shell command if the 'e' flag is set + if sub.execute { + let cmd_str = pattern.as_str()?.to_string(); + let output_bytes = std::process::Command::new("/bin/sh") + .arg("-c") + .arg(&cmd_str) + .output() + .map_err(|e| { + input_runtime_error::<()>( + &command.location, + context, + format!("failed to execute shell command: {e}"), + ) + .unwrap_err() + })?; + let mut shell_out = String::from_utf8_lossy(&output_bytes.stdout).into_owned(); + // Strip the trailing newline, as GNU sed does + if shell_out.ends_with('\n') { + shell_out.pop(); + } + pattern.set_to_string(shell_out, pattern.is_newline_terminated()); + } + if sub.print_flag { write_chunk(output, context, pattern)?; } From 208e94303694df48d0325e04726b4527b188a71c Mon Sep 17 00:00:00 2001 From: PranavRJoshi Date: Wed, 6 May 2026 13:55:54 +0545 Subject: [PATCH 2/7] chore: comply with rustfmt Signed-off-by: 
PranavRJoshi --- src/sed/compiler.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/sed/compiler.rs b/src/sed/compiler.rs index e460234d..5ab16d0d 100644 --- a/src/sed/compiler.rs +++ b/src/sed/compiler.rs @@ -2150,7 +2150,10 @@ mod tests { let mut subst = Substitution::default(); let err = compile_subst_flags(&lines, &mut chars, &mut subst, true, false).unwrap_err(); - assert!(err.to_string().contains("not allowed with --posix or --sandbox")); + assert!( + err.to_string() + .contains("not allowed with --posix or --sandbox") + ); } #[test] @@ -2159,7 +2162,10 @@ mod tests { let mut subst = Substitution::default(); let err = compile_subst_flags(&lines, &mut chars, &mut subst, false, true).unwrap_err(); - assert!(err.to_string().contains("not allowed with --posix or --sandbox")); + assert!( + err.to_string() + .contains("not allowed with --posix or --sandbox") + ); } #[test] From 9c881b0fa176393ac8a7636f5801a213bda8a1ee Mon Sep 17 00:00:00 2001 From: PranavRJoshi Date: Wed, 6 May 2026 14:23:35 +0545 Subject: [PATCH 3/7] chore: support for Windows command-line Instead of calling bourne shell, define a new function: shell_command() with two definitions; conditionally compiling to UNIX-like and Windows. On UNIX-like, executes the bourne shell, whereas command prompt is executed on Windows. 
Signed-off-by: PranavRJoshi --- src/sed/processor.rs | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/src/sed/processor.rs b/src/sed/processor.rs index e6315e7b..7d2104fb 100644 --- a/src/sed/processor.rs +++ b/src/sed/processor.rs @@ -195,6 +195,20 @@ fn re_or_saved_re<'a>( } } +#[cfg(unix)] +fn shell_command(cmd: &str) -> std::process::Command { + let mut c = std::process::Command::new("/bin/sh"); + c.arg("-c").arg(cmd); + c +} + +#[cfg(windows)] +fn shell_command(cmd: &str) -> std::process::Command { + let mut c = std::process::Command::new("cmd.exe"); + c.arg("/C").arg(cmd); + c +} + /// Perform the specified RE replacement in the provided pattern space. fn substitute( pattern: &mut IOChunk, @@ -309,18 +323,14 @@ fn substitute( // Execute the pattern space as a shell command if the 'e' flag is set if sub.execute { let cmd_str = pattern.as_str()?.to_string(); - let output_bytes = std::process::Command::new("/bin/sh") - .arg("-c") - .arg(&cmd_str) - .output() - .map_err(|e| { - input_runtime_error::<()>( - &command.location, - context, - format!("failed to execute shell command: {e}"), - ) - .unwrap_err() - })?; + let output_bytes = shell_command(&cmd_str).output().map_err(|e| { + input_runtime_error::<()>( + &command.location, + context, + format!("failed to execute shell command: {e}"), + ) + .unwrap_err() + })?; let mut shell_out = String::from_utf8_lossy(&output_bytes.stdout).into_owned(); // Strip the trailing newline, as GNU sed does if shell_out.ends_with('\n') { From 1eb0a592228b9fb12ccd96c7d9689f836837b237 Mon Sep 17 00:00:00 2001 From: PranavRJoshi Date: Wed, 6 May 2026 14:48:40 +0545 Subject: [PATCH 4/7] chore: check newline for Windows Unix-like system only uses the newline character whereas Windows uses the carriage return as well as newline. Hence, when working on Windows, we need to truncate the last two characters. 
Signed-off-by: PranavRJoshi --- src/sed/processor.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/sed/processor.rs b/src/sed/processor.rs index 7d2104fb..a8e8bf8e 100644 --- a/src/sed/processor.rs +++ b/src/sed/processor.rs @@ -332,8 +332,11 @@ fn substitute( .unwrap_err() })?; let mut shell_out = String::from_utf8_lossy(&output_bytes.stdout).into_owned(); - // Strip the trailing newline, as GNU sed does - if shell_out.ends_with('\n') { + if shell_out.ends_with("\r\n") { + // On windows, both return carriage and newline characters are used + shell_out.truncate(shell_out.len() - 2) + } else if shell_out.ends_with('\n') { + // Strip the trailing newline, as GNU sed does shell_out.pop(); } pattern.set_to_string(shell_out, pattern.is_newline_terminated()); From cbc6daccaf42a2dec12a16eb59329fbc2266ed88 Mon Sep 17 00:00:00 2001 From: PranavRJoshi Date: Wed, 6 May 2026 14:56:21 +0545 Subject: [PATCH 5/7] add: 'e' flag tests Nine new tests have been introduced for the new 'e' substitution option. 'test_subst_e_flag_multiline_output' is conditionally compiled because the printf command is not guaranteed to be present on all Windows systems. 
Signed-off-by: PranavRJoshi --- tests/by-util/test_sed.rs | 83 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/tests/by-util/test_sed.rs b/tests/by-util/test_sed.rs index b31fed76..8d8738f5 100644 --- a/tests/by-util/test_sed.rs +++ b/tests/by-util/test_sed.rs @@ -350,6 +350,89 @@ fn subst_write_file() -> std::io::Result<()> { Ok(()) } +#[test] +fn test_subst_e_flag_basic() { + new_ucmd!() + .arg("s/.*/echo hi/e") + .pipe_in("a\n") + .succeeds() + .stdout_is("hi\n"); +} + +#[test] +fn test_subst_e_flag_preserves_unmatched_lines() { + new_ucmd!() + .args(&["-e", "s/^match$/echo replaced/e"]) + .pipe_in("no\nmatch\nno\n") + .succeeds() + .stdout_is("no\nreplaced\nno\n"); +} + +#[test] +fn test_subst_e_flag_strips_trailing_newline() { + // echo produces "hello\n", e flag should strip trailing newline + new_ucmd!() + .arg("s/.*/echo hello/e") + .pipe_in("x\n") + .succeeds() + .stdout_is("hello\n"); +} + +#[test] +#[cfg(unix)] +fn test_subst_e_flag_multiline_output() { + // Command that produces multiple lines + new_ucmd!() + .arg(r#"s/.*/printf 'a\nb'/e"#) + .pipe_in("x\n") + .succeeds() + .stdout_is("a\nb\n"); +} + +#[test] +fn test_subst_e_flag_combined_with_g() { + // e flag with other flags + new_ucmd!().arg("s/x/echo y/ge").pipe_in("x\n").succeeds(); +} + +#[test] +fn test_subst_e_flag_rejected_with_posix() { + // e flag is rejected at compile time if --posix or --sandbox is provided. 
+ new_ucmd!() + .args(&["--posix", "s/.*/echo hi/e"]) + .fails() + .stderr_contains("not allowed with --posix or --sandbox"); +} + +#[test] +fn test_subst_e_flag_rejected_with_sandbox() { + new_ucmd!() + .args(&["--sandbox", "s/.*/echo hi/e"]) + .fails() + .stderr_contains("not allowed with --posix or --sandbox"); +} + +#[test] +fn test_subst_e_flag_command_failure() { + // A non-existent command produces empty output but sed itself succeeds + // (matching GNU sed behavior: the shell runs, the command inside fails) + new_ucmd!() + .arg("s/.*/nonexistent_command/e") + .pipe_in("a\n") + .succeeds() + .stdout_is("\n"); +} + +#[test] +fn test_subst_e_flag_no_match_no_exec() { + // If substitution doesn't match, command should not execute + new_ucmd!() + .arg("s/nomatch/echo bad/e") + .pipe_in("hello\n") + .succeeds() + .stdout_is("hello\n"); +} + //////////////////////////////////////////////////////////// // Transliteration: y check_output!(trans_simple, ["-e", r"y/0123456789/9876543210/", LINES1]); From 35c4b86e60b6d9c18aebb6a3e1dc62b0a53591e4 Mon Sep 17 00:00:00 2001 From: PranavRJoshi Date: Wed, 6 May 2026 15:31:28 +0545 Subject: [PATCH 6/7] chore: fallback for shell_command Executing a shell or a command prompt requires appropriate platforms. WASM does not currently support it. Also fix a linting issue. Signed-off-by: PranavRJoshi --- src/sed/processor.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/sed/processor.rs b/src/sed/processor.rs index a8e8bf8e..a37d00a4 100644 --- a/src/sed/processor.rs +++ b/src/sed/processor.rs @@ -209,6 +209,12 @@ fn shell_command(cmd: &str) -> std::process::Command { c } +// Fallback if the target OS is neither Windows nor UNIX-like +#[cfg(not(any(unix, windows)))] +fn shell_command(_cmd: &str) -> std::process::Command { + unimplemented!("the 'e' substitute flag requires a platform shell (/bin/sh or cmd.exe)") +} + /// Perform the specified RE replacement in the provided pattern space. 
fn substitute( pattern: &mut IOChunk, @@ -334,7 +340,7 @@ fn substitute( let mut shell_out = String::from_utf8_lossy(&output_bytes.stdout).into_owned(); if shell_out.ends_with("\r\n") { // On windows, both return carriage and newline characters are used - shell_out.truncate(shell_out.len() - 2) + shell_out.truncate(shell_out.len() - 2); } else if shell_out.ends_with('\n') { // Strip the trailing newline, as GNU sed does shell_out.pop(); From ea2cd880a10e19459268a8e1b7039daf8212f7c1 Mon Sep 17 00:00:00 2001 From: PranavRJoshi Date: Wed, 6 May 2026 15:36:06 +0545 Subject: [PATCH 7/7] chore: linting cleanup Signed-off-by: PranavRJoshi --- src/sed/processor.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sed/processor.rs b/src/sed/processor.rs index a37d00a4..85c89e53 100644 --- a/src/sed/processor.rs +++ b/src/sed/processor.rs @@ -212,7 +212,7 @@ fn shell_command(cmd: &str) -> std::process::Command { // Fallback if the target OS is neither Windows nor UNIX-like #[cfg(not(any(unix, windows)))] fn shell_command(_cmd: &str) -> std::process::Command { - unimplemented!("the 'e' substitute flag requires a platform shell (/bin/sh or cmd.exe)") + unimplemented!("the 'e' substitute flag requires a platform shell (/bin/sh or cmd.exe)"); } /// Perform the specified RE replacement in the provided pattern space.