Skip to content

Commit

Permalink
fix(str): remove support for unneeded word boundaries
Browse files Browse the repository at this point in the history
  • Loading branch information
rgrinberg committed Oct 27, 2024
1 parent 9823128 commit 710bc4a
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 9 deletions.
2 changes: 2 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ Unreleased
* Fix [Re.Pcre.split]. Regression introduced in 1.12 and a previous bug with
[Re.Pcre.split] (#538).

* Avoid parsing unnecessary patterns supported `Re.Emacs` in `Re.Str` (#563)

1.13.1 (30-Sep-2024)
--------------------

Expand Down
21 changes: 13 additions & 8 deletions lib/emacs.ml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ let by_code f c c' =
Char.chr (f c c')
;;

let parse s =
let parse ~emacs_only s =
let buf = Parse_buffer.create s in
let accept = Parse_buffer.accept buf in
let eos () = Parse_buffer.eos buf in
Expand Down Expand Up @@ -72,19 +72,19 @@ let parse s =
let r = regexp () in
if not (Parse_buffer.accept_s buf {|\)|}) then raise Parse_error;
Re.group r)
else if accept '`'
else if emacs_only && accept '`'
then Re.bos
else if accept '\''
else if emacs_only && accept '\''
then Re.eos
else if accept '='
then Re.start
else if accept 'b'
then Re.alt [ Re.bow; Re.eow ]
else if accept 'B'
else if emacs_only && accept 'B'
then Re.not_boundary
else if accept '<'
else if emacs_only && accept '<'
then Re.bow
else if accept '>'
else if emacs_only && accept '>'
then Re.eow
else if accept 'w'
then Re.alt [ Re.alnum; Re.char '_' ]
Expand All @@ -95,7 +95,7 @@ let parse s =
match get () with
| ('*' | '+' | '?' | '[' | ']' | '.' | '^' | '$' | '\\') as c -> Re.char c
| '0' .. '9' -> raise Not_supported
| _ -> raise Parse_error)
| c -> if emacs_only then raise Parse_error else Re.char c)
else (
if eos () then raise Parse_error;
match get () with
Expand Down Expand Up @@ -125,7 +125,12 @@ let parse s =
;;

let re ?(case = true) s =
let r = parse s in
let r = parse s ~emacs_only:true in
if case then r else Re.no_case r
;;

let re_no_emacs ~case s =
let r = parse s ~emacs_only:false in
if case then r else Re.no_case r
;;

Expand Down
2 changes: 2 additions & 0 deletions lib/emacs.mli
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,5 @@ val compile : Core.t -> Core.re

(** Same as [Core.compile] *)
val compile_pat : ?case:bool -> string -> Core.re

val re_no_emacs : case:bool -> string -> Core.t
2 changes: 1 addition & 1 deletion lib/str.ml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ type regexp =
}

let compile_regexp s c =
let re = Emacs.re ~case:(not c) s in
let re = Emacs.re_no_emacs ~case:(not c) s in
{ mtch = lazy (Compile.compile (Ast.seq [ Ast.start; re ]))
; srch = lazy (Compile.compile re)
}
Expand Down
27 changes: 27 additions & 0 deletions lib_test/str/test_str.ml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,15 @@ module Test_matches (R : Str_intf) = struct
with
| Not_found -> None
;;

let eq_match' ?(pos = 0) ?(case = true) r s =
let pat = if case then R.regexp r else R.regexp_case_fold r in
try
ignore (R.string_match pat s pos);
Some (groups ())
with
| Not_found -> None
;;
end

module T_str = Test_matches (Str)
Expand All @@ -42,6 +51,16 @@ let eq_match ?pos ?case r s =
()
;;

let eq_match' ?pos ?case r s =
expect_equal_app
~msg:(str_printer s)
~printer:(opt_printer (list_printer ofs_printer))
(fun () -> T_str.eq_match' ?pos ?case r s)
()
(fun () -> T_re.eq_match' ?pos ?case r s)
()
;;

let split_result_conv =
List.map (function
| Str.Delim x -> Re.Str.Delim x
Expand Down Expand Up @@ -206,6 +225,14 @@ let _ =
eq_match "[^0-9a-z]+" "A:Z+";
eq_match "[^0-9a-z]+" "0";
eq_match "[^0-9a-z]+" "a");
(* Word modifiers *)
expect_pass "word boundaries" (fun () ->
eq_match' "\\bfoo" "foo";
eq_match' "\\<foo" "foo";
eq_match' "foo\\>" "foo";
eq_match' "z\\Bfoo" "zfoo";
eq_match' "\\`foo" "foo";
eq_match' "foo\\'" "foo");
(* Case modifiers *)
expect_pass "no_case" (fun () ->
eq_match ~case:false "abc" "abc";
Expand Down

0 comments on commit 710bc4a

Please sign in to comment.