Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Regex library #124

Draft
wants to merge 12 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ rayon = { version = "1.5", optional = true }
dbpnoise = { version = "0.1.2", optional = true }
pathfinding = { version = "3.0.13", optional = true }
num = { version = "0.4.0", optional = true }
regex = { version = "1.6.0", optional = true }

[features]
default = [
Expand All @@ -68,6 +69,7 @@ default = [
"json",
"log",
"noise",
"regex",
"sql",
"time",
"toml",
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ The default features are:
* json: Function to check JSON validity.
* log: Faster log output.
* noise: 2d Perlin noise.
* regex: Adds a /datum/rustg_regex replacement for /regex.
* sql: Asynchronous MySQL/MariaDB client library.
* time: High-accuracy time measuring.
* toml: TOML parser.
Expand Down
52 changes: 52 additions & 0 deletions dmsrc/regex.dm
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
#define RUSTG_REGEX_FLAG_GLOBAL (1 << 0)

/// Stand-in for BYOND's built-in /regex, backed by rust-g.
/// The intent is that any regex() call site can be switched over to this type.
/// MBTODO: ...but not now.
/datum/rustg_regex
	/// Pattern source text; passed to rust-g on every Find().
	var/pattern

	/// Bitfield of RUSTG_REGEX_FLAG_* values parsed from the flag string given to New().
	var/flags = 0

	/// 1-based position of the most recent match, written by Find().
	var/index
	/// 1-based position just past the most recent match, written by Find().
	var/next
	/// Capture groups of the most recent match, written by Find().
	var/list/group

// MBTODO: Flags
/// Stores `pattern` and parses `flags`, an optional string of single-character flags.
/// Only "g" (global) is recognised; a non-text, non-null `flags` or any other
/// flag character crashes.
/datum/rustg_regex/New(pattern, flags)
	// MBTODO: Validate
	src.pattern = pattern

	if (!(istext(flags) || isnull(flags)))
		CRASH("Expected string for flags, received [flags]")

	// length(null) is 0, so a missing flag string simply skips the loop.
	var/flag_count = length(flags)
	for (var/position = 1, position <= flag_count, position++)
		var/flag = copytext(flags, position, position + 1)
		if (flag == "g")
			src.flags |= RUSTG_REGEX_FLAG_GLOBAL
		else
			CRASH("unknown flag passed to regex: [flag]")

// MBTODO: End
/// Searches `haystack` for the pattern and records the match on this datum.
///
/// * haystack - Text to search.
/// * start - 1-based position to begin at. When null, a global regex resumes
///   from `next` (if set); otherwise the search begins at 1.
/// * end - Accepted for signature compatibility but currently ignored.
///
/// Returns the 1-based position of the match, or 0 when nothing matched.
/// Crashes with the reason reported by rust-g when the call fails
/// (for example, an invalid pattern).
/datum/rustg_regex/proc/Find(haystack, start, end = 0)
	if (isnull(start))
		if ((flags & RUSTG_REGEX_FLAG_GLOBAL) && !isnull(next))
			start = next
		else
			start = 1

	// rust-g expects a 0-based offset, hence the - 1.
	var/list/result = json_decode(RUSTG_CALL(RUST_G, "regex_captures")(pattern, haystack, "[start - 1]"))
	if (!result["success"])
		CRASH(result["reason"])

	var/list/regex_result = result["result"]

	// rust-g reports "no match" as a null result; indexing into it would runtime.
	if (isnull(regex_result))
		index = 0
		// Clear the resume position so the next global search starts from the
		// beginning instead of a stale (possibly out-of-range) offset.
		// NOTE(review): confirm this matches how BYOND's /regex treats `next` after a failed Find().
		next = null
		return 0

	// Convert rust-g's 0-based offsets back to DM's 1-based positions.
	next = regex_result["next"] + 1
	index = regex_result["index"] + 1
	group = regex_result["captures"]

	return index

#undef RUSTG_REGEX_FLAG_GLOBAL
2 changes: 2 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ pub mod noise_gen;
pub mod pathfinder;
#[cfg(feature = "redis_pubsub")]
pub mod redis_pubsub;
#[cfg(feature = "regex")]
pub mod regex;
#[cfg(feature = "sql")]
pub mod sql;
#[cfg(feature = "time")]
Expand Down
72 changes: 72 additions & 0 deletions src/regex.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
use regex::Regex;
use serde::Serialize;

byond_fn!(
    fn regex_captures(pattern, text, start) {
        // Build the response object first, then serialize it in one place.
        // Every outcome is reported as JSON with a "success" flag.
        let response = match start.parse::<usize>() {
            Err(_) => serde_json::json!({
                "success": false,
                "reason": "invalid start index",
            }),

            Ok(offset) => match regex_captures_impl(pattern, text, offset) {
                Ok(captures) => serde_json::json!({
                    "success": true,
                    "result": captures,
                }),

                Err(error) => serde_json::json!({
                    "success": false,
                    "reason": error.to_string(),
                }),
            },
        };

        serde_json::to_string(&response).ok()
    }
);

/// JSON payload describing a single successful regex match.
#[derive(Serialize)]
struct CaptureResult {
    /// Text of every capture group after group 0; `None` for groups that did
    /// not participate in the match.
    captures: Vec<Option<String>>,

    /// Byte offset in the searched text where the match starts.
    index: usize,
    /// Byte offset in the searched text just past the end of the match.
    next: usize,

    /// The full matched text, emitted under the JSON key `match`.
    // `alias` only applies when deserializing; `rename` is what changes the
    // key written by `Serialize`.
    #[serde(rename = "match")]
    the_match: String,
}

/// Compiles `pattern` and looks for the first match in `text` at or after the
/// byte offset `start`.
///
/// Returns `Ok(None)` when nothing matches, or when `start` lies beyond the
/// end of `text`.
///
/// # Errors
///
/// Returns the [`regex::Error`] produced when `pattern` fails to compile.
fn regex_captures_impl(
    pattern: &str,
    text: &str,
    start: usize,
) -> Result<Option<CaptureResult>, regex::Error> {
    // Compile first so an invalid pattern is always reported, whatever `start` is.
    let regex = Regex::new(pattern)?;

    // A start offset past the end of the haystack cannot match anything, and
    // handing it to the regex crate can panic; treat it as "no match".
    if start > text.len() {
        return Ok(None);
    }

    let mut locations = regex.capture_locations();
    let the_match = match regex.captures_read_at(&mut locations, text, start) {
        Some(found) => found,
        None => return Ok(None),
    };

    // Group 0 is the whole match and is reported separately, so start at 1.
    let captures = (1..locations.len())
        .map(|group| {
            locations
                .get(group)
                .map(|(from, to)| text[from..to].to_owned())
        })
        .collect();

    Ok(Some(CaptureResult {
        captures,

        index: the_match.start(),
        next: the_match.end(),

        the_match: the_match.as_str().to_owned(),
    }))
}
6 changes: 6 additions & 0 deletions tests/dm-tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,12 @@ fn git() {
run_dm_tests("git");
}

/// Runs the "regex" DM test suite; only compiled when the `regex` feature is enabled.
#[cfg(feature = "regex")]
#[test]
fn regex() {
    let suite = "regex";
    run_dm_tests(suite);
}

#[cfg(feature = "toml")]
#[test]
fn toml() {
Expand Down
13 changes: 13 additions & 0 deletions tests/dm/common.dm
Original file line number Diff line number Diff line change
@@ -1,8 +1,21 @@
#define RUST_G world.GetConfig("env", "RUST_G")
#include "../../target/rust_g.dm"

/// Asserts that the two parameters passed are equal, fails otherwise.
/// Each argument is evaluated once into a local before the comparison. On a
/// mismatch, stack_trace() is called with the file, line and both values
/// (null is printed as "null").
#define TEST_ASSERT_EQUAL(a, b) do { \
var/lhs = ##a; \
var/rhs = ##b; \
if (lhs != rhs) { \
stack_trace("[__FILE__]:[__LINE__]: Expected [isnull(lhs) ? "null" : lhs] to be equal to [isnull(rhs) ? "null" : rhs]."); \
} \
} while (FALSE)

/// Test runner entry point. For every proc path returned by typesof(/test/proc)
/// it writes a dash-padded header line to the log and calls that proc on a
/// freshly created /test instance; `del src` then deletes the world.
/// NOTE(review): `del src` presumably runs once, after the loop — confirm against the file's indentation.
/world/New()
for(var/func in typesof(/test/proc))
// Header: the proc path followed by dashes; copytext starts at the path's length, so longer paths get fewer dashes.
log << "[func] [copytext("------------------------------------------------------------------------", length("[func]"))]"
call(new /test, func)()
del src

/// Test-environment stack_trace(): raises a runtime error carrying `message` via CRASH.
/proc/stack_trace(message)
CRASH(message)
25 changes: 25 additions & 0 deletions tests/dm/regex.dme
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#include "common.dm"

/// Checks that a regex created with the "g" flag resumes from `next` on
/// repeated Find() calls against the same text.
/test/proc/regex_global()
var/datum/rustg_regex/re_global = new("a(.)c", "g")

// First call: "abc" matches at position 1, so the next search starts at 4.
TEST_ASSERT_EQUAL(re_global.Find("abc adc"), 1)
TEST_ASSERT_EQUAL(re_global.next, 4)

// The single capture group holds the character between "a" and "c".
TEST_ASSERT_EQUAL(re_global.group.len, 1)
TEST_ASSERT_EQUAL(re_global.group[1], "b")

// Second call with no explicit start: continues from `next` and finds "adc" at 5.
TEST_ASSERT_EQUAL(re_global.Find("abc adc"), 5)
TEST_ASSERT_EQUAL(re_global.next, 8)

TEST_ASSERT_EQUAL(re_global.group.len, 1)
TEST_ASSERT_EQUAL(re_global.group[1], "d")

/// Checks that a capture group which did not take part in the match is
/// reported as null while the participating group holds its text.
/test/proc/regex_multiple_captures()
var/datum/rustg_regex/re_multiple_captures = new("(a.c)|(d.f)")

// Only the second alternative matches "def", starting at position 1.
TEST_ASSERT_EQUAL(re_multiple_captures.Find("def"), 1)

// Both groups are listed: the unmatched first group is null.
TEST_ASSERT_EQUAL(re_multiple_captures.group.len, 2)
TEST_ASSERT_EQUAL(re_multiple_captures.group[1], null)
TEST_ASSERT_EQUAL(re_multiple_captures.group[2], "def")