Skip to content

relationalai.std.re.Match

class Match(Producer)

Represents a match returned by a regular expression operation, such as match(), search() or fullmatch(). Provides attributes and methods for getting details about the match. Must be used in a rule or query context.

Use the match(), search(), or fullmatch() functions to get a Match object:

import relationalai as rai
from relationalai.std import re
# =====
# SETUP
# =====
model = rai.Model("MyModel")
Person = model.Type("Person")
with model.rule():
Person.add(id=1).set(full_name="Alan Turing")
Person.add(id=2).set(full_name="Barbara Liskov")
# =======
# EXAMPLE
# =======
# Set first_name and last_name properties for each person.
with model.rule():
person = Person()
match = re.match(r"(\w+) (\w+)", person.full_name)
person.set(first_name=match.group(1), last_name=match.group(2))
with model.query() as select:
person = Person()
response = select(person.id, person.first_name, person.last_name)
print(response.results)
# id first_name last_name
# 0 1 Alan Turing
# 1 2 Barbara Liskov

Match objects can be assigned to object properties. The property is assigned the full match string, and not the Match object itself:

with model.rule():
person = Person()
# Match full names starting with "B".
match = re.match(r"B.*", person.full_name)
# Set the matched_string property. This does not assign the Match object
# itself to the property, but rather the full match string.
person.set(matched_string=match)
with model.query() as select:
person = Person()
response = select(person.id, person.matched_string)
print(response.results)
# id matched_string
# 0 1 NaN
# 1 2 Barbara Liskov
# Since the Match object is not assigned to the property, you can't use its
# attributes or methods when accessing the matched_string property.
with model.query() as select:
person = Person()
match_start = person.matched_string.start() # Raises error
response = select(person.id, match_start)
| Name | Description | Type |
| --- | --- | --- |
| .pos | The value of pos passed to match(), search(), or fullmatch(). This represents the starting position of the match or search. | Producer or Python int |
| .re | The compiled regular expression object whose .match(), .search(), or .fullmatch() method returned this Match object. | Pattern |
| .string | The string passed to match(), search(), or fullmatch(). | Producer or Python str |
| Name | Description | Return Type |
| --- | --- | --- |
| .group() | Returns a subgroup of the match. | Expression |
| .__getitem__() | Identical to .group(). Allows subscript access to individual groups from a match. | Expression |
| .start() | Returns the starting position of the match. | Expression |
| .end() | Returns the ending position of the match. | Expression |
| .span() | Returns a tuple containing the starting and ending positions of the match. | tuple[Expression] |

Match.__getitem__(index: int|str|Producer) -> Expression

Returns a subgroup captured by a regular expression match, using subscript notation. For an integer index, .__getitem__() returns the subgroup at that index, where 0 is the entire match. For a string index, it returns the subgroup with that name. If index is a Producer, .__getitem__() filters out non-integer and non-string values from the producer. Must be used in a rule or query context.

| Name | Type | Description |
| --- | --- | --- |
| index | Producer or Python int or str | The index or name of the subgroup to return. |

An Expression object.

The .__getitem__() method allows access to subgroups in a match by index or name using subscript notation:

import relationalai as rai
from relationalai.std import re
# =====
# SETUP
# =====
model = rai.Model("MyModel")
Person = model.Type("Person")
with model.rule():
Person.add(id=1).set(name="Alan Turing")
Person.add(id=2).set(name="Bob")
Person.add(id=3).set(name=-1) # Non-string name
# =======
# EXAMPLE
# =======
with model.rule():
person = Person()
# Extract the first and last names of each person.
match = re.match(r"(\w+) (\w+)", person.name)
# Since .__getitem__() filters out non-strings and non-matching strings, the
# following does not set properties for Person objects with ID 2 and 3.
person.set(
full_name=match[0], # Get the entire match
first_name=match[1], # Get the first subgroup
last_name=match[2], # Get the second subgroup
)
with model.query() as select:
person = Person()
response = select(
person.id,
person.name,
person.full_name,
person.first_name,
person.last_name
)
print(response.results)
# id name full_name first_name last_name
# 0 1 Alan Turing Alan Turing Alan Turing
# 1 2 Bob NaN NaN NaN
# 2 3 -1 NaN NaN NaN

Named groups can also be accessed using subscript notation:

from relationalai.std import alias
with model.query() as select:
person = Person()
# Extract the first and last names of each person using named groups.
match = re.match(r"(?P<first>\w+) (?P<last>\w+)", person.name)
response = select(
alias(match[0], "full_name"),
alias(match["first"], "first_name"), # Get the subgroup named "first"
alias(match["last"], "last_name"), # Get the subgroup named "last"
)
print(response.results)
# full_name first_name last_name
# 0 Alan Turing Alan Turing

Alternatively, you can access subgroups using the .group() method:

match.group(1) # Equivalent to match[1]
match.group("first") # Equivalent to match["first"]
Match.end() -> Expression

Returns the 0-based ending position of the match. Must be used in a rule or query context.

Parameters: None.

Returns: An Expression object.

import relationalai as rai
from relationalai.std import alias, re
# =====
# SETUP
# =====
model = rai.Model("MyModel")
Company = model.Type("Company")
with model.rule():
Company.add(name="RelationalAI")
Company.add(name="Snowflake")
# =======
# EXAMPLE
# =======
with model.query() as select:
company = Company()
# Filter companies whose name contains 'AI'.
match = re.search(r"AI", company.name)
response = select(
company.name,
alias(match.end(), "match_end"), # Get the ending position of the match
)
print(response.results)
# name match_end
# 0 RelationalAI 12
Match.group(index: int|str|Producer = 0) -> Expression

Returns a subgroup captured by a regular expression match. For an integer index, .group() returns the subgroup at that index, where 0 is the entire match. For a string index, it returns the subgroup with that name. If index is a Producer, then .group() filters out non-integer and non-string values from the producer. Must be used in a rule or query context.

| Name | Type | Description |
| --- | --- | --- |
| index | Producer or Python int or str | The index or name of the subgroup to return. Default is 0, which returns the entire match. |

An Expression object.

Use .group() to extract subgroups from a match by index:

import relationalai as rai
from relationalai.std import re
# =====
# SETUP
# =====
model = rai.Model("MyModel")
Person = model.Type("Person")
with model.rule():
Person.add(id=1).set(name="Alan Turing")
Person.add(id=2).set(name="Bob")
Person.add(id=3).set(name=-1) # Non-string name
# =======
# EXAMPLE
# =======
with model.rule():
person = Person()
# Extract the first and last names of each person.
match = re.match(r"(\w+) (\w+)", person.name)
# Since .group() filters out non-strings and non-matching strings, the
# following does not set properties for the Person objects with ID 2 and 3.
person.set(
full_name=match.group(0), # Get the entire match
first_name=match.group(1), # Get the first subgroup
last_name=match.group(2), # Get the second subgroup
)
with model.query() as select:
person = Person()
response = select(person.id, person.name, person.full_name, person.first_name, person.last_name)
print(response.results)
# id name full_name first_name last_name
# 0 1 Alan Turing Alan Turing Alan Turing
# 1 2 Bob NaN NaN NaN
# 2 3 -1 NaN NaN NaN

Named groups can be accessed by name:

from relationalai.std import alias
with model.query() as select:
person = Person()
# Extract the first and last names of each person using named groups.
match = re.match(r"(?P<first>\w+) (?P<last>\w+)", person.name)
response = select(
alias(match.group(), "full_name"),
alias(match.group("first"), "first_name"), # Get the subgroup named "first"
alias(match.group("last"), "last_name"), # Get the subgroup named "last"
)
print(response.results)
# full_name first_name last_name
# 0 Alan Turing Alan Turing

Alternatively, you can access subgroups using subscript notation:

match[1] # Equivalent to match.group(1)
match["first"] # Equivalent to match.group("first")
Match.span() -> tuple[Expression]

Returns a tuple of two Expression objects that produce the 0-based starting and ending position of the match. Equivalent to (match.start(), match.end()). Must be used in a rule or query context.

Parameters: None.

Returns: A tuple of two Expression objects: the starting position and the ending position of the match.

import relationalai as rai
from relationalai.std import alias, re
# =====
# SETUP
# =====
model = rai.Model("companies")
Company = model.Type("Company")
with model.rule():
Company.add(name="RelationalAI")
Company.add(name="Snowflake")
# =======
# EXAMPLE
# =======
with model.query() as select:
company = Company()
# Filter companies whose name contains 'AI'.
match = re.search(r"AI", company.name)
# Get the start and end positions of the match.
start, end = match.span()
response = select(
company.name,
alias(start, "match_start"),
alias(end, "match_end"),
)
print(response.results)
# name match_start match_end
# 0 RelationalAI 10 12

Note that you can’t pass the tuple returned by span() directly to select(). Instead, you must unpack the tuple into separate variables and select those:

# =========
# INCORRECT
# =========
with model.query() as select:
company = Company()
match = re.search(r"AI", company.name)
response = select(
company.name,
match.span(), # Does not work
)
# =======
# CORRECT
# =======
with model.query() as select:
company = Company()
match = re.search(r"AI", company.name)
start, end = match.span()
response = select(company.name, start, end)
Match.start() -> Expression

Returns the 0-based starting position of the match. Must be used in a rule or query context.

Parameters: None.

Returns: An Expression object.

import relationalai as rai
from relationalai.std import alias, re
# =====
# SETUP
# =====
model = rai.Model("MyModel")
Company = model.Type("Company")
with model.rule():
Company.add(name="RelationalAI")
Company.add(name="Snowflake")
# =======
# EXAMPLE
# =======
with model.query() as select:
company = Company()
# Filter companies whose name contains 'AI'.
match = re.search(r"AI", company.name)
response = select(
company.name,
alias(match.start(), "match_start"), # Get the start position of the match
)
print(response.results)
# name match_start
# 0 RelationalAI 10