Skip to content

Commit a4f4fd6

Browse files
authored
Merge pull request aichaos#95 from Dinh-Hung-Tu/aichaos#94
aichaos#94 Improve ranking algorithm
2 parents 2046099 + 59416cd commit a4f4fd6

4 files changed

Lines changed: 47 additions & 30 deletions

File tree

rivescript/regexp.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ class RE(object):
1717
objend = re.compile('^\s*<\s*object')
1818
weight = re.compile(r'\s*\{weight=(\d+)\}\s*')
1919
inherit = re.compile('\{inherits=(\d+)\}')
20-
wilds = re.compile('[\s\*\#\_]+')
20+
wilds_and_optionals = re.compile('[\s\*\#\_\[\]()]+')
2121
nasties = re.compile('[^A-Za-z0-9 ]')
2222
crlf = re.compile('<crlf>')
2323
literal_w = re.compile(r'\\w')

rivescript/sorting.py

Lines changed: 5 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ class TriggerObj(object):
3232
pound: Number of numeric wildcards (``#``)
3333
under: Number of alphabetical wildcards (``_``)
3434
option: Number of optional tags ("[man]" in "hey [man]"), assume that the template is properly formatted
35+
is_empty: Boolean flag that is True when the trigger's wordcount is zero (the trigger contains no words, only wildcards and/or optionals)
3536
"""
3637

3738
def __init__(self, pattern, index, weight, inherit = sys.maxsize):
@@ -45,24 +46,8 @@ def __init__(self, pattern, index, weight, inherit = sys.maxsize):
4546
self.pound = self.alphabet.count('#') # Number of numeric wildcards 0 < 1
4647
self.under = self.alphabet.count('_') # Number of alphabetical wildcards 0 < 1
4748
self.option = self.alphabet.count('[') + self.alphabet.count('(') # Number of option 0 < 1
49+
self.is_empty = self.wordcount == 0 # Triggers with words precede triggers with no words, False < True
4850

49-
if self.star > 0:
50-
if (self.pound == 0) & (self.under == 0) & (self.option == 0): # Place single star last in the rank
51-
self.pound = sys.maxsize
52-
self.under = sys.maxsize
53-
self.option = sys.maxsize
54-
if self.wordcount == 0: # The special case for single star "*", or a grey case "* *"
55-
self.wordcount = sys.maxsize # Make sure template "hello *" > "*"
56-
# Without any words number of stars does not matter, they all mean match any.
57-
self.star = sys.maxsize # Make sure "*" is last in the list, "* love *" > "*"
58-
59-
# Special handle for the case "[*]", since self.len is not re-set, self.len = -2 < 0. Thus, "[*]" > "*"
60-
elif (self.option == 1) & (self.wordcount == -2):
61-
self.wordcount = sys.maxsize
62-
self.star = sys.maxsize
63-
self.pound = sys.maxsize
64-
self.under = sys.maxsize
65-
self.option = sys.maxsize
6651

6752
def sort_trigger_set(triggers, exclude_previous=True, say=None):
6853
"""Sort a group of triggers in optimal sorting order.
@@ -125,9 +110,10 @@ def sort_trigger_set(triggers, exclude_previous=True, say=None):
125110

126111
trigger_object_list.append(TriggerObj(pattern, index, weight, inherit))
127112

128-
# Priority order of sorting criteria: weight, inherit, star, pound, under, option, wordcount, len, alphabet
113+
# Priority order of sorting criteria:
114+
# weight, inherit, is_empty, star, pound, under, option, wordcount, len, alphabet
129115
sorted_list = sorted(trigger_object_list,
130-
key=attrgetter('weight', 'inherit', 'star', 'pound',
116+
key=attrgetter('weight', 'inherit', 'is_empty', 'star', 'pound',
131117
'under', 'option', 'wordcount', 'len', 'alphabet'))
132118
return [triggers[item.index] for item in sorted_list]
133119

rivescript/utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
import string
1313

1414
def word_count(trigger, all=False):
15-
"""Count the words that aren't wildcards in a trigger.
15+
"""Count the words that aren't wildcards or options in a trigger.
1616
1717
:param str trigger: The trigger to count words for.
1818
:param bool all: Count purely based on whitespace separators, or
@@ -23,7 +23,7 @@ def word_count(trigger, all=False):
2323
if all:
2424
words = re.split(RE.ws, trigger)
2525
else:
26-
words = re.split(RE.wilds, trigger)
26+
words = re.split(RE.wilds_and_optionals, trigger)
2727

2828
wc = 0 # Word count
2929
for word in words:

tests/test_sorting.py

Lines changed: 39 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,18 @@ def test_sorting_triggers(self):
5555
5656
+ [*]
5757
- 15
58+
59+
+ hi _
60+
- 16
61+
62+
+ _ _
63+
- 17
64+
65+
+ ho _{weight=100}
66+
- 18
67+
68+
+ ho _
69+
- 19
5870
""")
5971

6072
sorted_triggers = {trig[0]:position for position, trig in enumerate(self.rs._brain.master._sorted["topics"]['random'])}
@@ -75,14 +87,33 @@ def test_sorting_triggers(self):
7587
# 5) Sorted by number of wildcard triggers
7688
self.assertLess(sorted_triggers['hi *'], sorted_triggers['* you *'])
7789

78-
# 6) The `super catch all` (only single star `*`) should be least priority
79-
self.assertEqual(sorted_triggers['*'], max(sorted_triggers.values()))
80-
self.assertLess(sorted_triggers['hi [*]'], sorted_triggers['*']) # another check but will be covered by max check above
81-
82-
# 7) The cousin of `super catch all` (only single star with option `[*]`) should be second-last, following the
83-
# `soft` sorting convention that trigger with more non-star characters goes first (more specific matches first)
84-
self.assertLess(sorted_triggers['[*]'], sorted_triggers['*'])
85-
self.assertEqual(sorted_triggers['[*]'], max(sorted_triggers.values())-1)
90+
# 6) The `super catch all` triggers (only a single star, `*` or `[*]`) should occupy the last two positions
91+
third_last_position = max(sorted_triggers.values())-2
92+
self.assertLess(third_last_position, sorted_triggers['*'])
93+
self.assertLess(sorted_triggers['hi [*]'], sorted_triggers['*'])
94+
self.assertLess(third_last_position, sorted_triggers['[*]'])
8695
self.assertLess(sorted_triggers['[*] hi [*]'], sorted_triggers['[*]'])
8796
self.assertLess(sorted_triggers['[*] hi *'], sorted_triggers['*'])
8897
self.assertLess(sorted_triggers['hi [*]'], sorted_triggers['[*]'])
98+
99+
# 7) A trigger with no text should rank lower than any trigger with some text, even when the latter contains wildcards.
100+
self.assertLess(sorted_triggers['hel lo'], sorted_triggers['_ _'])
101+
self.assertLess(sorted_triggers['hi [*]'], sorted_triggers['_ _'])
102+
self.assertLess(sorted_triggers['hi *'], sorted_triggers['_ _'])
103+
self.assertLess(sorted_triggers['hi _'], sorted_triggers['_ _'])
104+
105+
# 8) Among the triggers with no text, the order of wildcard priority still holds
106+
self.assertLess(sorted_triggers['_ _'], sorted_triggers['[*]'])
107+
self.assertLess(sorted_triggers['_ _'], sorted_triggers['*'])
108+
109+
# 9) Among the triggers with text, the order of wildcard priority still holds
110+
self.assertLess(sorted_triggers['hi _'], sorted_triggers['hi *'])
111+
self.assertLess(sorted_triggers['hi _'], sorted_triggers['hi [*]'])
112+
113+
# 10) Repeat of the checks in 9): among the triggers with text, the order of wildcard priority still holds
114+
self.assertLess(sorted_triggers['hi _'], sorted_triggers['hi *'])
115+
self.assertLess(sorted_triggers['hi _'], sorted_triggers['hi [*]'])
116+
117+
# 11) Making sure that the weight tag is taken into account
118+
self.assertLess(sorted_triggers['ho _{weight=100}'], sorted_triggers['hi _'])
119+
self.assertLess(sorted_triggers['hi _'], sorted_triggers['ho _'])

0 commit comments

Comments
 (0)