Skip to content

Commit 0647459

Browse files
committed
revamp the sorting algorithm using Python multilevel sort
1 parent b47f37c commit 0647459

1 file changed

Lines changed: 61 additions & 112 deletions

File tree

rivescript/sorting.py

Lines changed: 61 additions & 112 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,43 @@
99
from .regexp import RE
1010
from . import utils
1111
import re
12+
from operator import itemgetter, attrgetter
13+
import sys
14+
15+
16+
class TriggerObj(object):
17+
"""An object represent trigger for ease of sorting.
18+
19+
In RiveScript sorting rule, some of sorting criteria are ascending for example alphabetical or inherit whereas other
20+
criteria are descending order for example word counts. In Python multiple level sort, the sort direction set by
21+
parameter `reverse` is applied to all criteria. So in our implementation, some parameters are set to negative to
22+
keep search direction consistent among all criteria.
23+
24+
Parameters:
25+
pattern: Trigger pattern in string format i.e. "* hey [man]"
26+
index: Unique positional index of the object in the original list
27+
weight: Pattern weight ``{weight}``
28+
inherit: Pattern inherit level, extracted from i.e. "{inherit=1}hi"
29+
wordcount: Length of pattern by wordcount
30+
len: Length of pattern by character count
31+
star: Number of wildcards (``*``), excluding alphabetical wildcards, and numeric wildcards
32+
pound: Number of numeric wildcards (``#``)
33+
under: Number of alphabetical wildcards (``_``)
34+
option: Number of optional tags ("[man]" in "hey [man]")
35+
"""
36+
37+
def __init__(self, pattern, index, weight, inherit = sys.maxint):
38+
self.alphabet = pattern # Sort according to alphabet order i.e. haha < hihi
39+
self.index = index # For rearrange items in the sorted array
40+
self.weight = - weight # Negative weight to place i.e. -100 < 0
41+
self.inherit = inherit # Low inherit takes precedence i.e. 0 < 1
42+
self.wordcount = - utils.word_count(pattern) # Length -2 < -1
43+
self.len = -len(self.alphabet) # Length -10 < -5
44+
self.star = self.alphabet.count('*') # Number of wildcards 0 < 1
45+
self.pound = self.alphabet.count('#') # Number of numeric wildcards 0 < 1
46+
self.under = self.alphabet.count('_') # Number of alphabetical wildcards 0 < 1
47+
self.option = self.alphabet.count('[') # Assume that the template is properly formatted 0 < 1
48+
1249

1350
def sort_trigger_set(triggers, exclude_previous=True, say=None):
1451
"""Sort a group of triggers in optimal sorting order.
@@ -46,124 +83,36 @@ def sort_trigger_set(triggers, exclude_previous=True, say=None):
4683
# ["trigger text", pointer to trigger data]
4784
# So this code will use e.g. `trig[0]` when referring to the trigger text.
4885

49-
# Create a priority map.
50-
prior = {
51-
0: [] # Default priority=0
52-
}
86+
# Create a list of trigger objects map.
87+
trigger_object_list = []
88+
for index, trig in enumerate(triggers):
5389

54-
for trig in triggers:
5590
if exclude_previous and trig[1]["previous"]:
5691
continue
5792

93+
pattern = trig[0] # Extract only the text of the trigger, with possible tag of inherit
94+
95+
# See if it has a weight tag
5896
match, weight = re.search(RE.weight, trig[0]), 0
97+
if match: # Value of math is not None if there is a match.
98+
weight = int(match.group(1)) # Get the weight from the tag ``{weight}``
99+
100+
# See if it has an inherits tag.
101+
match = re.search(RE.inherit, pattern)
59102
if match:
60-
weight = int(match.group(1))
61-
if weight not in prior:
62-
prior[weight] = []
63-
64-
prior[weight].append(trig)
65-
66-
# Keep a running list of sorted triggers for this topic.
67-
running = []
68-
69-
# Sort them by priority.
70-
for p in sorted(prior.keys(), reverse=True):
71-
say("\tSorting triggers with priority " + str(p))
72-
73-
# So, some of these triggers may include {inherits} tags, if they
74-
# came form a topic which inherits another topic. Lower inherits
75-
# values mean higher priority on the stack.
76-
inherits = -1 # -1 means no {inherits} tag
77-
highest_inherits = -1 # highest inheritance number seen
78-
79-
# Loop through and categorize these triggers.
80-
track = {
81-
inherits: init_sort_track()
82-
}
83-
84-
for trig in prior[p]:
85-
pattern = trig[0]
86-
say("\t\tLooking at trigger: " + pattern)
87-
88-
# See if it has an inherits tag.
89-
match = re.search(RE.inherit, pattern)
90-
if match:
91-
inherits = int(match.group(1))
92-
if inherits > highest_inherits:
93-
highest_inherits = inherits
94-
say("\t\t\tTrigger belongs to a topic which inherits other topics: level=" + str(inherits))
95-
pattern = re.sub(RE.inherit, "", pattern)
96-
trig[0] = pattern
97-
else:
98-
inherits = -1
99-
100-
# If this is the first time we've seen this inheritance level,
101-
# initialize its track structure.
102-
if inherits not in track:
103-
track[inherits] = init_sort_track()
104-
105-
# Start inspecting the trigger's contents.
106-
if '_' in pattern:
107-
# Alphabetic wildcard included.
108-
cnt = utils.word_count(pattern)
109-
say("\t\t\tHas a _ wildcard with " + str(cnt) + " words.")
110-
if cnt > 1:
111-
if cnt not in track[inherits]['alpha']:
112-
track[inherits]['alpha'][cnt] = []
113-
track[inherits]['alpha'][cnt].append(trig)
114-
else:
115-
track[inherits]['under'].append(trig)
116-
elif '#' in pattern:
117-
# Numeric wildcard included.
118-
cnt = utils.word_count(pattern)
119-
say("\t\t\tHas a # wildcard with " + str(cnt) + " words.")
120-
if cnt > 1:
121-
if cnt not in track[inherits]['number']:
122-
track[inherits]['number'][cnt] = []
123-
track[inherits]['number'][cnt].append(trig)
124-
else:
125-
track[inherits]['pound'].append(trig)
126-
elif '*' in pattern:
127-
# Wildcard included.
128-
cnt = utils.word_count(pattern)
129-
say("\t\t\tHas a * wildcard with " + str(cnt) + " words.")
130-
if cnt > 1:
131-
if cnt not in track[inherits]['wild']:
132-
track[inherits]['wild'][cnt] = []
133-
track[inherits]['wild'][cnt].append(trig)
134-
else:
135-
track[inherits]['star'].append(trig)
136-
elif '[' in pattern:
137-
# Optionals included.
138-
cnt = utils.word_count(pattern)
139-
say("\t\t\tHas optionals and " + str(cnt) + " words.")
140-
if cnt not in track[inherits]['option']:
141-
track[inherits]['option'][cnt] = []
142-
track[inherits]['option'][cnt].append(trig)
143-
else:
144-
# Totally atomic.
145-
cnt = utils.word_count(pattern)
146-
say("\t\t\tTotally atomic and " + str(cnt) + " words.")
147-
if cnt not in track[inherits]['atomic']:
148-
track[inherits]['atomic'][cnt] = []
149-
track[inherits]['atomic'][cnt].append(trig)
150-
151-
# Move the no-{inherits} triggers to the bottom of the stack.
152-
track[highest_inherits + 1] = track[-1]
153-
del(track[-1])
154-
155-
# Add this group to the sort list.
156-
for ip in sorted(track.keys()):
157-
say("ip=" + str(ip))
158-
for kind in ['atomic', 'option', 'alpha', 'number', 'wild']:
159-
for wordcnt in sorted(track[ip][kind], reverse=True):
160-
# Triggers with a matching word count should be sorted
161-
# by length, descending.
162-
running.extend(sorted(track[ip][kind][wordcnt], key=len, reverse=True))
163-
running.extend(sorted(track[ip]['under'], key=len, reverse=True))
164-
running.extend(sorted(track[ip]['pound'], key=len, reverse=True))
165-
running.extend(sorted(track[ip]['star'], key=len, reverse=True))
166-
return running
103+
inherit = int(match.group(1)) # Get inherit value from the tag ``{inherit}``
104+
say("\t\t\tTrigger belongs to a topic which inherits other topics: level=" + str(inherit))
105+
triggers[index][0] = pattern = re.sub(RE.inherit, "", pattern) # Remove the inherit tag if any
106+
else:
107+
inherit = sys.maxint # If not found any inherit, set it to the maximum value, to place it last in the sort
108+
109+
trigger_object_list.append(TriggerObj(pattern, index, weight, inherit))
110+
111+
# Priority order of sorting criteria: weight, inherit, star, pound, under, option, wordcount, len, alphabet
112+
sorted_list = sorted(trigger_object_list,
113+
key=attrgetter('weight', 'inherit', 'star', 'pound',
114+
'under', 'option', 'wordcount', 'len', 'alphabet'))
115+
return [triggers[item.index] for item in sorted_list]
167116

168117
def sort_list(items):
169118
"""Sort a simple list by number of words and length."""

0 commit comments

Comments
 (0)