|
9 | 9 | from .regexp import RE |
10 | 10 | from . import utils |
11 | 11 | import re |
| 12 | +from operator import itemgetter, attrgetter |
| 13 | +import sys |
| 14 | + |
| 15 | + |
| 16 | +class TriggerObj(object): |
| 17 | + """An object represent trigger for ease of sorting. |
| 18 | +
|
| 19 | + In RiveScript sorting rule, some of sorting criteria are ascending for example alphabetical or inherit whereas other |
| 20 | + criteria are descending order for example word counts. In Python multiple level sort, the sort direction set by |
| 21 | + parameter `reverse` is applied to all criteria. So in our implementation, some parameters are set to negative to |
| 22 | + keep search direction consistent among all criteria. |
| 23 | +
|
| 24 | + Parameters: |
| 25 | + pattern: Trigger pattern in string format i.e. "* hey [man]" |
| 26 | + index: Unique positional index of the object in the original list |
| 27 | + weight: Pattern weight ``{weight}`` |
| 28 | + inherit: Pattern inherit level, extracted from i.e. "{inherit=1}hi" |
| 29 | + wordcount: Length of pattern by wordcount |
| 30 | + len: Length of pattern by character count |
| 31 | + star: Number of wildcards (``*``), excluding alphabetical wildcards, and numeric wildcards |
| 32 | + pound: Number of numeric wildcards (``#``) |
| 33 | + under: Number of alphabetical wildcards (``_``) |
| 34 | + option: Number of optional tags ("[man]" in "hey [man]") |
| 35 | + """ |
| 36 | + |
| 37 | + def __init__(self, pattern, index, weight, inherit = sys.maxint): |
| 38 | + self.alphabet = pattern # Sort according to alphabet order i.e. haha < hihi |
| 39 | + self.index = index # For rearrange items in the sorted array |
| 40 | + self.weight = - weight # Negative weight to place i.e. -100 < 0 |
| 41 | + self.inherit = inherit # Low inherit takes precedence i.e. 0 < 1 |
| 42 | + self.wordcount = - utils.word_count(pattern) # Length -2 < -1 |
| 43 | + self.len = -len(self.alphabet) # Length -10 < -5 |
| 44 | + self.star = self.alphabet.count('*') # Number of wildcards 0 < 1 |
| 45 | + self.pound = self.alphabet.count('#') # Number of numeric wildcards 0 < 1 |
| 46 | + self.under = self.alphabet.count('_') # Number of alphabetical wildcards 0 < 1 |
| 47 | + self.option = self.alphabet.count('[') # Assume that the template is properly formatted 0 < 1 |
| 48 | + |
12 | 49 |
|
13 | 50 | def sort_trigger_set(triggers, exclude_previous=True, say=None): |
14 | 51 | """Sort a group of triggers in optimal sorting order. |
@@ -46,124 +83,36 @@ def sort_trigger_set(triggers, exclude_previous=True, say=None): |
46 | 83 | # ["trigger text", pointer to trigger data] |
47 | 84 | # So this code will use e.g. `trig[0]` when referring to the trigger text. |
48 | 85 |
|
49 | | - # Create a priority map. |
50 | | - prior = { |
51 | | - 0: [] # Default priority=0 |
52 | | - } |
| 86 | + # Create a list of trigger objects map. |
| 87 | + trigger_object_list = [] |
| 88 | + for index, trig in enumerate(triggers): |
53 | 89 |
|
54 | | - for trig in triggers: |
55 | 90 | if exclude_previous and trig[1]["previous"]: |
56 | 91 | continue |
57 | 92 |
|
| 93 | + pattern = trig[0] # Extract only the text of the trigger, with possible tag of inherit |
| 94 | + |
| 95 | + # See if it has a weight tag |
58 | 96 | match, weight = re.search(RE.weight, trig[0]), 0 |
| 97 | + if match: # Value of math is not None if there is a match. |
| 98 | + weight = int(match.group(1)) # Get the weight from the tag ``{weight}`` |
| 99 | + |
| 100 | + # See if it has an inherits tag. |
| 101 | + match = re.search(RE.inherit, pattern) |
59 | 102 | if match: |
60 | | - weight = int(match.group(1)) |
61 | | - if weight not in prior: |
62 | | - prior[weight] = [] |
63 | | - |
64 | | - prior[weight].append(trig) |
65 | | - |
66 | | - # Keep a running list of sorted triggers for this topic. |
67 | | - running = [] |
68 | | - |
69 | | - # Sort them by priority. |
70 | | - for p in sorted(prior.keys(), reverse=True): |
71 | | - say("\tSorting triggers with priority " + str(p)) |
72 | | - |
73 | | - # So, some of these triggers may include {inherits} tags, if they |
74 | | - # came form a topic which inherits another topic. Lower inherits |
75 | | - # values mean higher priority on the stack. |
76 | | - inherits = -1 # -1 means no {inherits} tag |
77 | | - highest_inherits = -1 # highest inheritance number seen |
78 | | - |
79 | | - # Loop through and categorize these triggers. |
80 | | - track = { |
81 | | - inherits: init_sort_track() |
82 | | - } |
83 | | - |
84 | | - for trig in prior[p]: |
85 | | - pattern = trig[0] |
86 | | - say("\t\tLooking at trigger: " + pattern) |
87 | | - |
88 | | - # See if it has an inherits tag. |
89 | | - match = re.search(RE.inherit, pattern) |
90 | | - if match: |
91 | | - inherits = int(match.group(1)) |
92 | | - if inherits > highest_inherits: |
93 | | - highest_inherits = inherits |
94 | | - say("\t\t\tTrigger belongs to a topic which inherits other topics: level=" + str(inherits)) |
95 | | - pattern = re.sub(RE.inherit, "", pattern) |
96 | | - trig[0] = pattern |
97 | | - else: |
98 | | - inherits = -1 |
99 | | - |
100 | | - # If this is the first time we've seen this inheritance level, |
101 | | - # initialize its track structure. |
102 | | - if inherits not in track: |
103 | | - track[inherits] = init_sort_track() |
104 | | - |
105 | | - # Start inspecting the trigger's contents. |
106 | | - if '_' in pattern: |
107 | | - # Alphabetic wildcard included. |
108 | | - cnt = utils.word_count(pattern) |
109 | | - say("\t\t\tHas a _ wildcard with " + str(cnt) + " words.") |
110 | | - if cnt > 1: |
111 | | - if cnt not in track[inherits]['alpha']: |
112 | | - track[inherits]['alpha'][cnt] = [] |
113 | | - track[inherits]['alpha'][cnt].append(trig) |
114 | | - else: |
115 | | - track[inherits]['under'].append(trig) |
116 | | - elif '#' in pattern: |
117 | | - # Numeric wildcard included. |
118 | | - cnt = utils.word_count(pattern) |
119 | | - say("\t\t\tHas a # wildcard with " + str(cnt) + " words.") |
120 | | - if cnt > 1: |
121 | | - if cnt not in track[inherits]['number']: |
122 | | - track[inherits]['number'][cnt] = [] |
123 | | - track[inherits]['number'][cnt].append(trig) |
124 | | - else: |
125 | | - track[inherits]['pound'].append(trig) |
126 | | - elif '*' in pattern: |
127 | | - # Wildcard included. |
128 | | - cnt = utils.word_count(pattern) |
129 | | - say("\t\t\tHas a * wildcard with " + str(cnt) + " words.") |
130 | | - if cnt > 1: |
131 | | - if cnt not in track[inherits]['wild']: |
132 | | - track[inherits]['wild'][cnt] = [] |
133 | | - track[inherits]['wild'][cnt].append(trig) |
134 | | - else: |
135 | | - track[inherits]['star'].append(trig) |
136 | | - elif '[' in pattern: |
137 | | - # Optionals included. |
138 | | - cnt = utils.word_count(pattern) |
139 | | - say("\t\t\tHas optionals and " + str(cnt) + " words.") |
140 | | - if cnt not in track[inherits]['option']: |
141 | | - track[inherits]['option'][cnt] = [] |
142 | | - track[inherits]['option'][cnt].append(trig) |
143 | | - else: |
144 | | - # Totally atomic. |
145 | | - cnt = utils.word_count(pattern) |
146 | | - say("\t\t\tTotally atomic and " + str(cnt) + " words.") |
147 | | - if cnt not in track[inherits]['atomic']: |
148 | | - track[inherits]['atomic'][cnt] = [] |
149 | | - track[inherits]['atomic'][cnt].append(trig) |
150 | | - |
151 | | - # Move the no-{inherits} triggers to the bottom of the stack. |
152 | | - track[highest_inherits + 1] = track[-1] |
153 | | - del(track[-1]) |
154 | | - |
155 | | - # Add this group to the sort list. |
156 | | - for ip in sorted(track.keys()): |
157 | | - say("ip=" + str(ip)) |
158 | | - for kind in ['atomic', 'option', 'alpha', 'number', 'wild']: |
159 | | - for wordcnt in sorted(track[ip][kind], reverse=True): |
160 | | - # Triggers with a matching word count should be sorted |
161 | | - # by length, descending. |
162 | | - running.extend(sorted(track[ip][kind][wordcnt], key=len, reverse=True)) |
163 | | - running.extend(sorted(track[ip]['under'], key=len, reverse=True)) |
164 | | - running.extend(sorted(track[ip]['pound'], key=len, reverse=True)) |
165 | | - running.extend(sorted(track[ip]['star'], key=len, reverse=True)) |
166 | | - return running |
| 103 | + inherit = int(match.group(1)) # Get inherit value from the tag ``{inherit}`` |
| 104 | + say("\t\t\tTrigger belongs to a topic which inherits other topics: level=" + str(inherit)) |
| 105 | + triggers[index][0] = pattern = re.sub(RE.inherit, "", pattern) # Remove the inherit tag if any |
| 106 | + else: |
| 107 | + inherit = sys.maxint # If not found any inherit, set it to the maximum value, to place it last in the sort |
| 108 | + |
| 109 | + trigger_object_list.append(TriggerObj(pattern, index, weight, inherit)) |
| 110 | + |
| 111 | + # Priority order of sorting criteria: weight, inherit, star, pound, under, option, wordcount, len, alphabet |
| 112 | + sorted_list = sorted(trigger_object_list, |
| 113 | + key=attrgetter('weight', 'inherit', 'star', 'pound', |
| 114 | + 'under', 'option', 'wordcount', 'len', 'alphabet')) |
| 115 | + return [triggers[item.index] for item in sorted_list] |
167 | 116 |
|
168 | 117 | def sort_list(items): |
169 | 118 | """Sort a simple list by number of words and length.""" |
|
0 commit comments