-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmd_to_py.py
More file actions
240 lines (195 loc) · 7.09 KB
/
md_to_py.py
File metadata and controls
240 lines (195 loc) · 7.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
"""
Convert a Markdown table to a formatted Python list of strings.
"""
from itertools import repeat
from typing import Callable, Iterable, Iterator, TypeVar
# Generic type variables used in the annotations of `_exclusive_map`:
# T is the element type of the input iterables, S is the type returned by
# the mapped function.
T = TypeVar("T")
S = TypeVar("S")
def _get_column_widths(
row: str, delimiter: str = "|", strip_spaces: bool = True
) -> list[int]:
"""
Return a list of column widths. Columns are determined by a delimiter
character.
`strip_spaces` provides a toggle between counting all characters
in each column, versus counting characters excluding leading and
trailing spaces
The length of the returned list should be equal to row.count(delimiter) - 1
"""
if len(delimiter) != 1:
raise ValueError(
f"`delimiter` must be one character, is {len(delimiter)} characters"
)
if delimiter == " ":
raise ValueError("`delimiter` cannot be a space")
count = 0
backward_count = 1
column = -1
output: list[int] = []
starts_with_space = strip_spaces
while count < len(row):
if row[count] != " ":
starts_with_space = False
if row[count] == delimiter:
if strip_spaces:
while row[count - backward_count] == " ":
backward_count += 1
output[column] -= 1
backward_count = 1
column += 1
output.append(-1)
if strip_spaces:
output[column] += 1
starts_with_space = True
if column > -1 and not starts_with_space:
output[column] += 1
count += 1
return output[:-1]
def _pad_columns(row: str, widths: tuple[int, ...] | int, delimiter: str = "|") -> str:
"""
Pad each column (determined by `delimiter`), to a given width.
`widths` can be a single int, which will be used for every column,
or can be a tuple with length row.count(delimiter) - 1, with each
index corresponding to a different column.
Returns padded version of `row`.
"""
if len(delimiter) != 1:
raise ValueError(
f"`delimiter` must be one character, is {len(delimiter)} characters"
)
if delimiter == " ":
raise ValueError("`delimiter` cannot be a space")
column_count = row.count(delimiter) - 1
if isinstance(widths, tuple) and len(widths) != column_count:
raise ValueError(
"`widths` cannot be a tuple of arbitrary length. "
f"Is {len(widths)}, should be {column_count}."
)
if isinstance(widths, int):
widths = tuple(repeat(widths, column_count))
column = 0
backward_count = 1
trailing_space_start = 0
prev_delimiter_index = 0
change_amount = 0
new_row = ""
for delim_loc, char in enumerate(row):
if char != delimiter or delim_loc == 0:
continue
while row[delim_loc - backward_count] == " ":
backward_count += 1
trailing_space_start = delim_loc - backward_count + 1
non_space_len = trailing_space_start - prev_delimiter_index
if widths[column] < non_space_len:
raise ValueError(
f"Width of column `{column}` cannot be less than "
f"{non_space_len}, is {widths[column]}"
)
change_amount = widths[column] - non_space_len
for index in range(
prev_delimiter_index, prev_delimiter_index + non_space_len + 1
):
new_row += row[index]
new_row += " " * change_amount
prev_delimiter_index = delim_loc
backward_count = 1
column += 1
new_row += delimiter
return new_row
def _exclusive_map(
func: Callable[[T], S],
*sequences: Iterable[T],
exclude: frozenset[int] = frozenset(),
) -> Iterator[S]:
"""
Similar to the built-in `map` function, but allows for
exclusion of certain elements of `seq`.
`exclude` should be a set of indices to exclude.
"""
for i, arguments in enumerate(zip(*sequences)):
if i not in exclude:
yield func(*arguments)
def md_table_to_lines(
    first_line_idx: int,
    last_line_idx: int,
    filename: str = "README.md",
    remove: frozenset[str] = frozenset(),
) -> list[str]:
    r"""
    Convert a Markdown table to a list of formatted strings.

    Arguments
    ---------
    - `first_line_idx` (int): The 1-based line number of the first line
        of the markdown table to be converted. Must be at least 1.
    - `last_line_idx` (int): The 1-based line number of the line just
        after the table; this line itself is not included.
    - `filename` (str, optional): The name of the file
        containing the table. Default is "README.md".
    - `remove` (frozenset[str], optional): The set of strings to be
        removed from each line. It is only iterated over, so any
        iterable of strings works. Default is an empty set.

    Returns
    -------
    - `list[str]`: A list of formatted strings representing the converted
        Markdown table. The table's second line (the Markdown separator
        row) is replaced by a run of dashes. Cells shorter than their
        column keep their padding spaces, including at the end of a line.

    Raises
    ------
    - `ValueError`: If `first_line_idx` is less than 1, or if
        `last_line_idx` is not greater than `first_line_idx`.
    - `FileNotFoundError`: If `filename` does not exist.

    Examples
    --------
    Assuming lines 23-28 of README.md contain:

        | Item      | Quantity | Price |
        | --------- | -------- | ----- |
        | Apple     | 5        | $1.00 |
        | Banana    | 3        | $1.50 |
        | Orange    | 2        | $0.75 |
        | Pineapple | 1        | $3.50 |

    >>> print("\n".join(md_table_to_lines(23, 29)))
    Item      Quantity Price
    --------------------------
    Apple     5        $1.00
    Banana    3        $1.50
    Orange    2        $0.75
    Pineapple 1        $3.50

    Assuming lines 41-47 of README.md contain (trailing padding spaces
    are omitted from the output shown below):

        | Flag            | Description                     |
        | --------------- | ------------------------------- |
        | **-h**          | Display help message            |
        | **-v**          | Enable verbose output           |
        | **-f** FILENAME | Specify input file              |
        | **-o** FILENAME | Specify output file             |
        | **-n**          | Do not overwrite existing files |

    >>> print("\n".join(md_table_to_lines(41, 48, remove=("**",))))
    Flag        Description
    --------------------------------------------
    -h          Display help message
    -v          Enable verbose output
    -f FILENAME Specify input file
    -o FILENAME Specify output file
    -n          Do not overwrite existing files
    """
    # A value below 1 would become a negative slice start and silently
    # select lines counted from the end of the file.
    if first_line_idx < 1:
        raise ValueError("First line index must be at least 1.")
    if last_line_idx <= first_line_idx:
        raise ValueError("Last line index must be greater than first line index.")

    try:
        with open(filename, encoding="utf-8") as markdown_file:
            lines = markdown_file.read().splitlines()[
                first_line_idx - 1 : last_line_idx - 1
            ]
    except FileNotFoundError as err:
        raise FileNotFoundError("File not found.") from err

    # Strip unwanted markup (e.g. "**") before measuring, so that it does
    # not count towards the column widths.
    for item in remove:
        lines = [line.replace(item, "") for line in lines]

    # Widest stripped cell per column, plus 2 to account for the "| " that
    # `_pad_columns` includes in its width measurement. Line 1 is the
    # Markdown separator row and must not influence the widths.
    max_column_lengths: tuple[int, ...] = tuple(
        max(column_widths) + 2
        for column_widths in zip(
            *_exclusive_map(_get_column_widths, lines, exclude=frozenset({1}))
        )
    )

    for i, line in enumerate(lines):
        if i == 1:
            # Replace the Markdown separator row with a plain rule.
            lines[i] = "-" * (sum(max_column_lengths) - 2)
            continue
        formatted = _pad_columns(line, max_column_lengths)
        # Order matters: interior delimiters first, then the leading and
        # trailing ones.
        for old, new in ((" | ", " "), ("| ", ""), (" |", "")):
            formatted = formatted.replace(old, new)
        lines[i] = formatted
    return lines