Skip to content

Commit ab8c54f

Browse files
committed
Modified the original PR to produce nicer-looking HTML output and to make href targets point to the site.
Merge branch 'master' of https://github.com/hlein/httpscreenshot into hlein-master. Conflicts: screenshotClustering/cluster.py
2 parents 73623b1 + f4b7747 commit ab8c54f

File tree

2 files changed

+15
-3
lines changed

2 files changed

+15
-3
lines changed

httpscreenshot.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@
2828
import shutil
2929
import hashlib
3030

31+
try:
32+
from urllib.parse import quote
33+
except:
34+
from urllib import quote
3135

3236
reload(sys)
3337
sys.setdefaultencoding("utf8")
@@ -181,9 +185,10 @@ def worker(urlQueue, tout, debug, headless, doProfile, vhosts, subs, extraHosts,
181185
except Queue.Empty:
182186
continue
183187
print '[+] '+str(urlQueue.qsize())+' URLs remaining'
184-
screenshotName = urlparse(curUrl[0]).netloc.replace(":", "-")
188+
screenshotName = quote(curUrl[0], safe='')
185189
if(debug):
186190
print '[+] Got URL: '+curUrl[0]
191+
print '[+] screenshotName: '+screenshotName
187192
if(os.path.exists(screenshotName+".png")):
188193
if(debug):
189194
print "[-] Screenshot already exists, skipping"

screenshotClustering/cluster.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@
88
import time
99
from bs4 import BeautifulSoup
1010

11+
try:
12+
from urllib.parse import quote,unquote
13+
except:
14+
from urllib import quote,unquote
15+
1116
def addAttrToBag(attrName,url,link,wordBags,soup):
1217
for tag in soup.findAll('',{attrName:True}):
1318
if(isinstance(tag[attrName],str) or isinstance(tag[attrName],unicode)):
@@ -141,9 +146,11 @@ def renderClusterHtml(clust,width,height,scopeFile=None):
141146

142147
for cluster,siteList in clust.iteritems():
143148
html=html+'<TR>'
144-
html=html+'<TR><TD><img src="'+siteList[0][0:-4]+'png" width='+str(width)+' height='+str(height)+'/></TD></TR>'
149+
screenshotName = quote(siteList[0][0:-4], safe='')
150+
html=html+'<TR><TD><img src="'+screenshotName+'png" width='+str(width)+' height='+str(height)+'/></TD></TR>'
145151
for site in siteList:
146-
html=html+'<TD onmouseout="clearPopup()" onmouseover="popUp(event,\''+site[0:-4]+'png\');"><a href="http://'+site[site.rfind('/')+1:site.rfind('-')]+':'+site[site.rfind('-')+1:site.rfind('.')]+'">'+site[site.rfind('/')+1:site.rfind('-')]+':'+site[site.rfind('-')+1:site.rfind('.')]+'</a></TD>'
152+
screenshotName = quote(site[0:-5], safe='')
153+
html=html+'<TD onmouseout="clearPopup()" onmouseover="popUp(event,\''+screenshotName+'.png\');"><a href="'+unquote(unquote(screenshotName[4:]).decode("utf-8")).decode("utf-8")+'">'+unquote(unquote(screenshotName[4:]).decode("utf-8")).decode("utf-8")+'</a></TD>'
147154
html=html+'</TR>'
148155
html=html+'</table>'
149156
footer = '</BODY></HTML>'

0 commit comments

Comments
 (0)