Skip to content

Commit f05e41e

Browse files
committed
selenium stealth
1 parent a05f1fb commit f05e41e

File tree

1 file changed

+122
-14
lines changed

1 file changed

+122
-14
lines changed

selenium/selenium_headless_stealth.py

Lines changed: 122 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,27 +2,135 @@
22
import undetected_chromedriver as uc
33
from selenium.webdriver.common.by import By
44
from selenium.webdriver.chrome.options import Options
5+
import sys
6+
import argparse
7+
import tty, termios
8+
import json # Add this import at the top
9+
import os # new import
510

11+
def getch():
    """Read a single key press from standard input and return it.

    When stdin is a terminal it is switched to raw mode for the duration of
    the read, so the key is delivered immediately without waiting for Enter,
    and the previous terminal settings are always restored afterwards.

    Raw mode turns off the terminal's signal keys, so Ctrl-C and Ctrl-D
    arrive as the plain characters ``\\x03`` and ``\\x04``. They are
    translated back into the exceptions an interactive caller expects.

    When stdin is not a terminal (pipe, redirected file) there are no
    terminal settings to change, so one character is read directly.

    Returns:
        The single character that was read.

    Raises:
        KeyboardInterrupt: Ctrl-C was pressed while in raw mode.
        EOFError: Ctrl-D was pressed in raw mode, or stdin is exhausted.
    """
    stdin = sys.stdin

    # Checked before fileno(): non-terminal streams may have no descriptor.
    if not stdin.isatty():
        ch = stdin.read(1)
        if ch == '':
            raise EOFError("stdin closed while waiting for a key press")
        return ch

    fd = stdin.fileno()
    old = termios.tcgetattr(fd)
    try:
        tty.setraw(fd)
        ch = stdin.read(1)
    finally:
        # Restore the saved settings even if the read was interrupted.
        termios.tcsetattr(fd, termios.TCSADRAIN, old)

    if ch == '\x03':
        raise KeyboardInterrupt
    if ch in ('', '\x04'):
        raise EOFError("end of input while waiting for a key press")
    return ch
20+
21+
def save_cookies(driver, file_path):
    """Write the driver's current cookies to *file_path* as JSON.

    The cookies are fetched and serialized before the file is opened.
    Opening in ``'w'`` mode truncates the file immediately, so doing it
    first would destroy an existing cookie file whenever the driver call
    or the serialization fails.

    Args:
        driver: Object exposing ``get_cookies()``, returning JSON-serializable
            cookie data.
        file_path: Destination path; overwritten if it already exists.
    """
    serialized = json.dumps(driver.get_cookies())
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(serialized)
24+
25+
def load_cookies(driver, file_path):
    """Add the cookies stored in the JSON file *file_path* to *driver*.

    A single leading dot is removed from each cookie's ``domain`` before it
    is passed to ``driver.add_cookie`` (presumably because dotted domains
    are not accepted as-is; confirm against the driver in use). Cookies
    without a ``domain`` entry are added unchanged.

    A missing file is not an error: a notice is printed and nothing is
    added. Diagnostics go to stderr because stdout carries the page HTML
    when no output file is given. Cookie contents are not echoed at all,
    since they may hold session secrets.

    Args:
        driver: Object exposing ``add_cookie(cookie_dict)``.
        file_path: Path of a JSON file containing a list of cookie dicts.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            cookies = json.load(file)
    except FileNotFoundError:
        print("No cookie file found. Starting fresh.", file=sys.stderr)
        return

    for cookie in cookies:
        domain = cookie.get('domain')
        # Guard against cookies that carry no domain (None has no startswith).
        if isinstance(domain, str) and domain.startswith('.'):
            cookie['domain'] = domain[1:]
        driver.add_cookie(cookie)
36+
37+
38+
def str2bool(value):
    """Convert a command-line style truth value to ``bool``.

    Accepted spellings are case-insensitive and may be surrounded by
    whitespace: ``yes/true/t/y/1`` map to ``True`` and ``no/false/f/n/0``
    map to ``False``. A real ``bool`` is returned unchanged, so the function
    can also be fed an already-converted default.

    Args:
        value: The text (or bool) to interpret.

    Returns:
        The corresponding boolean.

    Raises:
        argparse.ArgumentTypeError: The value is not a recognised spelling.
    """
    if isinstance(value, bool):
        return value

    normalized = str(value).strip().lower()
    if normalized in ('yes', 'true', 't', 'y', '1'):
        return True
    if normalized in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value is expected.')
45+
46+
# Command-line interface. Every option is mirrored into a module-level
# constant below, which the rest of the script reads.
parser = argparse.ArgumentParser(
    description='Load a URL in Chrome with German locale, timezone and '
                'geolocation emulation and dump the page HTML.')
parser.add_argument('--url',
                    help='URL of the page to load',
                    required=True)
parser.add_argument('--output_file',
                    help='File to write the page HTML to '
                         '(default: print it to stdout)',
                    required=False)
# nargs='?' with const=True lets a bare "--visual_check" mean true while
# "--visual_check true|false" keeps working as before.
parser.add_argument('--visual_check',
                    help='Show the browser window and wait for a key press '
                         'before the page is captured '
                         '(default: false, run headless)',
                    required=False, type=str2bool,
                    nargs='?', const=True, default=False)
parser.add_argument('--cookies_file',
                    help='Path to the JSON cookies file; loaded at start if '
                         'it exists and rewritten before the browser closes',
                    required=False)
parser.add_argument('--profile_dir',
                    help='Path to Chrome user-data directory to persist session/profile',
                    required=False)
args = parser.parse_args()

URL: str = args.url
OUTPUT_FILE = args.output_file    # None when --output_file is not given
VISUAL_CHECK: bool = args.visual_check
COOKIES_FILE = args.cookies_file  # None when --cookies_file is not given
PROFILE_DIR = args.profile_dir    # None when --profile_dir is not given
68+
69+
# Locale and timezone the browser is made to report.
LANGUAGE = "de-DE"
TIMEZONE_ID = "Europe/Berlin"

# Emulated position (Munich, Germany) and its accuracy in metres.
GEO_LATITUDE = 48.137154
GEO_LONGITUDE = 11.576124
GEO_ACCURACY_M = 50

# DevTools commands keyed by method name, each mapped to its parameters.
# They are issued in this order once the browser is running.
cdp_commands = {
    "Emulation.setTimezoneOverride": {
        "timezoneId": TIMEZONE_ID,
    },
    "Emulation.setLocaleOverride": {
        "locale": LANGUAGE,
    },
    "Emulation.setGeolocationOverride": {
        "latitude": GEO_LATITUDE,
        "longitude": GEO_LONGITUDE,
        "accuracy": GEO_ACCURACY_M,
    },
    "Browser.grantPermissions": {
        "permissions": ["geolocation"],
        "origin": URL,
    },
}
82+
83+
# Chrome command-line flags; headless mode is added unless the user asked
# for a visual check, which needs a visible window.
options = [
    '--disable-gpu',
    f"--lang={LANGUAGE}",
    "--disable-blink-features=AutomationControlled",
] + ([] if VISUAL_CHECK else ['--headless'])

# Transfer the flags onto the Selenium options object.
chrome_options = Options()
for each_argument in options:
    chrome_options.add_argument(each_argument)
791

8-
# Set language
9-
chrome_options.add_argument("--lang=de-DE")
10-
# Disable automation flags
11-
chrome_options.add_argument("--disable-blink-features=AutomationControlled")
92+
# Persist the browser session: when a profile directory was requested,
# create it if necessary and hand it to Chrome as its user-data directory.
if PROFILE_DIR:
    os.makedirs(PROFILE_DIR, exist_ok=True)
    chrome_options.add_argument("--user-data-dir={}".format(PROFILE_DIR))
1296

13-
# Launch browser
1497
# Start Chrome through undetected_chromedriver (imported as `uc`) using the
# options object configured earlier in this script.
driver = uc.Chrome(options=chrome_options)
1598

16-
# Set timezone
17-
driver.execute_cdp_cmd("Emulation.setTimezoneOverride", {"timezoneId": "Europe/Berlin"})
18-
# Set locale
19-
driver.execute_cdp_cmd("Emulation.setLocaleOverride",{"locale": "de-DE"})
99+
# Send every configured DevTools command to the running browser, in the
# order the mapping defines them.
for each_command, each_params in cdp_commands.items():
    driver.execute_cdp_cmd(each_command, each_params)
101+
102+
# Open the page first: WebDriver only accepts cookies for the domain of the
# document that is currently loaded, so adding them to a freshly started
# browser (blank page) is rejected.
driver.get(URL)

if COOKIES_FILE:
    load_cookies(driver, COOKIES_FILE)
    # Request the page again so it is served with the restored cookies.
    driver.get(URL)
106+
107+
# Manual gate: with --visual_check the operator inspects the visible browser
# window and confirms with y / o / j. Escape aborts with exit code 1;
# Ctrl-C or end of input aborts with exit code 2. Any other key asks again.
if VISUAL_CHECK:
    abort_code = None
    try:
        while True:
            # Prompt on stderr: stdout carries the page HTML when no output
            # file is given and must stay clean.
            sys.stderr.write("Visual check passed — press Yes/Ok/J ( otherwise: Escape )")
            sys.stderr.flush()
            resp = getch()
            sys.stderr.write("\n")
            resp_ch = resp.strip().lower()
            if resp_ch in ('y', 'o', 'j'):
                break
            if resp == '\x1b':
                abort_code = 1
                break
            # In raw terminal mode Ctrl-C / Ctrl-D arrive as plain characters
            # rather than as exceptions; treat them like an interrupt.
            if resp in ('\x03', '\x04'):
                abort_code = 2
                break
    except (KeyboardInterrupt, EOFError):
        abort_code = 2

    if abort_code is not None:
        # Close the browser before leaving so no Chrome process is orphaned.
        driver.quit()
        sys.exit(abort_code)
20123

21-
# Set geolocation — example: Munich, Germany
22-
driver.execute_cdp_cmd("Emulation.setGeolocationOverride", {"latitude": 48.137154,"longitude": 11.576124,"accuracy": 50})
23-
# Grant geolocation permission automatically
24-
driver.execute_cdp_cmd("Browser.grantPermissions",{"permissions": ["geolocation"],"origin": "https://google.com"})
124+
# Capture the source of the page the browser is currently showing.
html: str = driver.page_source
25126

26-
driver.get("https://www.immowelt.de/classified-search?distributionTypes=Buy,Buy_Auction,Compulsory_Auction&estateTypes=Apartment&locations=AD08DE6440&numberOfRoomsMin=2&priceMax=425000&priceMin=210000&spaceMin=30")
127+
# Emit the captured HTML: to the requested file, otherwise to stdout.
if OUTPUT_FILE is not None:
    # Explicit UTF-8: the page is German-language, and the platform's default
    # encoding may be unable to represent its non-ASCII characters.
    with open(OUTPUT_FILE, 'w', encoding='utf-8') as file:
        file.write(html)
else:
    print(html)
27132

133+
# Persist the session cookies if a cookie file was requested, then shut the
# browser down. The quit sits in `finally` so a failure while saving cookies
# cannot leave a Chrome process running.
try:
    if COOKIES_FILE:
        save_cookies(driver, COOKIES_FILE)
finally:
    driver.quit()

0 commit comments

Comments
 (0)