22import undetected_chromedriver as uc
33from selenium .webdriver .common .by import By
44from selenium .webdriver .chrome .options import Options
5+ import sys
6+ import argparse
7+ import tty , termios
8+ import json # Add this import at the top
9+ import os # new import
510
def getch():
    """Read a single character from stdin without waiting for Enter.

    The terminal is put into raw mode only for the duration of the read; the
    attributes that were active beforehand are restored afterwards, even if
    the read raises.

    Returns:
        The one character read from stdin (``''`` at end of input).
    """
    stdin_fd = sys.stdin.fileno()
    saved_attrs = termios.tcgetattr(stdin_fd)
    try:
        tty.setraw(stdin_fd)
        return sys.stdin.read(1)
    finally:
        # TCSADRAIN: restore the saved settings once pending output is flushed.
        termios.tcsetattr(stdin_fd, termios.TCSADRAIN, saved_attrs)
20+
def save_cookies(driver, file_path):
    """Write the driver's current cookies to *file_path* as JSON.

    Args:
        driver: Object exposing ``get_cookies()``; the returned value must be
            JSON-serialisable.
        file_path: Destination path. An existing file is overwritten.

    Raises:
        Whatever ``driver.get_cookies()`` or the file write raises. If fetching
        or serialising the cookies fails, an existing file is left untouched.
    """
    # Fetch and serialise BEFORE opening the file: opening with 'w' truncates,
    # so a failing driver would otherwise wipe a previously saved session.
    payload = json.dumps(driver.get_cookies())
    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(payload)
24+
def load_cookies(driver, file_path):
    """Load cookies from a JSON file and add each one to the driver.

    The file is expected to hold a JSON list of cookie dicts (the format
    written by ``save_cookies``). A leading ``.`` on a cookie's ``domain`` is
    stripped before the cookie is handed to ``driver.add_cookie``. Cookies
    without a ``domain`` entry are passed through unchanged.

    A missing file is not an error: a notice is printed and nothing is added.

    Args:
        driver: Object exposing ``add_cookie(cookie_dict)``.
        file_path: Path of the JSON cookie file.
    """
    # Keep the try body to the file read so only a missing cookie file is
    # treated as "start fresh".
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            cookies = json.load(file)
    except FileNotFoundError:
        # Diagnostics go to stderr: the script prints the page HTML on stdout.
        print("No cookie file found. Starting fresh.", file=sys.stderr)
        return

    for cookie in cookies:
        domain = cookie.get('domain')
        # The domain may be absent or None; only strip when there is one.
        if domain and domain.startswith('.'):
            cookie['domain'] = domain[1:]
        print(f"cookie: {cookie}", file=sys.stderr)
        driver.add_cookie(cookie)
36+
37+
def str2bool(value):
    """Convert a textual yes/no flag into a bool (argparse ``type=`` helper).

    Matching is case-insensitive. Accepted spellings are
    ``yes/true/t/y/1`` for True and ``no/false/f/n/0`` for False.

    Raises:
        argparse.ArgumentTypeError: if *value* is none of the accepted
            spellings.
    """
    truthy = ('yes', 'true', 't', 'y', '1')
    falsy = ('no', 'false', 'f', 'n', '0')

    token = value.lower()
    if token in truthy:
        return True
    if token in falsy:
        return False
    raise argparse.ArgumentTypeError('Boolean value is expected.')
45+
# Command-line interface. The help texts describe what each option is used
# for further down in this script.
parser = argparse.ArgumentParser(
    description='Load a URL in Chrome (undetected_chromedriver) and dump the page HTML.')
parser.add_argument('--url',
                    help='URL of the page to load',
                    required=True)
parser.add_argument('--output_file',
                    help='File to write the page HTML to (default: print to stdout)',
                    required=False)
parser.add_argument('--visual_check',
                    help='Show the browser window and wait for a key press '
                         'before the page is captured (yes/no, default: no)',
                    required=False, type=str2bool, default=False)
parser.add_argument('--cookies_file',
                    help='Path to the cookies file',
                    required=False)
parser.add_argument('--profile_dir',
                    help='Path to Chrome user-data directory to persist session/profile',
                    required=False)
args = parser.parse_args()

URL: str = args.url
OUTPUT_FILE = args.output_file      # None when --output_file is omitted
VISUAL_CHECK: bool = args.visual_check
COOKIES_FILE = args.cookies_file    # None when --cookies_file is omitted
PROFILE_DIR = args.profile_dir      # None when --profile_dir is omitted
68+
# Locale and time zone the browser is made to report.
LANGUAGE = "de-DE"
TIMEZONE_ID = "Europe/Berlin"

# Position reported through the geolocation override.
GEO_LATITUDE, GEO_LONGITUDE = 48.137154, 11.576124
GEO_ACCURACY_M = 50

# CDP method name -> parameter dict. Entries are executed in insertion order
# once the driver exists.
cdp_commands = {}
cdp_commands["Emulation.setTimezoneOverride"] = {"timezoneId": TIMEZONE_ID}
cdp_commands["Emulation.setLocaleOverride"] = {"locale": LANGUAGE}
cdp_commands["Emulation.setGeolocationOverride"] = {
    "latitude": GEO_LATITUDE,
    "longitude": GEO_LONGITUDE,
    "accuracy": GEO_ACCURACY_M,
}
# Geolocation permission is granted for the page being fetched.
cdp_commands["Browser.grantPermissions"] = {
    "permissions": ["geolocation"],
    "origin": URL,
}
82+
# Chrome command-line flags. Flag values must not carry surrounding
# whitespace, so the language is interpolated without padding.
options = [
    '--disable-gpu',
    f"--lang={LANGUAGE}",
    "--disable-blink-features=AutomationControlled",
]
# Run without a window unless the user asked to inspect the page visually.
if not VISUAL_CHECK:
    options.append('--headless')
86+
## open browser
chrome_options = Options()
for each_argument in options:
    chrome_options.add_argument(each_argument)

# If user provided a profile directory, ensure it exists and pass it to Chrome.
if PROFILE_DIR:
    os.makedirs(PROFILE_DIR, exist_ok=True)
    chrome_options.add_argument(f"--user-data-dir={PROFILE_DIR}")

driver = uc.Chrome(options=chrome_options)

# Apply the emulation overrides (time zone, locale, geolocation, permissions).
# If one of them fails, shut the browser down instead of leaving it orphaned.
try:
    for each_command, each_params in cdp_commands.items():
        driver.execute_cdp_cmd(each_command, each_params)
except Exception:
    driver.quit()
    raise
101+
# WebDriver only accepts cookies for the domain of the currently loaded
# document, so the page is opened first, the cookies are added, and the page
# is then requested again so that it is served with the restored session.
driver.get(URL)

if COOKIES_FILE:
    load_cookies(driver, COOKIES_FILE)
    driver.get(URL)
106+
# Interactive confirmation: wait until the user accepts the rendered page
# (y / o / j, case-insensitive) or aborts.
#   exit code 1 - user pressed Escape
#   exit code 2 - Ctrl-C / Ctrl-D / end of input
if VISUAL_CHECK:
    try:
        while True:
            sys.stdout.write("Visual check passed — press Yes/Ok/J ( otherwise: Escape )")
            sys.stdout.flush()
            resp = getch()
            # getch() reads in raw mode, where Ctrl-C and Ctrl-D are delivered
            # as ordinary characters and a closed stdin yields ''. Treat them
            # as an interrupt instead of looping forever.
            if resp in ('\x03', '\x04', ''):
                raise KeyboardInterrupt
            if resp.lower() in ('y', 'o', 'j'):
                break
            if resp == '\x1b':
                driver.quit()
                sys.exit(1)
    except (KeyboardInterrupt, EOFError):
        # on Ctrl-C / EOF quit the browser and stop
        driver.quit()
        sys.exit(2)
20123
## capture the page and persist the results
# The browser is always shut down, even if writing the output or the cookie
# file fails.
try:
    html: str = driver.page_source

    if OUTPUT_FILE is not None:
        # Explicit UTF-8: the page may contain non-ASCII text, which a
        # locale-dependent default encoding might not be able to encode.
        with open(OUTPUT_FILE, 'w', encoding='utf-8') as file:
            file.write(html)
    else:
        print(html)

    if COOKIES_FILE:
        save_cookies(driver, COOKIES_FILE)
finally:
    driver.quit()
0 commit comments