1"""
2This module defines the `main()` coroutine for the Apify Actor, executed from the `__main__.py` file.
3
4Feel free to modify this file to suit your specific needs.
5
6To build Apify Actors, utilize the Apify SDK toolkit, read more at the official documentation:
7https://docs.apify.com/sdk/python
8"""
9
10from urllib.parse import urljoin
11from seleniumwire import webdriver
12
13from selenium.webdriver.common.by import By
14from selenium.webdriver.common.action_chains import ActionChains
15from apify import Actor
16import re
17from urllib.parse import urljoin
18from bs4 import BeautifulSoup
19import requests
20import requests.exceptions
21import time
22import undetected_chromedriver as uc
23import random
24from .datascrapify import StartProcess, parse_proxy_url,on_aborting_event
25from apify_shared.consts import ActorEventTypes as Event
26import asyncio
27
28
29
30
31
32
def scrape_contact_emails(link):
    """Look for an e-mail address on a website's home page or contact page.

    The home page is downloaded and its text is searched for addresses on the
    site's own domain (the host name of ``link`` without a leading "www.").
    If none is found, up to two links whose URL or link text mentions
    "contact" are followed and searched the same way.

    Args:
        link: Absolute URL of the home page, e.g. "http://www.example.com".

    Returns:
        The shortest matching address as a plain ``str``, or "" when nothing
        is found, the URL has no host name, or a page cannot be downloaded.
    """
    # Function-scope import: the module itself only imports ``urljoin``.
    from urllib.parse import urlparse

    try:
        host = (urlparse(link).hostname or "").lower()
    except ValueError:
        # urlparse rejects some malformed URLs (e.g. broken IPv6 literals).
        return ""
    if host.startswith("www."):
        host = host[4:]
    if not host:
        return ""

    # re.escape keeps the dots of the domain literal; the trailing \b stops
    # "example.com" from matching inside "example.community".
    email_pattern = re.compile(
        r"[A-Za-z0-9._%+-]+@" + re.escape(host) + r"\b",
        re.IGNORECASE,
    )

    def fetch(url):
        """Download and parse ``url``; return None when the request fails."""
        try:
            # A timeout is required: without it one stalled site blocks forever.
            response = requests.get(url, timeout=30)
        except requests.exceptions.RequestException:
            return None
        return BeautifulSoup(response.text, "lxml")

    def shortest_email(soup):
        """Return the shortest address found in the page text, or ""."""
        found = email_pattern.findall(soup.get_text(" "))
        return min(found, key=len) if found else ""

    home_page = fetch(link)
    if home_page is None:
        return ""

    email = shortest_email(home_page)
    if email:
        print(email)
        # Return the home-page hit instead of discarding it.
        return email

    # Collect up to two distinct candidate contact pages. Anchors without a
    # usable href are skipped rather than aborting the whole search.
    contact_urls = []
    for anchor in home_page.find_all("a"):
        href = anchor.get("href") or ""
        if len(href) <= 2:
            continue
        if "contact" in href.lower() or "contact" in anchor.text.lower():
            # urljoin resolves relative links and leaves absolute ones as-is.
            target = urljoin(link, href)
            if target not in contact_urls:
                contact_urls.append(target)
            if len(contact_urls) == 2:
                break

    for contact_url in contact_urls:
        contact_page = fetch(contact_url)
        if contact_page is None:
            continue
        email = shortest_email(contact_page)
        if email:
            print(email)
            return email

    return ""
82
# Country code (as received in the actor input) -> Amazon domain suffix used
# when the "site:" filter targets Amazon. Unknown codes fall back to "com".
_AMAZON_TLD_BY_COUNTRY = {
    'gm': 'de', 'sp': 'es', 'fr': 'fr', 'uk': 'co.uk', 'as': 'com.au',
    'www': 'com', 'in': 'in', 'be': 'com.be', 'br': 'com.br', 'ca': 'ca',
    'ch': 'cn', 'eg': 'eg', 'it': 'it', 'ja': 'co.jp', 'mx': 'com.mx',
    'nl': 'nl', 'pl': 'pl', 'sa': 'sa', 'sn': 'sg', 'sw': 'se',
    'tu': 'com.tr', 'ae': 'ae',
}

# E-mail addresses inside the text of a search result.
_EMAIL_PATTERN = re.compile(r'[a-zA-Z0-9\.\-+_]+@[a-zA-Z0-9\.\-+_]+\.[a-zA-Z]+')

# Website mentions ("http(s)://..." or "www....") inside the text of a result.
# Single backslashes: in a raw string "\\b" would match a literal backslash.
_WEBSITE_PATTERN = re.compile(r'\b(?:https?://|www\.)\S+\b')

# Upper bound, in seconds, for scraping one external website.
_WEBSITE_SCRAPE_TIMEOUT = 60


def _website_url(raw_match):
    """Normalise a website mention to the "http://www.<host>" form.

    This is the form ``scrape_contact_emails`` has always been called with;
    scheme, leading "www." and any path are stripped before re-adding the
    prefix so an already absolute URL is not prefixed twice.
    """
    host = re.sub(r'^https?://', '', raw_match, flags=re.IGNORECASE)
    host = re.split(r'[/?#]', host, maxsplit=1)[0]
    if host.lower().startswith('www.'):
        host = host[4:]
    return "http://www." + host


async def _email_from_websites(text):
    """Visit the websites mentioned in ``text`` until one yields an address.

    Returns the address as ``str``, or '' when no website produced one.
    """
    loop = asyncio.get_running_loop()
    for raw_match in _WEBSITE_PATTERN.findall(text):
        website = _website_url(raw_match)
        Actor.log.info('Website ' + website)
        try:
            # The scraper does blocking network I/O on arbitrary sites: run it
            # in a worker thread and bound the wait so one slow site cannot
            # stall the event loop or the whole run.
            email = await asyncio.wait_for(
                loop.run_in_executor(None, scrape_contact_emails, website),
                timeout=_WEBSITE_SCRAPE_TIMEOUT,
            )
        except Exception as err:
            Actor.log.warning(f'Could not scrape {website}: {err!r}')
            continue
        if email:
            return str(email).strip()
    return ''


async def _collect_emails(text, website_text):
    """Return the e-mail addresses for one search result.

    Addresses found directly in ``text`` win; only when there are none are the
    websites mentioned in ``website_text`` visited as a fallback.
    """
    emails = _EMAIL_PATTERN.findall(text)
    Actor.log.info('match email ' + str(len(emails)))
    if emails:
        for email in emails:
            Actor.log.info('email ' + email)
        return emails
    email = await _email_from_websites(website_text)
    return [email] if email else []


async def _push_new_emails(emails, seen, title, description, detail_link):
    """Push one dataset item per address that is not in ``seen`` yet."""
    for email in emails:
        if email in seen:
            continue
        seen.add(email)
        await Actor.push_data({'Email': email, 'title': title, 'Description': description, 'Detail_Link': detail_link})


def _result_elements(driver, social_network_val):
    """Return the result containers of the page currently open in ``driver``."""
    if social_network_val == "youtube.com/" or social_network_val == "instagram.com/":
        return driver.find_elements(By.XPATH, "//div[contains(@class,'MjjYud')]")
    return driver.find_elements(By.CSS_SELECTOR, "div.tF2Cxc")


async def _wait_while_captcha(driver):
    """Poll (at most 100 times, 3 s apart) while the "sorry/index" page is shown."""
    attempts = 0
    try:
        while "sorry/index" in driver.current_url and attempts < 100:
            print('captcha')
            print(driver.current_url)
            await asyncio.sleep(3)
            attempts += 1
    except Exception as e:
        print(e)


async def _push_browser_result(result, seen_emails):
    """Extract title, link, snippet and e-mails from one result element and push them."""
    business_name = ""
    details_link = ""
    address = ""

    headings = result.find_elements(By.CSS_SELECTOR, "h3.LC20lb")
    if headings:
        business_name = headings[0].text
        details_link = headings[0].find_element(By.XPATH, "parent::*").get_attribute("href")
    if not details_link:
        cites = result.find_elements(By.CSS_SELECTOR, "div.TbwUpd")
        if cites:
            # find_element (singular): a list of elements has no get_attribute.
            details_link = cites[0].find_element(By.XPATH, "parent::*").get_attribute("href")

    snippets = result.find_elements(By.CSS_SELECTOR, "div.VwiC3b")
    if snippets:
        address = snippets[0].text.replace(";", "-").replace(",", "-")

    alltext = result.text
    emails = await _collect_emails(alltext, business_name + address)
    await _push_new_emails(emails, seen_emails, business_name, alltext, details_link)


async def _scrape_result_pages(driver, social_network_val):
    """Walk through the result pages open in ``driver`` and push new e-mails."""
    seen_emails = set()
    found_results = False

    while True:
        consent = driver.find_elements(By.XPATH, "//iframe[contains(@src, 'consent.google.com')]")
        if consent:
            driver.switch_to.frame(consent[0])
            driver.find_element(By.ID, "id").click()
            # Leave the iframe again, otherwise every later lookup is scoped to it.
            driver.switch_to.default_content()

        popup = driver.find_elements(By.CSS_SELECTOR, "button[class='yt-spec-button-shape-next yt-spec-button-shape-next--filled yt-spec-button-shape-next--mono yt-spec-button-shape-next--size-m']")
        if popup:
            Actor.log.info("popup")
            for button in popup:
                if button.text == "Accept all":
                    button.click()

        popup_accept = driver.find_elements(By.CSS_SELECTOR, "button[id='L2AGLb']")
        if popup_accept:
            popup_accept[0].click()
            await asyncio.sleep(1)

        checkconsent = 0
        while driver.find_elements(By.CSS_SELECTOR, "div[class='HTjtHe'][style*='display: block']") and checkconsent < 100:
            Actor.log.info('checkconsent ' + str(checkconsent))
            Actor.log.info("homeurl" + driver.current_url)
            checkconsent += 1
            await asyncio.sleep(1)

        print(len(driver.find_elements(By.CSS_SELECTOR, "div.tF2Cxc")))
        Actor.log.info("homeurl" + driver.current_url)

        # Wait out a captcha before reading results so they are not stale.
        await _wait_while_captcha(driver)
        results = _result_elements(driver, social_network_val)

        if not results:
            print('check new attribut')
            results = driver.find_elements(By.CSS_SELECTOR, "div.fP1Qef")
            print(len(results))
            print(driver.find_elements(By.CSS_SELECTOR, "div[id='main']"))

        if not results:
            if not found_results:
                await Actor.push_data({'Email': 'No Data Found, Due to google not respond. May be proxy problem'})
            break

        found_results = True
        Actor.log.info("Result" + str(len(results)))
        for result in results:
            try:
                await _push_browser_result(result, seen_emails)
            except Exception as err:
                Actor.log.info(f"Unexpected {err=}, {type(err)=}")

        next_links = driver.find_elements(By.CSS_SELECTOR, "a#pnnext")
        if next_links:
            Actor.log.info('Click Next')
            previous_url = driver.current_url
            driver.execute_script("arguments[0].click();", next_links[0])
            await asyncio.sleep(3)
            # The counter lives outside the loop so the 20-try limit can trigger.
            tryresult = 0
            while driver.current_url == previous_url:
                await asyncio.sleep(1)
                tryresult += 1
                if tryresult > 20:
                    # The page never changed; stop (the caller quits the driver).
                    return
        else:
            Actor.log.info('Click Next_1')
            previous_count = len(_result_elements(driver, social_network_val))
            Actor.log.info('Click Next_1_T1')

            more_buttons = driver.find_elements(By.CSS_SELECTOR, "span.RVQdVd")
            if more_buttons:
                ActionChains(driver).move_to_element(more_buttons[0]).click().perform()

            await asyncio.sleep(3)
            Actor.log.info('Click Next_1_T2')
            current_count = len(_result_elements(driver, social_network_val))

            tryresult = 0
            while previous_count == current_count:
                await asyncio.sleep(1)
                tryresult += 1
                Actor.log.info('Click Next_1_T3_tryresult ' + str(tryresult))
                if tryresult > 20:
                    break
                current_count = len(_result_elements(driver, social_network_val))

            print('AllCOUNT_PREVIOUS' + str(previous_count))
            print('AllCOUNT_Now' + str(current_count))
            if previous_count == current_count:
                # No additional results were loaded: this was the last batch.
                break
            await asyncio.sleep(3)

    print('done')


async def _scrape_with_browser(search_url, proxyurl, user_agent, social_network_val, search_engine):
    """Scrape the search results with an undetected-chromedriver Chrome session.

    Args:
        search_url: Search URL to open.
        proxyurl: Proxy URL passed to Chrome, or '' for a direct connection.
        user_agent: User-Agent string the browser should announce.
        social_network_val: Selected site filter; decides the result selector.
        search_engine: 'Google' runs the scraper; 'Yahoo' is only a stub.
    """
    Actor.log.info('Launching Chrome WebDriver...')
    chrome_options = uc.ChromeOptions()
    chrome_options.add_argument(f'user-agent={user_agent}')
    chrome_options.add_argument("--disable-blink-features=AutomationControlled")
    chrome_options.add_argument('--no-sandbox')
    chrome_options.add_argument('--disable-dev-shm-usage')

    if proxyurl:
        print('apply proxy')
        # Chrome switches start with two plain hyphens.
        chrome_options.add_argument(f"--proxy-server={proxyurl}")

    driver = uc.Chrome(options=chrome_options, use_subprocess=False, version_main=137)
    try:
        driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
            "source": """
                Object.defineProperty(navigator, 'webdriver', {
                    get: () => undefined
                })
            """
        })

        driver.get("https://httpbin.io/ip")
        ip_address = driver.find_element(By.TAG_NAME, "body").text
        print(ip_address)

        try:
            driver.get(search_url)
        except Exception as e:
            print(e)
            # Exceptions are not JSON serialisable; store the message instead.
            await Actor.push_data({'Email': str(e)})
            return

        await _wait_while_captcha(driver)

        if search_engine == 'Yahoo':
            print('yahoo')
            return

        try:
            await _scrape_result_pages(driver, social_network_val)
        except Exception:
            Actor.log.exception('Cannot extract data from .')
    finally:
        # Single exit point for the browser, whatever happened above.
        driver.quit()


async def main() -> None:
    """
    The main coroutine is being executed using `asyncio.run()`, so do not attempt to make a normal function
    out of it, it will not work. Asynchronous execution is required for communication with Apify platform,
    and it also enhances performance in the field of web scraping significantly.

    Reads the actor input, builds a Google query from the keyword, e-mail domain option, location and
    optional site filter, requests the result pages and pushes one dataset item per new e-mail address.
    """
    async with Actor:

        actor_input = await Actor.get_input() or {}
        Keyword_val = actor_input.get('Keyword')
        location_val = actor_input.get('location')
        social_network_val = actor_input.get('social_network')
        Country_val = actor_input.get('Country')
        Email_Type_val = actor_input.get('Email_Type')
        Other_Email_Type_val = actor_input.get('Other_Email_Type')
        proxy_settings = actor_input.get('proxySettings')

        proxy_configuration = await Actor.create_proxy_configuration(groups=['GOOGLE_SERP'])

        proxyurl = ''
        if proxy_configuration and proxy_settings:
            proxyurl = await proxy_configuration.new_url()

        if not Keyword_val:
            Actor.log.info('Please insert keyword')
            await Actor.push_data({'Email': 'Please insert keyword'})
            await Actor.exit()
            return

        if Keyword_val == 'TestKeyword':
            Actor.log.info('Please insert keyword')
            await Actor.push_data({'Email': 'Please insert Your Keyword'})
            await Actor.exit()
            return

        # Disabled alternative implementation, kept as an inert string literal.
        '''
        me = await Actor.apify_client.user('me').get()
        username=me["username"]
        isPaying='PayingUser'
        if(me["isPaying"]==False):
            isPaying='FreeUser'

        try:
            proxyurl=''
            USE_Proxy=False
            if proxyurl:
                USE_Proxy=True
            my_proxy_settings = parse_proxy_url(proxyurl)
            # Call the function from the imported module
            await StartProcess(
                "Apify_camp_LinkedinEmail_"+username+"_"+isPaying,
                "ALLINONE",
                Keyword_val,
                location_val,
                social_network_val,
                Country_val,
                "Google",
                USE_Proxy,
                my_proxy_settings
            )

        finally:
            # Code that always executes (e.g., cleanup)
            print("This block always runs, regardless of exceptions.")

        await Actor.exit();
        '''

        # Parallel lists: l1[i] is the input country code of the country named l2[i].
        l1 = ["it","www","gm","fr","sp","uk","al","ag","ar","am","as","au","aj","bg","bo","be","bh","bk","br","bu","ca","ci","ch","co","cs","hr","cu","ez","dk","ec","eg","en","fi","gg","gr","hk","hu","ic","in","id","ir","iz","ei","is","jm","ja","ke","kn","ks","ku","lg","ly","ls","lh","lu","mc","mk","my","mt","mx","md","mn","mj","mo","np","nl","nz","ni","no","pk","we","pm","pa","pe","rp","pl","po","rq","qa","ro","rs","sm","sa","sg","ri","sn","lo","si","sf","sw","sz","sy","tw","th","ts","tu","ua","ae","uy","uz","ve"]
        l2 = ["Italy","United States","Germany","France","Spain","United Kingdom","Albania","Algeria","Argentina","Armenia","Australia","Austria","Azerbaijan","Bangladesh","Belarus","Belgium","Belize","Bosnia and Herzegovina","Brazil","Bulgaria","Canada","Chile","China","Colombia","Costa Rica","Croatia","Cuba","Czechia","Denmark","Ecuador","Egypt","Estonia","Finland","Georgia","Greece","Hong Kong","Hungary","Iceland","India","Indonesia","Iran","Iraq","Ireland","Israel","Jamaica","Japan","Kenya","Korea","Korea, Republic of","Kuwait","Latvia","Libya","Liechtenstein","Lithuania","Luxembourg","Macao","Macedonia","Malaysia","Malta","Mexico","Moldova, Republic of","Monaco","Montenegro","Morocco","Nepal","Netherlands","New Zealand","Nigeria","Norway","Pakistan","Palestine, State of","Panama","Paraguay","Peru","Philippines","Poland","Portugal","Puerto Rico","Qatar","Romania","Russia","San Marino","Saudi Arabia","Senegal","Serbia","Singapore","Slovakia","Slovenia","South Africa","Sweden","Switzerland","Syrian Arab Republic","Taiwan","Thailand","Tunisia","Turkey","Ukraine","United Arab Emirates","Uruguay","Uzbekistan","Venezuela"]
        # Unknown or missing codes resolve to 'United States'.
        select_country = dict(zip(l1, l2)).get(Country_val, 'United States')
        print(select_country)

        # ---- Build the search query -------------------------------------
        concatstring = Keyword_val
        # Leading space so the option does not fuse with the keyword.
        option = " ( @gmail.com OR @hotmail.com OR @yahoo.com)"
        if Email_Type_val == "1":
            if not Other_Email_Type_val:
                Actor.log.info('Please insert Email Type Domain')
                await Actor.push_data({'Email': 'Please insert Email Type Domain'})
                await Actor.exit()
                return
            if "@" in Other_Email_Type_val:
                option = " ( " + Other_Email_Type_val + " )"
            else:
                option = " ( @" + Other_Email_Type_val + " )"
        concatstring = concatstring + option
        if location_val:
            concatstring = concatstring + " in " + location_val

        if social_network_val:
            site_filter = social_network_val
            if social_network_val == "linkedin.com/" or social_network_val == "pinterest.com/":
                # These sites use the country code as a sub-domain prefix;
                # skip the prefix when no country was supplied.
                if Country_val:
                    site_filter = Country_val + "." + site_filter
            elif social_network_val == "amazon.com/":
                # Unknown or missing country codes fall back to amazon.com.
                amazon_tld = _AMAZON_TLD_BY_COUNTRY.get(Country_val, 'com')
                site_filter = site_filter.replace('.com', '.' + amazon_tld)
            concatstring = concatstring + " site:" + site_filter

        SearchEngine = 'Google'
        desktop_user_agents = [
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.6167.139 Safari/537.36",
            "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",

            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:125.0) Gecko/20100101 Firefox/125.0",
            "Mozilla/5.0 (Windows NT 10.0; rv:124.0) Gecko/20100101 Firefox/124.0",
            "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:122.0) Gecko/20100101 Firefox/122.0",

            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.6312.86 Safari/537.36 Edg/123.0.2420.65",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.6261.112 Safari/537.36 Edg/122.0.2365.66",

            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 OPR/96.0.0.0",

            "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_5_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.6261.129 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 12_6_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",

            "Mozilla/5.0 (Macintosh; Intel Mac OS X 13.5; rv:124.0) Gecko/20100101 Firefox/124.0",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:123.0) Gecko/20100101 Firefox/123.0",

            "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.4 Safari/605.1.15",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 12_3_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.4 Safari/605.1.15",

            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.6167.184 Safari/537.36",
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",

            "Mozilla/5.0 (X11; Linux x86_64; rv:125.0) Gecko/20100101 Firefox/125.0",
            "Mozilla/5.0 (X11; Linux x86_64; rv:124.0) Gecko/20100101 Firefox/124.0",
            "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:123.0) Gecko/20100101 Firefox/123.0",

            "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_2_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.3 Safari/605.1.15",

            "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_4_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.6312.86 Safari/537.36 Edg/123.0.2420.65",

            "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_2_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36 OPR/96.0.0.0",
        ]

        mobile_user_agents = [
            "Mozilla/5.0 (Linux; Android 13; Pixel 7 Pro) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.6312.105 Mobile Safari/537.36",
            "Mozilla/5.0 (Linux; Android 12; Pixel 6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Mobile Safari/537.36",
            "Mozilla/5.0 (Linux; Android 11; Pixel 4a) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.6167.140 Mobile Safari/537.36",

            "Mozilla/5.0 (Linux; Android 13; SAMSUNG SM-S911B) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/24.0 Chrome/123.0.0.0 Mobile Safari/537.36",
            "Mozilla/5.0 (Linux; Android 12; SAMSUNG SM-A525F) AppleWebKit/537.36 (KHTML, like Gecko) SamsungBrowser/23.0 Chrome/121.0.0.0 Mobile Safari/537.36",

            "Mozilla/5.0 (Linux; Android 13; M2101K6G) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.6312.105 Mobile Safari/537.36",

            "Mozilla/5.0 (iPhone; CPU iPhone OS 17_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1",
            "Mozilla/5.0 (iPhone; CPU iPhone OS 16_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.0 Mobile/15E148 Safari/604.1",
            "Mozilla/5.0 (iPhone; CPU iPhone OS 15_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Mobile/15E148 Safari/604.1",

            "Mozilla/5.0 (iPad; CPU OS 17_4 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1",
            "Mozilla/5.0 (iPad; CPU OS 15_5 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.0 Mobile/15E148 Safari/604.1",

            "Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/537.36 (KHTML, like Gecko) CriOS/123.0.0.0 Mobile/15E148 Safari/604.1",

            "Mozilla/5.0 (Linux; Android 13; Pixel 6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Mobile Safari/537.36 OPR/76.0.4017.123",

            "Mozilla/5.0 (Linux; Android 13; Pixel 6 Pro) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Mobile Safari/537.36 EdgA/123.0.2420.64",

            "Mozilla/5.0 (Android 13; Mobile; rv:124.0) Gecko/124.0 Firefox/124.0",
            "Mozilla/5.0 (Android 12; Mobile; rv:122.0) Gecko/122.0 Firefox/122.0",
        ]

        all_user_agents = desktop_user_agents + mobile_user_agents
        random_user_agent = random.choice(all_user_agents)

        qry = "https://google.com/search?q=" + concatstring
        Actor.log.info(concatstring)

        # ---- Scrape the result pages over plain HTTP ---------------------
        seen_emails = set()
        max_pages = 100
        proxies = {'http': proxyurl, 'https': proxyurl} if proxyurl else None
        search_headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                          "AppleWebKit/537.36 (KHTML, like Gecko) "
                          "Chrome/120.0.0.0 Safari/537.36"
        }

        for page in range(max_pages):
            if page:
                # Pause between consecutive requests, including after a failed
                # one, so an error response is not followed by a burst of retries.
                await asyncio.sleep(random.uniform(1.5, 3.0))

            try:
                response = requests.get(
                    "http://www.google.com/search",
                    # ``params`` URL-encodes the query, so keywords containing
                    # "&", "#" or "+" no longer corrupt the request.
                    params={"q": concatstring, "num": 10, "hl": "en", "start": page * 10},
                    proxies=proxies,
                    headers=search_headers,
                    timeout=30,
                )
            except requests.exceptions.RequestException as err:
                Actor.log.warning(f"⚠️ Request failed: {err!r}")
                continue

            if response.status_code != 200:
                Actor.log.warning(f"⚠️ Request failed: {response.status_code} {response.reason}")
                continue

            soup = BeautifulSoup(response.text, "html.parser")
            for block in soup.select("div.g, div.tF2Cxc"):
                title_el = block.select_one("h3")
                link_el = block.select_one("a")
                snippet_el = block.select_one(".VwiC3b, .IsZvec, .aCOpRe")

                # Any of the three elements may be missing from a result block.
                title = title_el.get_text(strip=True) if title_el is not None else ''
                detail_link = link_el.get("href") if link_el is not None else None
                snippet = snippet_el.get_text(strip=True) if snippet_el is not None else ''
                print(title)

                alltext = title + snippet
                emails = await _collect_emails(alltext, alltext)
                await _push_new_emails(emails, seen_emails, title, alltext, detail_link)

        await Actor.exit()

        # NOTE(review): Actor.exit() normally terminates the process, so the
        # browser-based scraper below does not run. It is kept, with its bugs
        # fixed, in the position it always had; decide whether to delete it or
        # to make it an explicit fallback.
        await _scrape_with_browser(qry, proxyurl, random_user_agent, social_network_val, SearchEngine)