Update error handling for loading metadata from Amazon (#2333)
This commit is contained in:
parent
34478079d8
commit
49692b4a45
@ -25,8 +25,11 @@ try:
|
|||||||
except ImportError:
|
except ImportError:
|
||||||
pass
|
pass
|
||||||
from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
|
from cps.services.Metadata import MetaRecord, MetaSourceInfo, Metadata
|
||||||
|
import cps.logger as logger
|
||||||
|
|
||||||
#from time import time
|
#from time import time
|
||||||
from operator import itemgetter
|
from operator import itemgetter
|
||||||
|
log = logger.create()
|
||||||
|
|
||||||
class Amazon(Metadata):
|
class Amazon(Metadata):
|
||||||
__name__ = "Amazon"
|
__name__ = "Amazon"
|
||||||
@ -48,15 +51,15 @@ class Amazon(Metadata):
|
|||||||
self, query: str, generic_cover: str = "", locale: str = "en"
|
self, query: str, generic_cover: str = "", locale: str = "en"
|
||||||
):
|
):
|
||||||
#timer=time()
|
#timer=time()
|
||||||
def inner(link,index)->[dict,int]:
|
def inner(link, index) -> [dict, int]:
|
||||||
|
try:
|
||||||
with self.session as session:
|
with self.session as session:
|
||||||
r = session.get(f"https://www.amazon.com/{link}")
|
r = session.get(f"https://www.amazon.com{link}")
|
||||||
r.raise_for_status()
|
r.raise_for_status()
|
||||||
long_soup = BS(r.text, "lxml") #~4sec :/
|
long_soup = BS(r.text, "lxml") #~4sec :/
|
||||||
soup2 = long_soup.find("div", attrs={"cel_widget_id": "dpx-books-ppd_csm_instrumentation_wrapper"})
|
soup2 = long_soup.find("div", attrs={"cel_widget_id": "dpx-books-ppd_csm_instrumentation_wrapper"})
|
||||||
if soup2 is None:
|
if soup2 is None:
|
||||||
return
|
return
|
||||||
try:
|
|
||||||
match = MetaRecord(
|
match = MetaRecord(
|
||||||
title = "",
|
title = "",
|
||||||
authors = "",
|
authors = "",
|
||||||
@ -65,7 +68,7 @@ class Amazon(Metadata):
|
|||||||
description="Amazon Books",
|
description="Amazon Books",
|
||||||
link="https://amazon.com/"
|
link="https://amazon.com/"
|
||||||
),
|
),
|
||||||
url = f"https://www.amazon.com/{link}",
|
url = f"https://www.amazon.com{link}",
|
||||||
#the more searches the slower, these are too hard to find in reasonable time or might not even exist
|
#the more searches the slower, these are too hard to find in reasonable time or might not even exist
|
||||||
publisher= "", # very unreliable
|
publisher= "", # very unreliable
|
||||||
publishedDate= "", # very unreliable
|
publishedDate= "", # very unreliable
|
||||||
@ -102,13 +105,15 @@ class Amazon(Metadata):
|
|||||||
match.cover = ""
|
match.cover = ""
|
||||||
return match, index
|
return match, index
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(e)
|
log.debug_or_exception(e)
|
||||||
return
|
return
|
||||||
|
|
||||||
val = list()
|
val = list()
|
||||||
|
try:
|
||||||
if self.active:
|
if self.active:
|
||||||
results = self.session.get(
|
results = self.session.get(
|
||||||
f"https://www.amazon.com/s?k={query.replace(' ', '+')}&i=digital-text&sprefix={query.replace(' ', '+')}"
|
f"https://www.amazon.com/s?k={query.replace(' ', '+')}"
|
||||||
|
f"&i=digital-text&sprefix={query.replace(' ', '+')}"
|
||||||
f"%2Cdigital-text&ref=nb_sb_noss",
|
f"%2Cdigital-text&ref=nb_sb_noss",
|
||||||
headers=self.headers)
|
headers=self.headers)
|
||||||
results.raise_for_status()
|
results.raise_for_status()
|
||||||
@ -117,6 +122,9 @@ class Amazon(Metadata):
|
|||||||
soup.findAll("div", attrs={"data-component-type": "s-search-result"})]
|
soup.findAll("div", attrs={"data-component-type": "s-search-result"})]
|
||||||
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
|
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
|
||||||
fut = {executor.submit(inner, link, index) for index, link in enumerate(links_list[:5])}
|
fut = {executor.submit(inner, link, index) for index, link in enumerate(links_list[:5])}
|
||||||
val=list(map(lambda x : x.result() ,concurrent.futures.as_completed(fut)))
|
val = list(map(lambda x: x.result(), concurrent.futures.as_completed(fut)))
|
||||||
result=list(filter(lambda x: x, val))
|
result = list(filter(lambda x: x, val))
|
||||||
return [x[0] for x in sorted(result, key=itemgetter(1))] #sort by amazons listing order for best relevance
|
return [x[0] for x in sorted(result, key=itemgetter(1))] #sort by amazons listing order for best relevance
|
||||||
|
except requests.exceptions.HTTPError as e:
|
||||||
|
log.debug_or_exception(e)
|
||||||
|
return []
|
||||||
|
Loading…
Reference in New Issue
Block a user