# Source code for scipost.services

__copyright__ = "Copyright © Stichting SciPost (SciPost Foundation)"
__license__ = "AGPL v3"


# Module for making external API calls as needed in the submissions cycle.
import feedparser
import requests
import datetime
import dateutil.parser
import logging

# One named logger per external service, so arXiv and DOI lookups can be
# configured and filtered independently in the logging settings.
arxiv_logger = logging.getLogger('scipost.services.arxiv')
doi_logger = logging.getLogger('scipost.services.doi')


class DOICaller:
    """Retrieve and format publication metadata for a DOI via the Crossref API.

    Instantiating the class performs the HTTP request immediately. Afterwards:

    * ``is_valid`` is True when Crossref answered with a successful status.
    * ``data`` (only set when ``is_valid``) is a dict with the keys ``title``,
      ``author_list``, ``journal``, ``volume``, ``pages`` and ``pub_date``.

    Fields missing from the Crossref record are returned as empty strings.
    """

    # Seconds to wait for Crossref before giving up; without a timeout a
    # stalled connection would block the caller indefinitely.
    request_timeout = 30

    def __init__(self, doi_string):
        """Query Crossref for *doi_string* and format the result if valid."""
        self.doi_string = doi_string
        doi_logger.info('New DOI call for %s', doi_string)
        self._call_crosslink()
        if self.is_valid:
            self._format_data()

    def _call_crosslink(self):
        """Fetch the Crossref record; set ``is_valid`` and ``_crossref_data``."""
        url = 'https://api.crossref.org/works/%s' % self.doi_string
        response = requests.get(url, timeout=self.request_timeout)
        doi_logger.info('GET [%s] [request] | %s', self.doi_string, url)

        if response.ok:
            self.is_valid = True
            self._crossref_data = response.json()['message']
        else:
            self.is_valid = False

        doi_logger.info(
            'GET [%s] [response %s] | %s',
            self.doi_string,
            'VALID' if self.is_valid else 'INVALID',
            response.text,
        )

    def _format_data(self):
        """Build ``self.data`` from the raw Crossref record."""
        data = self._crossref_data
        self.data = {
            'title': self._first_or_empty(data.get('title')),
            'author_list': self._format_author_list(data.get('author')),
            'journal': self._first_or_empty(data.get('container-title')),
            'volume': data.get('volume', ''),
            'pages': self._get_pages(data),
            'pub_date': self._get_pub_date(data),
        }
        doi_logger.info(
            'GET [%s] [formatted data] | %s', self.doi_string, self.data)

    @staticmethod
    def _first_or_empty(values):
        """Return the first item of *values*, or '' if it is missing or empty."""
        return values[0] if values else ''

    @staticmethod
    def _format_author_list(authors):
        """Return the author names joined by ', '.

        The author list is stored as a comma separated list of names on the
        relevant models. Each author is rendered as "given family" when both
        parts are present, else by ``name``, else by ``family`` alone; entries
        carrying none of these keys are skipped.
        """
        names = []
        for author in authors or []:
            if 'given' in author and 'family' in author:
                names.append('{} {}'.format(author['given'], author['family']))
            elif 'name' in author:
                names.append(author['name'])
            elif 'family' in author:
                names.append(author['family'])
        return ', '.join(names)

    def _get_pages(self, data):
        """Return ``article-number`` if set, otherwise ``page``, otherwise ''."""
        # 'article-number' is what Physical Review uses; other journals
        # are expected to provide 'page' instead.
        return data.get('article-number', '') or data.get('page', '')

    def _get_pub_date(self, data):
        """Return the ``issued`` date as an ISO string, or '' if unavailable.

        A missing month or day defaults to 1. An absent, empty or null date,
        or one that does not form a valid calendar date, yields ''.
        """
        date_parts = (data.get('issued') or {}).get('date-parts') or []
        parts = date_parts[0] if date_parts else []
        # Guard against a null year (date-parts == [[None]]).
        if not parts or parts[0] is None:
            return ''

        # Pad with defaults so that [year] and [year, month] are accepted.
        year, month, day = (list(parts) + [1, 1])[:3]
        try:
            return datetime.date(year, month, day).isoformat()
        except (TypeError, ValueError):
            return ''
class ArxivCaller:
    """ArXiv Caller will help retrieve Submission data from arXiv API.

    Instantiating the class performs the HTTP request immediately. Afterwards:

    * ``is_valid`` is True when the parsed feed contains a usable first entry.
    * ``metadata`` (only set when ``is_valid``) is the full parsed feed.
    * ``data`` (only set when ``is_valid``) is a dict with the keys ``title``,
      ``author_list``, ``arxiv_link``, ``pub_abstract``, ``abstract`` and
      ``pub_date``.
    """

    query_base_url = 'https://export.arxiv.org/api/query?id_list=%s'

    # Seconds to wait for arXiv before giving up; without a timeout a
    # stalled connection would block the caller indefinitely.
    request_timeout = 30

    def __init__(self, identifier):
        """Query arXiv for *identifier* and format the result if valid."""
        self.identifier = identifier
        arxiv_logger.info('New ArXiv call for identifier %s', identifier)
        self._call_arxiv()
        if self.is_valid:
            self._format_data()

    def _call_arxiv(self):
        """Fetch and parse the Atom feed; set ``is_valid`` and the raw data."""
        url = self.query_base_url % self.identifier
        response = requests.get(url, timeout=self.request_timeout)
        response_content = feedparser.parse(response.content)
        arxiv_logger.info('GET [%s] [request] | %s', self.identifier, url)

        if self._search_result_present(response_content):
            self.is_valid = True
            self._arxiv_data = response_content['entries'][0]
            self.metadata = response_content
        else:
            self.is_valid = False

        arxiv_logger.info(
            'GET [%s] [response %s] | %s',
            self.identifier,
            'VALID' if self.is_valid else 'INVALID',
            response_content,
        )

    def _format_data(self):
        """Build ``self.data`` from the first feed entry."""
        data = self._arxiv_data
        # author_list is given as a comma separated list of names on the
        # relevant models (Commentary, Submission).
        author_list = ', '.join(
            author['name'] for author in data.get('authors', []))
        abstract = data['summary']

        self.data = {
            'title': data['title'],
            'author_list': author_list,
            'arxiv_link': data['id'].replace('http:', 'https:'),
            'pub_abstract': abstract,
            # Duplicate for Commentary/Submission cross-compatibility
            'abstract': abstract,
            'pub_date': dateutil.parser.parse(data['published']).date(),
        }
        arxiv_logger.info(
            'GET [%s] [formatted data] | %s', self.identifier, self.data)

    def _search_result_present(self, data):
        """Return True when the first entry of *data* is a real search result.

        An empty feed, a first entry without a title, or an arXiv error entry
        all count as "no result".
        """
        entries = data.get('entries') or []
        if not entries:
            return False

        first_entry = entries[0]
        # NOTE(review): per the arXiv API manual, request errors come back as
        # a single entry titled "Error" whose id lives under /api/errors. It
        # carries no publication data, so it must not be treated as a hit.
        if '/api/errors' in (first_entry.get('id') or ''):
            return False
        return 'title' in first_entry