# Source code for scipost.services

__copyright__ = "Copyright © Stichting SciPost (SciPost Foundation)"
__license__ = "AGPL v3"


# Module for making external API calls (Crossref, arXiv) as needed in the submissions cycle.
import feedparser
import requests
import datetime
import dateutil.parser
import logging

# One named logger per external service, so arXiv lookups and DOI lookups
# are logged under separate logger names.
arxiv_logger = logging.getLogger('scipost.services.arxiv')
doi_logger = logging.getLogger('scipost.services.doi')


class DOICaller:
    """Fetch and normalise publication metadata for a DOI from the Crossref API.

    Instantiating the class performs the HTTP call immediately. Afterwards:

    * ``is_valid`` is True when Crossref returned a usable JSON record and
      False otherwise (HTTP error status, network failure, malformed body).
    * ``data`` is only set when ``is_valid`` is True. It is a dict with the
      keys ``title``, ``author_list``, ``journal``, ``volume``, ``pages`` and
      ``pub_date``; fields missing from the Crossref record are empty strings.
    """

    query_base_url = 'https://api.crossref.org/works/%s'
    # Seconds to wait for Crossref; without a timeout ``requests`` can block
    # indefinitely on an unresponsive server.
    request_timeout = 30

    def __init__(self, doi_string):
        self.doi_string = doi_string
        doi_logger.info('New DOI call for %s', doi_string)

        self._call_crosslink()
        if self.is_valid:
            self._format_data()

    def _call_crosslink(self):
        """Query Crossref; set ``is_valid`` and, on success, ``_crossref_data``."""
        url = self.query_base_url % self.doi_string
        doi_logger.info('GET [%s] [request] | %s', self.doi_string, url)

        # Pessimistic default so every exit path leaves ``is_valid`` defined.
        self.is_valid = False
        try:
            request = requests.get(url, timeout=self.request_timeout)
        except requests.RequestException as exc:
            # A network failure is reported as an invalid lookup instead of
            # escaping from the constructor.
            doi_logger.info(
                'GET [%s] [response INVALID] | %s', self.doi_string, exc)
            return

        if request.ok:
            try:
                message = request.json()['message']
            except (ValueError, KeyError, TypeError):
                # Body was not JSON, or lacked the expected envelope.
                message = None
            if isinstance(message, dict):
                self._crossref_data = message
                self.is_valid = True

        doi_logger.info(
            'GET [%s] [response %s] | %s',
            self.doi_string,
            'VALID' if self.is_valid else 'INVALID',
            request.text,
        )

    def _format_data(self):
        """Build ``self.data`` from the raw Crossref record."""
        data = self._crossref_data

        self.data = {
            'title': self._first_item(data.get('title')),
            'author_list': self._format_authors(data.get('author')),
            'journal': self._first_item(data.get('container-title')),
            'volume': data.get('volume', ''),
            'pages': self._get_pages(data),
            'pub_date': self._get_pub_date(data),
        }

        doi_logger.info(
            'GET [%s] [formatted data] | %s', self.doi_string, self.data)

    @staticmethod
    def _first_item(values):
        """Return the first element of a list-valued field, or '' if absent/empty."""
        if not values:
            return ''
        if isinstance(values, str):
            # Tolerate a bare string instead of returning its first character.
            return values
        return values[0]

    @staticmethod
    def _format_authors(authors):
        """Return the authors as one comma separated string.

        author_list is given as a comma separated list of names on the
        relevant models. Each entry is rendered as "given family" when both
        parts exist, otherwise as its ``name`` (used e.g. for collaborations),
        otherwise as whichever single name part is present. Entries without
        any usable name are skipped.
        """
        names = []
        for author in authors or []:
            given = author.get('given')
            family = author.get('family')
            if given and family:
                name = '{} {}'.format(given, family)
            else:
                name = author.get('name') or given or family
            if name:
                names.append(name)
        return ', '.join(names)

    def _get_pages(self, data):
        """Return the article number if present, else the page range, else ''."""
        # 'article-number' is used by e.g. Physical Review; other journals
        # provide 'page' instead.
        return data.get('article-number') or data.get('page', '')

    def _get_pub_date(self, data):
        """Return the 'issued' date as an ISO ``YYYY-MM-DD`` string, or ''.

        A missing month or day defaults to 1. An absent, empty, or unusable
        date (for example a ``[[None]]`` placeholder) yields ''.
        """
        issued = data.get('issued') or {}
        date_parts = issued.get('date-parts') or []
        first = date_parts[0] if date_parts else None
        if not first or first[0] is None:
            return ''

        # Pad the optional month/day components with 1.
        month, day = ([part or 1 for part in first[1:3]] + [1, 1])[:2]
        try:
            return datetime.date(int(first[0]), int(month), int(day)).isoformat()
        except (TypeError, ValueError):
            return ''


class ArxivCaller:
    """ArXiv Caller will help retrieve Submission data from arXiv API.

    Instantiating the class performs the HTTP call immediately. Afterwards:

    * ``is_valid`` is True when the feed contained a real paper entry and
      False otherwise (no entry, an arXiv error entry, or a network failure).
    * ``metadata`` (the parsed feed) and ``data`` are only set when
      ``is_valid`` is True. ``data`` is a dict with the keys ``title``,
      ``author_list``, ``arxiv_link``, ``pub_abstract``, ``abstract`` and
      ``pub_date`` (a ``datetime.date``).
    """

    query_base_url = 'https://export.arxiv.org/api/query?id_list=%s'
    # Seconds to wait for arXiv; without a timeout ``requests`` can block
    # indefinitely on an unresponsive server.
    request_timeout = 30
    # arXiv reports errors as a normal feed with one entry whose id points
    # below this path; such an entry is not a paper.
    _error_id_marker = '/api/errors'

    def __init__(self, identifier):
        self.identifier = identifier
        arxiv_logger.info('New ArXiv call for identifier %s', identifier)
        self._call_arxiv()
        if self.is_valid:
            self._format_data()

    def _call_arxiv(self):
        """Query arXiv; set ``is_valid`` and, on success, the raw entry and feed."""
        url = self.query_base_url % self.identifier
        arxiv_logger.info('GET [%s] [request] | %s', self.identifier, url)

        # Pessimistic default so every exit path leaves ``is_valid`` defined.
        self.is_valid = False
        try:
            request = requests.get(url, timeout=self.request_timeout)
        except requests.RequestException as exc:
            # A network failure is reported as an invalid lookup instead of
            # escaping from the constructor.
            arxiv_logger.info(
                'GET [%s] [response INVALID] | %s', self.identifier, exc)
            return

        response_content = feedparser.parse(request.content)
        if self._search_result_present(response_content):
            self._arxiv_data = response_content['entries'][0]
            self.metadata = response_content
            self.is_valid = True

        arxiv_logger.info(
            'GET [%s] [response %s] | %s',
            self.identifier,
            'VALID' if self.is_valid else 'INVALID',
            response_content,
        )

    def _format_data(self):
        """Build ``self.data`` from the first feed entry."""
        data = self._arxiv_data
        # author_list is given as a comma separated list of names on the
        # relevant models (Commentary, Submission)
        author_list = ', '.join(
            author['name'] for author in data.get('authors', []))
        abstract = data['summary']

        self.data = {
            'title': data['title'],
            'author_list': author_list,
            # The feed reports http: ids; links are stored as https:.
            'arxiv_link': data['id'].replace('http:', 'https:'),
            'pub_abstract': abstract,
            'abstract': abstract,  # Duplicate for Commentary/Submission cross-compatibility
            'pub_date': dateutil.parser.parse(data['published']).date(),
        }
        arxiv_logger.info(
            'GET [%s] [formatted data] | %s', self.identifier, self.data)

    def _search_result_present(self, data):
        """Return True when the parsed feed's first entry is a real paper.

        The entry must carry a title and must not be an arXiv error entry.
        Error entries do have a title ("Error"), so the title check alone
        would let them through.
        """
        entries = data.get('entries') or []
        if not entries:
            return False
        entry = entries[0]
        if 'title' not in entry:
            return False
        return self._error_id_marker not in str(entry.get('id') or '')