;;;; -*- Mode: Lisp; Syntax: ANSI-Common-Lisp; Base: 10 -*-
;;;; *************************************************************************
;;;; FILE IDENTIFICATION
;;;;
;;;; Name:          pubmed-src.lisp
;;;; Purpose:       Library to access PubMed web application
;;;; Programmer:    Kevin M. Rosenberg
;;;; Date Started:  Jun 2001
;;;;
;;;; $Id: pubmed-src.lisp 9043 2004-04-17 18:24:17Z kevin $
;;;;
;;;; This file, part of cl-pubmed, is Copyright (c) 2002 by Kevin M. Rosenberg
;;;;
;;;; cl-pubmed users are granted the rights to distribute and use this software
;;;; as governed by the terms of the GNU Lesser General Public License 
;;;; (http://www.gnu.org/licenses/lgpl.html)
;;;; *************************************************************************

(in-package #:pubmed)


(defparameter +pubmed-host+ "www.ncbi.nlm.nih.gov")
(defparameter +pubmed-query-url+ "/entrez/utils/pmqty.fcgi")
(defparameter +pubmed-fetch-url+ "/entrez/utils/pmfetch.fcgi")
(defparameter *proxy-host* nil)


(define-condition pubmed-condition ()
  ())

  
(define-condition pubmed-server-error (error pubmed-condition)
  ((response :initarg :response
	     :initform nil
	     :reader pubmed-condition-response))
  (:report (lambda (c stream)
	     (format stream "A PubMed server error occurred.")
	     (awhen (pubmed-condition-response c)
		    (format stream " The server response was:~&~S" it)))))

(define-condition pubmed-query-error (error pubmed-condition)
  ((response :initarg :response
	     :initform nil
	     :reader pubmed-condition-response))
  (:report (lambda (c stream)
	     (format stream "A PubMed server error occurred.")
	     (awhen (pubmed-condition-response c)
		    (format stream " The server response was:~&~S" it)))))
  
;;; Article-Set and Article Classes

(defclass pm-article-set ()
  ((query :type string :initarg :query :accessor articles-query)
   (articles :type list :initarg :articles :accessor articles)
   (total :type fixnum :initarg :total :accessor articles-total)
   (count :type fixnum :initarg :count :accessor articles-count)
   (start :type fixnum :initarg :start :accessor articles-start))
  (:documentation "Pubmed Article Set Class")
  (:default-initargs :total 0 :start 0 :count 0
		     :query nil :articles nil))

(defclass pm-article ()
  (
   (pmid :type integer :accessor article-pmid)
   (title :type string :accessor article-title)
   (authors :type list :accessor article-authors)
   (affiliation :type string :accessor article-affiliation)
   (journal :type string :accessor article-journal)
   (date :type string :accessor article-date)
   (volume :type string :accessor article-volume)
   (issue :type string :accessor article-issue)
   (pages :type string :accessor article-pages)
   (abstract :type string :accessor article-abstract)
   (mesh-headings :type list :accessor article-mesh-headings))
  (:documentation "Pubmed Article Class"))

(defmethod print-object ((obj pm-article-set) (s stream))
  (print-unreadable-object (obj s :type t :identity t)
    (format s "~d total articles, ~d articles starting at #~d" 
	    (articles-total obj)
	    (articles-count obj)
	    (articles-start obj)
	    )))

(defmethod print-object ((obj pm-article) (s stream))
  (print-unreadable-object (obj s :type t :identity t)
    (format s "pmid:~d, title:~S" (article-pmid obj)
	    (article-title obj))))

(defun article-equal-p (a b)
  (check-type a pm-article)
  (check-type b pm-article)
  (eql (article-pmid a) (article-pmid b)))

(defun article-ref (art)
  "Return a string of publication data for an article"
  (let ((ref ""))
    (awhen (article-date art)
	   (string-append ref (format nil "~a; " it)))
    (awhen (article-volume art)
	   (string-append ref it))
    (awhen (article-issue art)
	   (string-append ref (format nil "(~a)" it)))
    (awhen (article-pages art)
	   (string-append ref (format nil ":~a" it)))
    ref))

(defmethod print-article-set ((artset pm-article-set)
			      &key (os *standard-output*) (format :text)
			      (complete nil) (print-link nil))
  "Display an article set to specified stream in specified format"
  (dotimes (i (articles-count artset) artset)
    (if (nth i (articles artset))
	(print-article (nth i (articles artset)) :os os :format format 
		       :complete complete :print-link print-link)
      (princ "NULL Article" os))))

(defmethod print-article ((art pm-article) &key (os *standard-output*)
			  (format :text) (complete nil) (print-link nil))
  "Display an article"
  (ecase format
    (:text
     (format os "~a~%~a~%~a~a ~a~%~a~%" 
	     (article-title art)
	     (list-to-delimited-string (article-authors art) ", ")
	     (aif (article-affiliation art)
		  (format nil "~a~%" it) "")
	     (article-journal art) (article-ref art)
	     (aif (article-abstract art) 
		  (if complete
		      it
		    "Abstract available") 
		  "No abstract available")
	     (when complete
	       (format os "~a~%" (article-mesh-headings art)))))
     (:html
      (let ((has-link (or (article-abstract art) (article-mesh-headings art))))
	(when (and print-link has-link)
	  (format os "<a href=\"~A\">" (funcall print-link
						(article-pmid art))))
	(format os "<div class=\"article-title\">~a</div>~%"
		(article-title art))
	(when (and print-link has-link)
	  (format os "</a>"))
	(format os "<div class=\"article-authors\">~a</div>~%"
		(list-to-delimited-string (article-authors art) ", "))
	(format os "<div class=\"article-reference\">~a ~a</div>~%" 
		(article-journal art) (article-ref art))
	(when (and complete (article-abstract art))
	  (format os "<div class=\"article-abstract\">~a</div>~%" 
		  (article-abstract art)))
	(when (and complete (article-mesh-headings art))
	  (format os "<div class=\"mesh-heading-title\">Mesh Headings:</div>")
	  (dolist (mh (article-mesh-headings art))
	    (format os "<div class=\"mesh-heading\">~a</div>~%" mh)))
	(format os "<p/>~%"))))
  art)


;;; PubMed Query Functions

(defun pm-query (searchstr &key maximum start)
  "Performs PubMed query and fetch and returns article-set structure"
    (multiple-value-bind 
	(results status) 
	(pubmed-search-xml searchstr :maximum maximum :start start)
      (when (xml-tag-contents "Count" status)
	   (let ((as (make-instance 'pm-article-set)))
	     (setf 
		 (articles-total as) (parse-integer (xml-tag-contents "Count" status))
		 (articles-query as) searchstr
		 (articles-start as) (parse-integer (xml-tag-contents "DispStart" status))
		 (articles-count as) (parse-integer (xml-tag-contents "DispMax" status))
		 (articles as) (extract-article-set results))
	     as))))

(defun pm-fetch-ids (pmids)
  "Fetchs list of Pubmed ID's and returns pm-article-set class"
  (setq pmids (mklist pmids))
  (let ((results (pubmed-fetch-pmids-xml pmids)))
    (unless (xml-tag-contents "Error" results)
      (let ((as (make-instance 'pm-article-set)))
	(setf 
	    (articles-total as) (length pmids)
	    (articles-query as) (list-to-delimited-string pmids #\,)
	    (articles-start as) 0
	    (articles-count as) (length pmids)
	    (articles as) (extract-article-set results))
	as))))

#+ignore
(defun pubmed-search-tree (searchstr &key maximum start)
  "Performs a pubmed search and returns two values: 
tree of PubMed search results and tree of PubMed search status"
  (multiple-value-bind
      (xml-search-results xml-search-status)
      (pubmed-search-xml searchstr :maximum maximum :start start)
    (if xml-search-results
	(values (parse-xml-no-ws xml-search-results) 
		(parse-xml-no-ws xml-search-status))
      (values nil (parse-xml-no-ws xml-search-status)))))

(defun pubmed-search-xml (searchstr &key maximum start)
  "Performs a Pubmed search and returns two values: 
XML string of PubMed search results and XML search status"
  (multiple-value-bind 
      (pmids search-status)
      (pubmed-query-xml searchstr :maximum maximum :start start)
    (values (pubmed-fetch-pmids-xml pmids) search-status)))

(defun pubmed-query-xml (searchstr &key maximum start)
  "Performs a Pubmed search and returns two values:
 list of PubMed ID's that match search string and XML search status"
  (let ((search-results (pubmed-query-status searchstr :maximum maximum :start start)))
    (values (extract-pmid-list search-results) search-results)))

(defun pubmed-query-status (searchstr &key start maximum)
  "Performs a Pubmed search and returns XML results of PubMed search
 which contains PubMed ID's and status results"
  (let ((query-alist `(("db" . "m") ("term" . ,searchstr) ("mode" . "xml"))))
    (when maximum (push (cons "dispmax" maximum) query-alist))
    (when start (push (cons "dispstart" start) query-alist))
    (net.aserve.client:do-http-request
     (format nil "http://~a~a" +pubmed-host+ +pubmed-query-url+)
     :method :get
     :query query-alist
     :proxy *proxy-host*)))

(defun pubmed-fetch-pmids-xml (pmids)
  "Fetch articles for a list of PubMed ID's and return XML string"
  (setq pmids (mklist pmids)) ;; Ensure list
  (when pmids
      (net.aserve.client:do-http-request
       (format nil "http://~a~a" +pubmed-host+ +pubmed-fetch-url+)
       :method :get
       :query 
       `(("db" . "PubMed") ("report" . "xml") ("mode" . "text")
	 ("id" . ,(list-to-delimited-string pmids #\,)))
       :proxy *proxy-host*)))

;;; XML Extraction Routines

(defun extract-article-set (results)
  "Extract article set from PubMed XML string, return results in pm-article-set class"
  (multiple-value-bind (as-start as-end as-next) 
      (positions-xml-tag-contents "PubmedArticleSet" results)
    (declare (ignore as-end as-next))
    (when as-start
      (let ((done nil)
	    (articles '())
	    (pos as-start))
	(until done
	       (multiple-value-bind
		   (a-start a-end a-next)
		   (positions-xml-tag-contents "PubmedArticle" results pos)
		 (if a-start
		     (progn
		       (push (extract-article results a-start a-end) articles)
		       (setq pos a-next)
		       )
		   (setq done t))))
	(nreverse articles)))))

(defun extract-article (xmlstr a-start a-end)
  "Extract article contents from PubMed XML string and return results in pm-article class"
  (let ((article (make-instance 'pm-article)))
    (setf 
	(article-pmid article) (parse-integer (xml-tag-contents "PMID" xmlstr a-start a-end))
	(article-title article) (xml-tag-contents "ArticleTitle" xmlstr a-start a-end)
	(article-journal article) (xml-tag-contents "MedlineTA" xmlstr a-start a-end)
	(article-pages article) (xml-tag-contents "MedlinePgn" xmlstr a-start a-end)
	(article-affiliation article) (xml-tag-contents "Affiliation" xmlstr a-start a-end)
	(article-abstract article) (xml-tag-contents "AbstractText" xmlstr a-start a-end))
    (multiple-value-bind (ji-start ji-end ji-next)
	(positions-xml-tag-contents "JournalIssue" xmlstr a-start a-end)
      (declare (ignore ji-next))
      (setf
	  (article-volume article) (xml-tag-contents "Volume" xmlstr ji-start ji-end)
	  (article-issue article) (xml-tag-contents "Issue" xmlstr ji-start ji-end))
      (aif (xml-tag-contents "MedlineDate" xmlstr ji-start ji-end)
	   (setf (article-date article) it)
	   (setf (article-date article)
	     (concatenate 'string (xml-tag-contents "Year" xmlstr ji-start ji-end)
			  (aif (xml-tag-contents "Month" xmlstr ji-start ji-end)
			       (format nil " ~a" it)
			       "")))))
	  
    (multiple-value-bind (al-start al-end al-next)
	(positions-xml-tag-contents "AuthorList" xmlstr a-start a-end)
      (declare (ignore al-next))
      (setf (article-authors article)
	(when al-start
	    (let ((done nil)
		  (authors '())
		  (pos al-start))
	      (until done
		     (multiple-value-bind
			 (au-start au-end au-next)
			 (positions-xml-tag-contents "Author" xmlstr pos al-end)
		       (if au-start
			   (progn
			     (push (extract-author xmlstr au-start au-end) authors)
			     (setq pos au-next))
			 (setq done t))))
	      (nreverse authors)))))

    (multiple-value-bind (mhl-start mhl-end mhl-next)
	(positions-xml-tag-contents "MeshHeadingList" xmlstr a-start a-end)
      (declare (ignore mhl-next))
      (setf (article-mesh-headings article)
	(when mhl-start
	    (let ((done nil)
		  (mesh-headings '())
		  (pos mhl-start))
	      (until done
		     (multiple-value-bind
			 (mh-start mh-end mh-next)
			 (positions-xml-tag-contents "MeshHeading" xmlstr pos mhl-end)
		       (if mh-start
			   (progn
			     (push (extract-mesh-heading xmlstr mh-start mh-end) mesh-headings)
			     (setq pos mh-next)
			     )
			 (setq done t))))
	      (nreverse mesh-headings)))))

    article))

(defun extract-author (xmlstr start end)
  "Extract author name from XML string"
  (let ((last-name (xml-tag-contents "LastName" xmlstr start end))
	(initials  (xml-tag-contents "Initials" xmlstr start end)))
    (concatenate 'string last-name " " initials)))

(defun extract-mesh-heading (xmlstr start end)
  "Extract and format mesh headings from XML string"
  (let ((desc (xml-tag-contents "DescriptorName" xmlstr start end))
	(sh  (xml-tag-contents "SubHeading" xmlstr start end)))
    (if sh
	(format nil "~a(~a)" desc sh)
      desc)))

(defun extract-pmid-list (results)
  "Returns list of PubMed ID's from XML result string"
  (cond
   ((search "<ERROR>" results)
    (error 'pubmed-query-error :response results))
   ((search "<H1>Server Error</H1>" results)
    (error 'pubmed-server-error :response results))
   (t
    (awhen (xml-tag-contents "Id" results)
	   (delimited-string-to-list it #\space)))))



