From c0d7fdc5fe93c73c93d4c57db4966a8217a215e2 Mon Sep 17 00:00:00 2001
From: Diego Doe <diegovs87@yahoo.fr>
Date: Thu, 10 Jan 2019 22:03:25 +0100
Subject: [PATCH 1/5] Fixed problem apparently due to using http protocol
 instead of https.

---
 R/pmcOAI.R | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)
diff --git a/R/pmcOAI.R b/R/pmcOAI.R
index b697fd4..4cf1af5 100644
--- a/R/pmcOAI.R
+++ b/R/pmcOAI.R
@@ -1,6 +1,6 @@
 # Get XML from PMC-OAI service  (Pubmed Central Open Archives Initiative)
 
-# http://www.ncbi.nlm.nih.gov/pmc/tools/oai/
+# https://www.ncbi.nlm.nih.gov/pmc/tools/oai/
 
 pmcOAI <- function(id,  ...){
   
@@ -11,11 +11,11 @@ pmcOAI <- function(id,  ...){
    id2 <- gsub("PMC", "", id)
 
    # file name for attributes
-   file  <- paste("http://www.ncbi.nlm.nih.gov/pmc/articles/", id, sep="")
+   file  <- paste("https://www.ncbi.nlm.nih.gov/pmc/articles/", id, sep="")
  
    # use getURL in RCurl package (readlines returns incomplete line warning and does not get errors (just 404 NOT found)
-  #  url <- "http://www.pubmedcentral.nih.gov/oai/oai.cgi?verb=GetRecord&metadataPrefix=pmc&identifier=oai:pubmedcentral.nih.gov:"   
-     url <- "http://www.ncbi.nlm.nih.gov/pmc/oai/oai.cgi?verb=GetRecord&metadataPrefix=pmc&identifier=oai:pubmedcentral.nih.gov:"
+  #  url <- "https://www.pubmedcentral.nih.gov/oai/oai.cgi?verb=GetRecord&metadataPrefix=pmc&identifier=oai:pubmedcentral.nih.gov:"   
+     url <- "https://www.ncbi.nlm.nih.gov/pmc/oai/oai.cgi?verb=GetRecord&metadataPrefix=pmc&identifier=oai:pubmedcentral.nih.gov:"
 
    x <- getURL( paste0(url, id2), .encoding="UTF-8", ...)
    
@@ -25,8 +25,8 @@ pmcOAI <- function(id,  ...){
       if(error=="idDoesNotExist") stop("No results found using ", id)
 
       message("No full text in Open Access Subset, downloading metadata only" )        
-    #  url <- "http://www.pubmedcentral.nih.gov/oai/oai.cgi?verb=GetRecord&metadataPrefix=pmc_fm&identifier=oai:pubmedcentral.nih.gov:"
-       url <- "http://www.ncbi.nlm.nih.gov/pmc/oai/oai.cgi?verb=GetRecord&metadataPrefix=pmc_fm&identifier=oai:pubmedcentral.nih.gov:"
+    #  url <- "https://www.pubmedcentral.nih.gov/oai/oai.cgi?verb=GetRecord&metadataPrefix=pmc_fm&identifier=oai:pubmedcentral.nih.gov:"
+       url <- "https://www.ncbi.nlm.nih.gov/pmc/oai/oai.cgi?verb=GetRecord&metadataPrefix=pmc_fm&identifier=oai:pubmedcentral.nih.gov:"
 
       x <- getURL( paste0(url, id2), .encoding="UTF-8", ...)
   

From 9ef297b25ce88b3a55cde6bdd34ae107dea7b2b7 Mon Sep 17 00:00:00 2001
From: MFreidank <freidankm@yahoo.de>
Date: Fri, 25 Jan 2019 23:16:12 +0100
Subject: [PATCH 2/5] split downloading and parsing xml apart

This allows pre-downloading XML files and then
simply calling `processXML` (defined in R/pmcOAI.R)
with the contents of those XML files.
---
 R/pmcOAI.R | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/R/pmcOAI.R b/R/pmcOAI.R
index 4cf1af5..ef098e5 100644
--- a/R/pmcOAI.R
+++ b/R/pmcOAI.R
@@ -31,6 +31,12 @@ pmcOAI <- function(id,  ...){
       x <- getURL( paste0(url, id2), .encoding="UTF-8", ...)
   
    }
+
+   doc <- processXML(x, id=id, file=file)  # forward id/file so the "id" and "file" attributes are still attached
+   doc
+}
+
+processXML <- function(xmlFile, id=NULL, file=NULL) {
    # Remove namespace for easier XPath queries
 #   x[1] <- gsub(" xmlns=[^ ]*" , "", x[1])
 # see PMC4515827 with tab before xmlns,  \txmlns=
@@ -43,10 +49,15 @@ pmcOAI <- function(id,  ...){
    x[n] <- gsub(">([^<])</xref>", ">^\\1</xref>", x[n])
 
    doc <- xmlParse(x)  
- 
+
    ## ADD attributes  
-   attr(doc, "id") <- id
-   attr(doc, "file") <- file
+   if (id) {
+    attr(doc, "id") <- id
+   }
+
+   if (file) {
+    attr(doc, "file") <- file
+   }
+
    doc
 }
-

From 140d337840a9c6080520eb2459cbbd63081c3984 Mon Sep 17 00:00:00 2001
From: MFreidank <freidankm@yahoo.de>
Date: Sat, 26 Jan 2019 12:20:48 +0100
Subject: [PATCH 3/5] rename processXML argument xmlFile to x

---
 R/pmcOAI.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/pmcOAI.R b/R/pmcOAI.R
index ef098e5..d788fde 100644
--- a/R/pmcOAI.R
+++ b/R/pmcOAI.R
@@ -36,7 +36,7 @@ pmcOAI <- function(id,  ...){
    doc
 }
 
-processXML <- function(xmlFile, id=NULL, file=NULL) {
+processXML <- function(x, id=NULL, file=NULL) {
    # Remove namespace for easier XPath queries
 #   x[1] <- gsub(" xmlns=[^ ]*" , "", x[1])
 # see PMC4515827 with tab before xmlns,  \txmlns=

From 3c80ead4a433fa81e35d292b2b7ae0e8a76d88c2 Mon Sep 17 00:00:00 2001
From: MFreidank <freidankm@yahoo.de>
Date: Sat, 26 Jan 2019 12:22:03 +0100
Subject: [PATCH 4/5] turn check to !is.null

---
 R/pmcOAI.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/pmcOAI.R b/R/pmcOAI.R
index d788fde..f745ceb 100644
--- a/R/pmcOAI.R
+++ b/R/pmcOAI.R
@@ -51,11 +51,11 @@ processXML <- function(x, id=NULL, file=NULL) {
    doc <- xmlParse(x)  
 
    ## ADD attributes  
-   if (id) {
+   if (!is.null(id)) {
     attr(doc, "id") <- id
    }
 
-   if (file) {
+   if (!is.null(file)) {
     attr(doc, "file") <- file
    }
 

From ced317588dca0cae0ef0fad3796f36248f0c0d09 Mon Sep 17 00:00:00 2001
From: Diego Doe <diegovs87@yahoo.fr>
Date: Sat, 27 Jul 2019 16:54:49 +0200
Subject: [PATCH 5/5] added blank lines at end of README.md

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 1afa227..8e0e531 100644
--- a/README.md
+++ b/README.md
@@ -16,3 +16,5 @@ to read zip, word tables and pdf supplementary files.
 Additional details about the package are on the [wiki pages](https://github.com/cstubben/pmcXML/wiki/Overview) and in [BMC Bioinformatics](http://www.biomedcentral.com/1471-2105/15/43/abstract).
 
 Stubben, CJ and JC Challacombe, 2014. Mining locus tags in PubMed Central to improve microbial gene annotation. BMC Bioinformatics 15:43.
+
+