jimklo · ljwolford · Apr 24, 2012
diff --git a/datapump/oaipmh.py b/datapump/oaipmh.py
@@ -240,7 +240,23 @@ def fetchCollections(self):
 
         return col_names
 
-
+    #SHODOR FIX FUNCTION: cuts each token back to its third "!" and prefixes "!!"
+    def stripTokenList(self, tokenList):
+        '''
+        Return a new list in which every token is cut back to its third "!"
+        and prefixed with "!!". Tokens holding fewer than three "!" are passed
+        through unchanged rather than being reduced to their last character.
+        '''
+        newTokenList = []
+        for token in tokenList:
+            # parts[3] is everything after the third "!"; the text before it is dropped.
+            parts = token.split("!", 3)
+            if len(parts) == 4:
+                # Re-attach the third "!" itself behind the fixed "!!" prefix.
+                token = "!!" + "!" + parts[3]
+            newTokenList.append(token)
+        return newTokenList
+
     def fetchRecords(self):
         '''
         Generator to fetch all records using a resumptionToken if supplied.
@@ -265,6 +281,14 @@ def fetchRecords(self):
         f = StringIO(body)
         tree = etree.parse(f)
         tokenList = tree.xpath("oai:ListRecords/oai:resumptionToken/text()", namespaces=self.namespaces)
+
+        #SHODOR LOG
+        for token in tokenList:
+            log.info('TOKEN: %s' % token)
+
+        #SHODOR FIX
+        tokenList = self.stripTokenList(tokenList)
+
         yield tree.xpath("oai:ListRecords/oai:record", namespaces=self.namespaces)
 
         while (len(tokenList) == 1):
@@ -275,6 +299,13 @@ def fetchRecords(self):
                 tree = etree.parse(f)
                 yield tree.xpath("oai:ListRecords/oai:record", namespaces=self.namespaces)
                 tokenList = tree.xpath("oai:ListRecords/oai:resumptionToken/text()", namespaces=self.namespaces)
+
+                #SHODOR LOG
+                for token in tokenList:
+                    log.info('TOKEN: %s' % token)
+
+                #SHODOR FIX
+                tokenList = self.stripTokenList(tokenList)
             except:
                 tokenList = []
                 log.exception("Problem trying to get next segment.")
@@ -289,8 +320,22 @@ def makeRequest(self, base_url, credentials=None, **kw):
         }
         if credentials is not None:
             headers['Authorization'] = 'Basic ' + credentials.strip()
+
+        #SHODOR FIX: put literal ":" and "!" back into the urlencoded query string
+        def replace_all(text, dic):
+            for i,j in dic.iteritems():
+                text = text.replace(i,j)
+            return text
+
+        replaceDict ={
+                      "%3A" : ":",
+                      "%21" : "!"
+        }
+
+        encoded = replace_all(urlencode(kw), replaceDict)
+
         request = urllib2.Request(
-            "{url}?{query}".format(url=base_url, query=urlencode(kw)), headers=headers)
+            "{url}?{query}".format(url=base_url, query=encoded), headers=headers)
         log.debug("URL Requested: %s", request.get_full_url())
         return self.retrieveFromUrlWaiting(request)