parent
3a7458f226
commit
e26c606163
@ -0,0 +1,67 @@
|
||||
import color
|
||||
import yaml
|
||||
|
||||
try:
|
||||
import cStringIO as StringIO
|
||||
except ImportError:
|
||||
import StringIO
|
||||
|
||||
try:
|
||||
import pybtex.database.input.bibtex
|
||||
import pybtex.database.input.bibtexml
|
||||
import pybtex.database.input.bibyaml
|
||||
import pybtex.database.output.bibyaml
|
||||
|
||||
except ImportError:
|
||||
print(color.dye('error', color.error) +
|
||||
": you need to install Pybtex; try running 'pip install "
|
||||
"pybtex' or 'easy_install pybtex'")
|
||||
exit(-1)
|
||||
|
||||
|
||||
class EnDecoder(object):
    """Translate metadata and bibliographic data to and from raw text.

    Design choices:
        * Has no interaction with disk.
        * Incoming content is not trusted.
        * Returned content must be correctly formatted (no one else checks).
        * Bibdata that cannot be decoded is reported with ValueError.
        * decode_bibdata tries every format listed in ``decode_fmt`` before
          giving up.
    """

    # Parser modules attempted by decode_bibdata, in this order.
    decode_fmt = (
        pybtex.database.input.bibyaml,
        pybtex.database.input.bibtex,
        pybtex.database.input.bibtexml,
    )

    def encode_metadata(self, metadata):
        """Return ``metadata`` dumped by ``yaml.safe_dump`` (UTF-8, indent 4)."""
        dump_options = {
            'allow_unicode': True,
            'encoding': 'UTF-8',
            'indent': 4,
        }
        return yaml.safe_dump(metadata, **dump_options)

    def decode_metadata(self, metadata_raw):
        """Return the object ``yaml.safe_load`` builds from ``metadata_raw``."""
        loaded = yaml.safe_load(metadata_raw)
        return loaded

    def encode_bibdata(self, bibdata):
        """Return ``bibdata`` written out by the pybtex bibyaml writer."""
        writer = pybtex.database.output.bibyaml.Writer()
        buf = StringIO.StringIO()
        writer.write_stream(bibdata, buf)
        return buf.getvalue()

    def decode_bibdata(self, bibdata_raw):
        """Parse ``bibdata_raw`` with the first format that accepts it.

        Each module in ``EnDecoder.decode_fmt`` gets a fresh stream over the
        same text. Raises ValueError when none of them yields entries.
        """
        text = unicode(bibdata_raw)
        for fmt_module in EnDecoder.decode_fmt:
            try:
                # A new stream per attempt so every parser starts at offset 0.
                stream = StringIO.StringIO(text)
                return self._decode_bibdata(stream, fmt_module.Parser())
            except ValueError:
                # This format did not work out; move on to the next one.
                continue
        raise ValueError('could not parse bibdata')

    def _decode_bibdata(self, bibdata_stream, parser):
        """Run ``parser`` over ``bibdata_stream`` and return its result.

        Any exception raised while parsing, as well as a result with no
        entries, is reported as ValueError.
        """
        parsed = None
        try:
            parsed = parser.parse_stream(bibdata_stream)
            usable = len(parsed.entries) > 0
        except Exception:
            # Input is untrusted: every parser failure means "not this format".
            usable = False
        if not usable:
            raise ValueError('could not parse bibdata')
        return parsed
|
@ -0,0 +1,141 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
import unittest
|
||||
|
||||
import yaml
|
||||
|
||||
import testenv
|
||||
from papers import endecoder
|
||||
|
||||
bibyaml_raw0 = """entries:
|
||||
Page99:
|
||||
abstract: The importance of a Web page is an inherently subjective matter,
|
||||
which depends on the readers interests, knowledge and attitudes. But there
|
||||
is still much that can be said objectively about the relative importance
|
||||
of Web pages. This paper describes PageRank, a mathod for rating Web pages
|
||||
objectively and mechanically, effectively measuring the human interest
|
||||
and attention devoted to them. We compare PageRank to an idealized random
|
||||
Web surfer. We show how to efficiently compute PageRank for large numbers
|
||||
of pages. And, we show how to apply PageRank to search and to user navigation.
|
||||
author:
|
||||
- first: Lawrence
|
||||
last: Page
|
||||
- first: Sergey
|
||||
last: Brin
|
||||
- first: Rajeev
|
||||
last: Motwani
|
||||
- first: Terry
|
||||
last: Winograd
|
||||
institution: Stanford InfoLab
|
||||
month: November
|
||||
note: Previous number = SIDL-WP-1999-0120
|
||||
number: 1999-66
|
||||
publisher: Stanford InfoLab
|
||||
title: 'The PageRank Citation Ranking: Bringing Order to the Web.'
|
||||
type: techreport
|
||||
url: http://ilpubs.stanford.edu:8090/422/
|
||||
year: '1999'
|
||||
"""
|
||||
|
||||
bibtexml_raw0 = """<?xml version='1.0' encoding='UTF-8'?>
|
||||
<bibtex:file xmlns:bibtex="http://bibtexml.sf.net/">
|
||||
|
||||
<bibtex:entry id="Page99">
|
||||
<bibtex:techreport>
|
||||
<bibtex:publisher>Stanford InfoLab</bibtex:publisher>
|
||||
<bibtex:title>The PageRank Citation Ranking: Bringing Order to the Web.</bibtex:title>
|
||||
<bibtex:url>http://ilpubs.stanford.edu:8090/422/</bibtex:url>
|
||||
<bibtex:abstract>The importance of a Web page is an inherently subjective matter, which depends on the readers interests, knowledge and attitudes. But there is still much that can be said objectively about the relative importance of Web pages. This paper describes PageRank, a mathod for rating Web pages objectively and mechanically, effectively measuring the human interest and attention devoted to them. We compare PageRank to an idealized random Web surfer. We show how to efficiently compute PageRank for large numbers of pages. And, we show how to apply PageRank to search and to user navigation.</bibtex:abstract>
|
||||
<bibtex:number>1999-66</bibtex:number>
|
||||
<bibtex:month>November</bibtex:month>
|
||||
<bibtex:note>Previous number = SIDL-WP-1999-0120</bibtex:note>
|
||||
<bibtex:year>1999</bibtex:year>
|
||||
<bibtex:institution>Stanford InfoLab</bibtex:institution>
|
||||
<bibtex:author>
|
||||
<bibtex:person>
|
||||
<bibtex:first>Lawrence</bibtex:first>
|
||||
<bibtex:last>Page</bibtex:last>
|
||||
</bibtex:person>
|
||||
<bibtex:person>
|
||||
<bibtex:first>Sergey</bibtex:first>
|
||||
<bibtex:last>Brin</bibtex:last>
|
||||
</bibtex:person>
|
||||
<bibtex:person>
|
||||
<bibtex:first>Rajeev</bibtex:first>
|
||||
<bibtex:last>Motwani</bibtex:last>
|
||||
</bibtex:person>
|
||||
<bibtex:person>
|
||||
<bibtex:first>Terry</bibtex:first>
|
||||
<bibtex:last>Winograd</bibtex:last>
|
||||
</bibtex:person>
|
||||
</bibtex:author>
|
||||
</bibtex:techreport>
|
||||
</bibtex:entry>
|
||||
|
||||
</bibtex:file>
|
||||
"""
|
||||
|
||||
bibtex_raw0 = """
|
||||
@techreport{
|
||||
Page99,
|
||||
author = "Page, Lawrence and Brin, Sergey and Motwani, Rajeev and Winograd, Terry",
|
||||
publisher = "Stanford InfoLab",
|
||||
title = "The PageRank Citation Ranking: Bringing Order to the Web.",
|
||||
url = "http://ilpubs.stanford.edu:8090/422/",
|
||||
abstract = "The importance of a Web page is an inherently subjective matter, which depends on the readers interests, knowledge and attitudes. But there is still much that can be said objectively about the relative importance of Web pages. This paper describes PageRank, a mathod for rating Web pages objectively and mechanically, effectively measuring the human interest and attention devoted to them. We compare PageRank to an idealized random Web surfer. We show how to efficiently compute PageRank for large numbers of pages. And, we show how to apply PageRank to search and to user navigation.",
|
||||
number = "1999-66",
|
||||
month = "November",
|
||||
note = "Previous number = SIDL-WP-1999-0120",
|
||||
year = "1999",
|
||||
institution = "Stanford InfoLab"
|
||||
}
|
||||
"""
|
||||
|
||||
metadata_raw0 = """external-document: null
|
||||
notes: []
|
||||
tags: [search, network]
|
||||
"""
|
||||
|
||||
def compare_yaml_str(s1, s2):
    """Tell whether two YAML documents are equivalent.

    Identical strings are accepted immediately; otherwise both are loaded
    with ``yaml.safe_load`` and the resulting objects are compared.
    """
    if s1 != s2:
        # Textually different: fall back to comparing the parsed content.
        first = yaml.safe_load(s1)
        second = yaml.safe_load(s2)
        return first == second
    return True
|
||||
|
||||
|
||||
class TestEnDecode(unittest.TestCase):
    """Decode/encode round-trip checks for ``endecoder.EnDecoder``."""

    def _reencode_bibdata(self, raw):
        """Decode ``raw`` with a new EnDecoder and return it re-encoded."""
        codec = endecoder.EnDecoder()
        parsed = codec.decode_bibdata(raw)
        return codec.encode_bibdata(parsed)

    def test_endecode_bibyaml(self):
        """bibyaml input must come back unchanged, textually and as YAML."""
        reencoded = self._reencode_bibdata(bibyaml_raw0)

        self.assertEqual(bibyaml_raw0, reencoded)
        self.assertTrue(compare_yaml_str(bibyaml_raw0, reencoded))

    def test_endecode_bibtexml(self):
        """bibtexml input must encode to YAML equivalent to the fixture."""
        reencoded = self._reencode_bibdata(bibtexml_raw0)

        self.assertTrue(compare_yaml_str(bibyaml_raw0, reencoded))

    def test_endecode_bibtex(self):
        """bibtex input must encode to YAML equivalent to the fixture."""
        reencoded = self._reencode_bibdata(bibtex_raw0)

        self.assertTrue(compare_yaml_str(bibyaml_raw0, reencoded))

    def test_endecode_metadata(self):
        """Metadata must survive a decode/encode cycle byte-for-byte."""
        codec = endecoder.EnDecoder()
        loaded = codec.decode_metadata(metadata_raw0)
        dumped = codec.encode_metadata(loaded)

        self.assertEqual(metadata_raw0, dumped)
|
||||
|
Loading…
Reference in new issue