endecoder implementation

main
Fabien Benureau 11 years ago
parent 3a7458f226
commit e26c606163

@ -0,0 +1,67 @@
import color
import yaml
try:
import cStringIO as StringIO
except ImportError:
import StringIO
try:
import pybtex.database.input.bibtex
import pybtex.database.input.bibtexml
import pybtex.database.input.bibyaml
import pybtex.database.output.bibyaml
except ImportError:
print(color.dye('error', color.error) +
": you need to install Pybtex; try running 'pip install "
"pybtex' or 'easy_install pybtex'")
exit(-1)
class EnDecoder(object):
    """Encode and decode bibliographic data and metadata.

    Design choices:
    * Has no interaction with disk.
    * Incoming content is not trusted.
    * Returned content must be correctly formatted (no one else checks).
    * Failures raise ValueError.
    * decode_bibdata tries every parser listed in ``decode_fmt``, in order,
      and returns the first result that contains at least one entry.
    """

    # Parser modules tried, in this order, by decode_bibdata.
    decode_fmt = (pybtex.database.input.bibyaml,
                  pybtex.database.input.bibtex,
                  pybtex.database.input.bibtexml)

    def encode_metadata(self, metadata):
        """Serialize `metadata` to UTF-8 encoded YAML using yaml.safe_dump."""
        return yaml.safe_dump(metadata, allow_unicode=True,
                              encoding='UTF-8', indent=4)

    def decode_metadata(self, metadata_raw):
        """Parse the YAML document `metadata_raw` using yaml.safe_load."""
        return yaml.safe_load(metadata_raw)

    def encode_bibdata(self, bibdata):
        """Encode `bibdata` in the bibyaml format and return it as a string.

        The pybtex bibyaml writer needs a stream, so the output is collected
        in an in-memory buffer; nothing is written to disk.
        """
        buf = StringIO.StringIO()
        pybtex.database.output.bibyaml.Writer().write_stream(bibdata, buf)
        return buf.getvalue()

    def decode_bibdata(self, bibdata_raw):
        """Decode raw bibliographic content of unknown format.

        Each parser module in ``decode_fmt`` is tried in turn on a fresh
        stream built from the input.

        :param bibdata_raw: the raw content; byte strings are decoded as
            UTF-8, anything else is converted with ``unicode()``.
        :return: the parsed data returned by the first parser that succeeds.
        :raise ValueError: if the input cannot be decoded as text, or if no
            parser yields at least one entry.
        """
        # Decode byte strings explicitly: a bare unicode() call would use the
        # implicit ASCII codec and reject any non-ASCII UTF-8 input.
        if isinstance(bibdata_raw, bytes):
            bibdata_text = bibdata_raw.decode('utf-8')
        else:
            bibdata_text = unicode(bibdata_raw)
        # NOTE(review): when the file-level import picked cStringIO, building
        # the stream from non-ASCII unicode presumably still fails under
        # Python 2 (reported below as ValueError) -- confirm whether the
        # pure-Python StringIO should be preferred.
        for fmt in EnDecoder.decode_fmt:
            try:
                # A new stream per attempt, so every parser starts reading
                # from the beginning of the content.
                bibdata_stream = StringIO.StringIO(bibdata_text)
                return self._decode_bibdata(bibdata_stream, fmt.Parser())
            except ValueError:
                # This format did not match; try the next one.
                continue
        raise ValueError('could not parse bibdata')

    def _decode_bibdata(self, bibdata_stream, parser):
        """Parse `bibdata_stream` with `parser`.

        :return: the parsed data, if it contains at least one entry.
        :raise ValueError: if the parser raises any exception or produces
            no entries.
        """
        try:
            parsed = parser.parse_stream(bibdata_stream)
            has_entries = len(parsed.entries) > 0
        except Exception:
            # Deliberately broad: the content is untrusted and any parser
            # failure is reported uniformly as ValueError, so that
            # decode_bibdata can move on to the next format.
            has_entries = False
        if not has_entries:
            raise ValueError('could not parse bibdata')
        return parsed

@ -0,0 +1,141 @@
# -*- coding: utf-8 -*-
import unittest
import yaml
import testenv
from papers import endecoder
# Reference entry (Page99) in bibyaml form. It is the decoding input of
# test_endecode_bibyaml and the expected output that the bibtexml and bibtex
# round-trip tests compare against.
# NOTE(review): the nested YAML structure shows no indentation in this copy;
# confirm it matches the intended fixture before relying on exact equality.
bibyaml_raw0 = """entries:
Page99:
abstract: The importance of a Web page is an inherently subjective matter,
which depends on the readers interests, knowledge and attitudes. But there
is still much that can be said objectively about the relative importance
of Web pages. This paper describes PageRank, a mathod for rating Web pages
objectively and mechanically, effectively measuring the human interest
and attention devoted to them. We compare PageRank to an idealized random
Web surfer. We show how to efficiently compute PageRank for large numbers
of pages. And, we show how to apply PageRank to search and to user navigation.
author:
- first: Lawrence
last: Page
- first: Sergey
last: Brin
- first: Rajeev
last: Motwani
- first: Terry
last: Winograd
institution: Stanford InfoLab
month: November
note: Previous number = SIDL-WP-1999-0120
number: 1999-66
publisher: Stanford InfoLab
title: 'The PageRank Citation Ranking: Bringing Order to the Web.'
type: techreport
url: http://ilpubs.stanford.edu:8090/422/
year: '1999'
"""
# The Page99 entry written as a BibTeXML document; decoding input of
# test_endecode_bibtexml.
bibtexml_raw0 = """<?xml version='1.0' encoding='UTF-8'?>
<bibtex:file xmlns:bibtex="http://bibtexml.sf.net/">
<bibtex:entry id="Page99">
<bibtex:techreport>
<bibtex:publisher>Stanford InfoLab</bibtex:publisher>
<bibtex:title>The PageRank Citation Ranking: Bringing Order to the Web.</bibtex:title>
<bibtex:url>http://ilpubs.stanford.edu:8090/422/</bibtex:url>
<bibtex:abstract>The importance of a Web page is an inherently subjective matter, which depends on the readers interests, knowledge and attitudes. But there is still much that can be said objectively about the relative importance of Web pages. This paper describes PageRank, a mathod for rating Web pages objectively and mechanically, effectively measuring the human interest and attention devoted to them. We compare PageRank to an idealized random Web surfer. We show how to efficiently compute PageRank for large numbers of pages. And, we show how to apply PageRank to search and to user navigation.</bibtex:abstract>
<bibtex:number>1999-66</bibtex:number>
<bibtex:month>November</bibtex:month>
<bibtex:note>Previous number = SIDL-WP-1999-0120</bibtex:note>
<bibtex:year>1999</bibtex:year>
<bibtex:institution>Stanford InfoLab</bibtex:institution>
<bibtex:author>
<bibtex:person>
<bibtex:first>Lawrence</bibtex:first>
<bibtex:last>Page</bibtex:last>
</bibtex:person>
<bibtex:person>
<bibtex:first>Sergey</bibtex:first>
<bibtex:last>Brin</bibtex:last>
</bibtex:person>
<bibtex:person>
<bibtex:first>Rajeev</bibtex:first>
<bibtex:last>Motwani</bibtex:last>
</bibtex:person>
<bibtex:person>
<bibtex:first>Terry</bibtex:first>
<bibtex:last>Winograd</bibtex:last>
</bibtex:person>
</bibtex:author>
</bibtex:techreport>
</bibtex:entry>
</bibtex:file>
"""
# The Page99 entry written as a BibTeX record; decoding input of
# test_endecode_bibtex.
bibtex_raw0 = """
@techreport{
Page99,
author = "Page, Lawrence and Brin, Sergey and Motwani, Rajeev and Winograd, Terry",
publisher = "Stanford InfoLab",
title = "The PageRank Citation Ranking: Bringing Order to the Web.",
url = "http://ilpubs.stanford.edu:8090/422/",
abstract = "The importance of a Web page is an inherently subjective matter, which depends on the readers interests, knowledge and attitudes. But there is still much that can be said objectively about the relative importance of Web pages. This paper describes PageRank, a mathod for rating Web pages objectively and mechanically, effectively measuring the human interest and attention devoted to them. We compare PageRank to an idealized random Web surfer. We show how to efficiently compute PageRank for large numbers of pages. And, we show how to apply PageRank to search and to user navigation.",
number = "1999-66",
month = "November",
note = "Previous number = SIDL-WP-1999-0120",
year = "1999",
institution = "Stanford InfoLab"
}
"""
# YAML metadata document; test_endecode_metadata decodes it and expects
# re-encoding to reproduce this exact text.
metadata_raw0 = """external-document: null
notes: []
tags: [search, network]
"""
def compare_yaml_str(s1, s2):
    """Tell whether two YAML strings describe the same content.

    Identical strings are accepted immediately without parsing; otherwise
    both are loaded with yaml.safe_load and the resulting objects are
    compared.
    """
    identical = s1 == s2
    if not identical:
        # The texts differ; compare what they parse to instead.
        return yaml.safe_load(s1) == yaml.safe_load(s2)
    return True
class TestEnDecode(unittest.TestCase):
    """Round-trip tests for endecoder.EnDecoder."""

    def _bibdata_to_bibyaml(self, raw):
        """Decode `raw` with a fresh EnDecoder and encode the result back."""
        codec = endecoder.EnDecoder()
        return codec.encode_bibdata(codec.decode_bibdata(raw))

    def test_endecode_bibyaml(self):
        """bibyaml input must be reproduced exactly and as equivalent YAML."""
        output = self._bibdata_to_bibyaml(bibyaml_raw0)
        self.assertEqual(bibyaml_raw0, output)
        self.assertTrue(compare_yaml_str(bibyaml_raw0, output))

    def test_endecode_bibtexml(self):
        """BibTeXML input must encode to YAML equivalent to the reference."""
        output = self._bibdata_to_bibyaml(bibtexml_raw0)
        self.assertTrue(compare_yaml_str(bibyaml_raw0, output))

    def test_endecode_bibtex(self):
        """BibTeX input must encode to YAML equivalent to the reference."""
        output = self._bibdata_to_bibyaml(bibtex_raw0)
        self.assertTrue(compare_yaml_str(bibyaml_raw0, output))

    def test_endecode_metadata(self):
        """Metadata must survive a decode/encode round trip unchanged."""
        codec = endecoder.EnDecoder()
        metadata = codec.decode_metadata(metadata_raw0)
        self.assertEqual(metadata_raw0, codec.encode_metadata(metadata))
Loading…
Cancel
Save