Skip to content

Commit 1b3a721

Browse files
committed
GdsCollector: proof-of-concept to catch validation errors including filename
1 parent 3d16bff commit 1b3a721

File tree

4 files changed

+21
-6
lines changed

4 files changed

+21
-6
lines changed

ocrd_models/ocrd_models/generatedscollector.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
class OcrdGdsCollector(object):
22

3-
def __init__(self, messages=None):
4-
print('GdsCollector.__init__', self)
3+
def __init__(self, filename=None, messages=None):
4+
self.filename = filename
55
if messages is None:
66
self.messages = []
77
else:

ocrd_models/ocrd_page_parse.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
def parse(inFileName, silence=False, print_warnings=True):
22
global CapturedNsmap_
3-
gds_collector = GdsCollector_()
3+
# parse() has no filename parameter, so the input path identifies the file
4+
filename = inFileName
5+
gds_collector = GdsCollector_(filename=filename)
46
parser = None
57
doc = parsexml_(inFileName, parser)
68
rootNode = doc.getroot()
@@ -29,7 +31,7 @@ def parse(inFileName, silence=False, print_warnings=True):
2931
sys.stderr.write(separator)
3032
return rootObj
3133

32-
def parseString(inString, silence=False, print_warnings=True):
34+
def parseString(inString, filename=None, silence=False, print_warnings=True):
3335
'''Parse a string, create the object tree, and export it.
3436
3537
Arguments:
@@ -40,7 +42,7 @@ def parseString(inString, silence=False, print_warnings=True):
4042
'''
4143
parser = None
4244
rootNode= parsexmlstring_(inString, parser)
43-
gds_collector = GdsCollector_()
45+
gds_collector = GdsCollector_(filename=filename)
4446
rootTag, rootClass = get_root_tag(rootNode)
4547
if rootClass is None:
4648
rootTag = 'PcGts'

requirements_test.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
pytest >= 4.0.0
2-
generateDS == 2.35.24
2+
generateDS == 2.35.26
33
coverage >= 4.5.2
44
sphinx
55
codecov >= 2.0.15

tests/model/test_ocrd_page.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -321,5 +321,18 @@ def test_gdscollector_override(self):
321321
gdc = pcgts.gds_collector_
322322
self.assertTrue(isinstance(gdc, OcrdGdsCollector))
323323

324+
def test_gdscollector_info(self):
# Checks that the collector attached to the object returned by parseString()
# carries the filename passed in, and that validate_() records its message on it.
325+
filename = assets.path_to('gutachten/data/TEMP1/PAGE_TEMP1.xml')
326+
with open(filename, 'r') as f:
327+
s = f.read()
328+
# Rename every 'pc:Page' occurrence so that validation finds no Page element
# (see the expected message in the final assertion).
s = s.replace('pc:Page', 'pc:Foo')
329+
# parseString() is handed bytes rather than str here.
s = s.encode('utf-8')
330+
pcgts = parseString(s, silence=True, filename=filename)
331+
gdsc = pcgts.gds_collector_
332+
# Before validate_() is called, the collector holds no messages.
self.assertEqual(gdsc.messages, [])
333+
self.assertEqual(gdsc.filename, filename)
334+
pcgts.validate_(gdsc, True)
335+
self.assertEqual(gdsc.messages, ['Number of values for Page near line 2 is below the minimum allowed, expected at least 1, found 0'])
336+
324337
if __name__ == '__main__':
325338
main(__file__)

0 commit comments

Comments
 (0)