File tree Expand file tree Collapse file tree 11 files changed +132
-239
lines changed Expand file tree Collapse file tree 11 files changed +132
-239
lines changed Original file line number Diff line number Diff line change @@ -3,6 +3,8 @@ ignored-modules=cv2
33
44[MESSAGES CONTROL]
55disable =
6+ super-with-arguments,
7+ trailing-whitespace,
68 missing-docstring,
79 no-self-use,
810 superfluous-parens,
Original file line number Diff line number Diff line change @@ -4,6 +4,17 @@ Versioned according to [Semantic Versioning](http://semver.org/).
44
55## Unreleased
66
7+ ## [0.1.1] - 2020-09-14
8+
9+ Changed:
10+
11+ * repair: traverse all text regions recursively
12+
13+ Fixed:
14+
15+ * repair: be robust against invalid input polygons
16+ * repair: be careful to make valid output polygons
17+
718## [0.1.0] - 2020-08-21
819
920Changed:
Original file line number Diff line number Diff line change 1111 polygon_from_points ,
1212 MIME_TO_EXT
1313)
14- from ocrd_models .ocrd_page import (
15- LabelsType , LabelType ,
16- MetadataItemType
17- )
1814from ocrd_modelfactory import page_from_file
1915from ocrd import Processor
2016
@@ -75,18 +71,8 @@ def process(self):
7571 page_id = input_file .pageId or input_file .ID
7672 LOG .info ("INPUT FILE %i / %s" , n , page_id )
7773 pcgts = page_from_file (self .workspace .download_file (input_file ))
74+ self .add_metadata (pcgts )
7875 page = pcgts .get_Page ()
79- metadata = pcgts .get_Metadata () # ensured by from_file()
80- metadata .add_MetadataItem (
81- MetadataItemType (type_ = "processingStep" ,
82- name = self .ocrd_tool ['steps' ][0 ],
83- value = TOOL ,
84- Labels = [LabelsType (
85- externalModel = "ocrd-tool" ,
86- externalId = "parameters" ,
87- Label = [LabelType (type_ = name ,
88- value = self .parameter [name ])
89- for name in self .parameter .keys ()])]))
9076 page_image , page_coords , page_image_info = self .workspace .image_from_page (
9177 page , page_id ,
9278 transparency = self .parameter ['transparency' ])
Original file line number Diff line number Diff line change 1616 xywh_from_polygon ,
1717 MIME_TO_EXT
1818)
19- from ocrd_models .ocrd_page import (
20- LabelsType , LabelType ,
21- MetadataItemType
22- )
2319from ocrd_modelfactory import page_from_file
2420from ocrd import Processor
2521
@@ -168,19 +164,9 @@ def process(self):
168164 num_page_id = int (page_id .strip (page_id .strip ("0123456789" )))
169165 LOG .info ("INPUT FILE %i / %s" , n , page_id )
170166 pcgts = page_from_file (self .workspace .download_file (input_file ))
167+ self .add_metadata (pcgts )
171168 page = pcgts .get_Page ()
172169 ptype = page .get_type ()
173- metadata = pcgts .get_Metadata () # ensured by from_file()
174- metadata .add_MetadataItem (
175- MetadataItemType (type_ = "processingStep" ,
176- name = self .ocrd_tool ['steps' ][0 ],
177- value = TOOL ,
178- Labels = [LabelsType (
179- externalModel = "ocrd-tool" ,
180- externalId = "parameters" ,
181- Label = [LabelType (type_ = name ,
182- value = self .parameter [name ])
183- for name in self .parameter ])]))
184170 page_image , page_coords , page_image_info = self .workspace .image_from_page (
185171 page , page_id ,
186172 feature_filter = 'binarized' ,
Original file line number Diff line number Diff line change 1010 polygon_from_points ,
1111 MIME_TO_EXT
1212)
13- from ocrd_models .ocrd_page import (
14- LabelsType , LabelType ,
15- MetadataItemType
16- )
1713from ocrd_modelfactory import page_from_file
1814from ocrd import Processor
1915
@@ -72,18 +68,8 @@ def process(self):
7268 page_id = input_file .pageId or input_file .ID
7369 LOG .info ("INPUT FILE %i / %s" , n , page_id )
7470 pcgts = page_from_file (self .workspace .download_file (input_file ))
71+ self .add_metadata (pcgts )
7572 page = pcgts .get_Page ()
76- metadata = pcgts .get_Metadata () # ensured by from_file()
77- metadata .add_MetadataItem (
78- MetadataItemType (type_ = "processingStep" ,
79- name = self .ocrd_tool ['steps' ][0 ],
80- value = TOOL ,
81- Labels = [LabelsType (
82- externalModel = "ocrd-tool" ,
83- externalId = "parameters" ,
84- Label = [LabelType (type_ = name ,
85- value = self .parameter [name ])
86- for name in self .parameter ])]))
8773 page_image , page_coords , page_image_info = self .workspace .image_from_page (
8874 page , page_id ,
8975 transparency = self .parameter ['transparency' ])
Original file line number Diff line number Diff line change 1717# pragma pylint: disable=unused-import
1818# (region types will be referenced indirectly via globals())
1919from ocrd_models .ocrd_page import (
20- MetadataItemType ,
21- LabelsType , LabelType ,
2220 CoordsType ,
2321 TextRegionType ,
2422 ImageRegionType ,
@@ -138,21 +136,9 @@ def process(self):
138136 num_page_id = int (page_id .strip (page_id .strip ("0123456789" )))
139137 LOG .info ("INPUT FILE %i / %s" , n , page_id )
140138 pcgts = page_from_file (self .workspace .download_file (input_file ))
139+ self .add_metadata (pcgts )
141140 page = pcgts .get_Page ()
142141
143- # add metadata about this operation and its runtime parameters:
144- metadata = pcgts .get_Metadata () # ensured by from_file()
145- metadata .add_MetadataItem (
146- MetadataItemType (type_ = "processingStep" ,
147- name = self .ocrd_tool ['steps' ][0 ],
148- value = TOOL ,
149- Labels = [LabelsType (
150- externalModel = "ocrd-tool" ,
151- externalId = "parameters" ,
152- Label = [LabelType (type_ = name ,
153- value = self .parameter [name ])
154- for name in self .parameter .keys ()])]))
155-
156142 # find COCO image
157143 if page .imageFilename in images_by_filename :
158144 image = images_by_filename [page .imageFilename ]
Original file line number Diff line number Diff line change 1818# pragma pylint: disable=unused-import
1919# (region types will be referenced indirectly via globals())
2020from ocrd_models .ocrd_page import (
21- MetadataItemType ,
22- LabelsType , LabelType ,
2321 CoordsType ,
2422 TextRegionType ,
2523 ImageRegionType ,
@@ -89,21 +87,9 @@ def process(self):
8987 input_file , segmentation_file = ift
9088 LOG .info ("processing page %s" , input_file .pageId )
9189 pcgts = page_from_file (self .workspace .download_file (input_file ))
90+ self .add_metadata (pcgts )
9291 page = pcgts .get_Page ()
9392
94- # add metadata about this operation and its runtime parameters:
95- metadata = pcgts .get_Metadata () # ensured by from_file()
96- metadata .add_MetadataItem (
97- MetadataItemType (type_ = "processingStep" ,
98- name = self .ocrd_tool ['steps' ][0 ],
99- value = TOOL ,
100- Labels = [LabelsType (
101- externalModel = "ocrd-tool" ,
102- externalId = "parameters" ,
103- Label = [LabelType (type_ = name ,
104- value = self .parameter [name ])
105- for name in self .parameter .keys ()])]))
106-
10793 # import mask image
10894 segmentation_filename = self .workspace .download_file (segmentation_file ).local_filename
10995 with pushd_popd (self .workspace .directory ):
Original file line number Diff line number Diff line change 11{
2- "version": "0.1.0",
2+ "version": "0.1.1",
33 "git_url": "https://github.com/OCR-D/ocrd_segment",
44 "tools" : {
55 "ocrd-segment-repair" : {
You can’t perform that action at this time.
0 commit comments