Skip to content

Commit aef6517

Browse files
authored
Merge pull request #43 from bertsky/repair-fix-coords
repair: valid polygons
2 parents 7596e9c + 24d26cd commit aef6517

File tree

11 files changed: 132 additions (+132) and 239 deletions (-239) in total.

.pylintrc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ ignored-modules=cv2
33

44
[MESSAGES CONTROL]
55
disable =
6+
super-with-arguments,
7+
trailing-whitespace,
68
missing-docstring,
79
no-self-use,
810
superfluous-parens,

CHANGELOG.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,17 @@ Versioned according to [Semantic Versioning](http://semver.org/).
 
 ## Unreleased
 
+## [0.1.1] - 2020-09-14
+
+Changed:
+
+* repair: traverse all text regions recursively
+
+Fixed:
+
+* repair: be robust against invalid input polygons
+* repair: be careful to make valid output polygons
+
 ## [0.1.0] - 2020-08-21
 
 Changed:

ocrd_segment/extract_lines.py

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,6 @@
1111
polygon_from_points,
1212
MIME_TO_EXT
1313
)
14-
from ocrd_models.ocrd_page import (
15-
LabelsType, LabelType,
16-
MetadataItemType
17-
)
1814
from ocrd_modelfactory import page_from_file
1915
from ocrd import Processor
2016

@@ -75,18 +71,8 @@ def process(self):
7571
page_id = input_file.pageId or input_file.ID
7672
LOG.info("INPUT FILE %i / %s", n, page_id)
7773
pcgts = page_from_file(self.workspace.download_file(input_file))
74+
self.add_metadata(pcgts)
7875
page = pcgts.get_Page()
79-
metadata = pcgts.get_Metadata() # ensured by from_file()
80-
metadata.add_MetadataItem(
81-
MetadataItemType(type_="processingStep",
82-
name=self.ocrd_tool['steps'][0],
83-
value=TOOL,
84-
Labels=[LabelsType(
85-
externalModel="ocrd-tool",
86-
externalId="parameters",
87-
Label=[LabelType(type_=name,
88-
value=self.parameter[name])
89-
for name in self.parameter.keys()])]))
9076
page_image, page_coords, page_image_info = self.workspace.image_from_page(
9177
page, page_id,
9278
transparency=self.parameter['transparency'])

ocrd_segment/extract_pages.py

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,6 @@
1616
xywh_from_polygon,
1717
MIME_TO_EXT
1818
)
19-
from ocrd_models.ocrd_page import (
20-
LabelsType, LabelType,
21-
MetadataItemType
22-
)
2319
from ocrd_modelfactory import page_from_file
2420
from ocrd import Processor
2521

@@ -168,19 +164,9 @@ def process(self):
168164
num_page_id = int(page_id.strip(page_id.strip("0123456789")))
169165
LOG.info("INPUT FILE %i / %s", n, page_id)
170166
pcgts = page_from_file(self.workspace.download_file(input_file))
167+
self.add_metadata(pcgts)
171168
page = pcgts.get_Page()
172169
ptype = page.get_type()
173-
metadata = pcgts.get_Metadata() # ensured by from_file()
174-
metadata.add_MetadataItem(
175-
MetadataItemType(type_="processingStep",
176-
name=self.ocrd_tool['steps'][0],
177-
value=TOOL,
178-
Labels=[LabelsType(
179-
externalModel="ocrd-tool",
180-
externalId="parameters",
181-
Label=[LabelType(type_=name,
182-
value=self.parameter[name])
183-
for name in self.parameter])]))
184170
page_image, page_coords, page_image_info = self.workspace.image_from_page(
185171
page, page_id,
186172
feature_filter='binarized',

ocrd_segment/extract_regions.py

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,6 @@
1010
polygon_from_points,
1111
MIME_TO_EXT
1212
)
13-
from ocrd_models.ocrd_page import (
14-
LabelsType, LabelType,
15-
MetadataItemType
16-
)
1713
from ocrd_modelfactory import page_from_file
1814
from ocrd import Processor
1915

@@ -72,18 +68,8 @@ def process(self):
7268
page_id = input_file.pageId or input_file.ID
7369
LOG.info("INPUT FILE %i / %s", n, page_id)
7470
pcgts = page_from_file(self.workspace.download_file(input_file))
71+
self.add_metadata(pcgts)
7572
page = pcgts.get_Page()
76-
metadata = pcgts.get_Metadata() # ensured by from_file()
77-
metadata.add_MetadataItem(
78-
MetadataItemType(type_="processingStep",
79-
name=self.ocrd_tool['steps'][0],
80-
value=TOOL,
81-
Labels=[LabelsType(
82-
externalModel="ocrd-tool",
83-
externalId="parameters",
84-
Label=[LabelType(type_=name,
85-
value=self.parameter[name])
86-
for name in self.parameter])]))
8773
page_image, page_coords, page_image_info = self.workspace.image_from_page(
8874
page, page_id,
8975
transparency=self.parameter['transparency'])

ocrd_segment/import_coco_segmentation.py

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,6 @@
1717
# pragma pylint: disable=unused-import
1818
# (region types will be referenced indirectly via globals())
1919
from ocrd_models.ocrd_page import (
20-
MetadataItemType,
21-
LabelsType, LabelType,
2220
CoordsType,
2321
TextRegionType,
2422
ImageRegionType,
@@ -138,21 +136,9 @@ def process(self):
138136
num_page_id = int(page_id.strip(page_id.strip("0123456789")))
139137
LOG.info("INPUT FILE %i / %s", n, page_id)
140138
pcgts = page_from_file(self.workspace.download_file(input_file))
139+
self.add_metadata(pcgts)
141140
page = pcgts.get_Page()
142141

143-
# add metadata about this operation and its runtime parameters:
144-
metadata = pcgts.get_Metadata() # ensured by from_file()
145-
metadata.add_MetadataItem(
146-
MetadataItemType(type_="processingStep",
147-
name=self.ocrd_tool['steps'][0],
148-
value=TOOL,
149-
Labels=[LabelsType(
150-
externalModel="ocrd-tool",
151-
externalId="parameters",
152-
Label=[LabelType(type_=name,
153-
value=self.parameter[name])
154-
for name in self.parameter.keys()])]))
155-
156142
# find COCO image
157143
if page.imageFilename in images_by_filename:
158144
image = images_by_filename[page.imageFilename]

ocrd_segment/import_image_segmentation.py

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@
1818
# pragma pylint: disable=unused-import
1919
# (region types will be referenced indirectly via globals())
2020
from ocrd_models.ocrd_page import (
21-
MetadataItemType,
22-
LabelsType, LabelType,
2321
CoordsType,
2422
TextRegionType,
2523
ImageRegionType,
@@ -89,21 +87,9 @@ def process(self):
8987
input_file, segmentation_file = ift
9088
LOG.info("processing page %s", input_file.pageId)
9189
pcgts = page_from_file(self.workspace.download_file(input_file))
90+
self.add_metadata(pcgts)
9291
page = pcgts.get_Page()
9392

94-
# add metadata about this operation and its runtime parameters:
95-
metadata = pcgts.get_Metadata() # ensured by from_file()
96-
metadata.add_MetadataItem(
97-
MetadataItemType(type_="processingStep",
98-
name=self.ocrd_tool['steps'][0],
99-
value=TOOL,
100-
Labels=[LabelsType(
101-
externalModel="ocrd-tool",
102-
externalId="parameters",
103-
Label=[LabelType(type_=name,
104-
value=self.parameter[name])
105-
for name in self.parameter.keys()])]))
106-
10793
# import mask image
10894
segmentation_filename = self.workspace.download_file(segmentation_file).local_filename
10995
with pushd_popd(self.workspace.directory):

ocrd_segment/ocrd-tool.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"version": "0.1.0",
2+
"version": "0.1.1",
33
"git_url": "https://github.com/OCR-D/ocrd_segment",
44
"tools": {
55
"ocrd-segment-repair": {

0 commit comments

Comments
 (0)