11from enum import Enum
2- from typing import List
2+ from typing import List , Optional
33
4+ from indico .errors import IndicoInputError
45from indico .types .base import BaseType
56from indico .types .datafile import Datafile
6- from indico .errors import IndicoInputError
77
88
99class DataColumn (BaseType ):
@@ -57,28 +57,35 @@ class Dataset(BaseType):
5757
def labelset_by_name(self, name: str) -> LabelSet:
    """
    Return the labelset of this dataset whose name equals ``name``.

    Args:
        name(str): Name of the labelset to look up.

    Returns:
        LabelSet: The first entry of ``self.labelsets`` with a matching name.

    Raises:
        IndicoInputError: If no labelset has that name. The message lists
            the labelset names that do exist.
    """
    # Single pass: return on the first match rather than building the list
    # of names for a membership test and then scanning the labelsets again.
    for labelset in self.labelsets:
        if labelset.name == name:
            return labelset

    # Only the failure path pays for collecting every name.
    available = [labelset.name for labelset in self.labelsets]
    raise IndicoInputError(
        f"No labelset found for {name}. Current labelset names include {available}."
    )
6264
def datacolumn_by_name(self, name: str) -> DataColumn:
    """
    Return the datacolumn of this dataset whose name equals ``name``.

    Args:
        name(str): Name of the datacolumn to look up.

    Returns:
        DataColumn: The first entry of ``self.datacolumns`` with a matching name.

    Raises:
        IndicoInputError: If no datacolumn has that name. The message lists
            the datacolumn names that do exist.
    """
    # Single pass: return on the first match rather than building the list
    # of names for a membership test and then scanning the columns again.
    for datacolumn in self.datacolumns:
        if datacolumn.name == name:
            return datacolumn

    # Only the failure path pays for collecting every name.
    available = [datacolumn.name for datacolumn in self.datacolumns]
    raise IndicoInputError(
        f"No datacolumn found for {name}. Current datacolumn names include {available}."
    )
6771
6872
class TableReadOrder(Enum):
    """
    Enum representing the order in which a table is read: by row or by column.
    """

    ROW = 0
    COLUMN = 1
7276
77+
class OcrEngine(Enum):
    """
    Enum representing available OCR engines.

    Each member is bound to a distinct integer value (0 through 3).
    """

    # NOTE(review): the integer values are presumably meaningful to whatever
    # consumes this enum -- confirm before renumbering or reordering members.
    OMNIPAGE = 0
    READAPI = 1
    READAPI_V2 = 2
    READAPI_TABLES_V1 = 3
8187
88+
8289class OmnipageOcrOptionsInput (BaseType ):
8390 """
8491 Omnipage specific OCR options for dataset creation.
@@ -95,6 +102,7 @@ class OmnipageOcrOptionsInput(BaseType):
95102 table_read_order(TableReadOrder): Read table by row or column.
96103
97104 """
105+
98106 auto_rotate : bool
99107 single_column : bool
100108 upscale_images : bool
@@ -105,6 +113,7 @@ class OmnipageOcrOptionsInput(BaseType):
105113 native_pdf : bool
106114 table_read_order : TableReadOrder
107115
116+
108117class ReadApiOcrOptionsInput (BaseType ):
109118 """
110119 Read API OCR options.
@@ -115,20 +124,44 @@ class ReadApiOcrOptionsInput(BaseType):
115124 upscale_images(bool): Scale up low resolution images.
116125 languages(List[str]): List of languages to use.
117126 """
127+
118128 auto_rotate : bool
119129 single_column : bool
120130 upscale_images : bool
121131 languages : List [str ]
122132
133+
class OcrInputLanguage(BaseType):
    """
    An OCR input language, described by a name and a code.

    Args:
        name(str): Name of the language.
        code(str): Code of the language (the code format is not visible in
            this module -- TODO confirm).
    """

    name: str
    code: str
126137
127- class OcrOptionsInput ():
138+
class IncludeSections(BaseType):
    """
    Per-section flags for an email: header, body and attachments.

    Used as the type of ``EmailOptions.include_sections``, which describes the
    sections of the email to include after parsing.

    Args:
        header(bool): Flag for the email header section. May be None.
        body(bool): Flag for the email body section. May be None.
        attachments(bool): Flag for the email attachments section. May be None.
    """

    # NOTE(review): presumably True means "include this section"; how None is
    # interpreted is not visible here -- confirm against the consumer.
    header: Optional[bool]
    body: Optional[bool]
    attachments: Optional[bool]
143+
144+
class EmailOptions(BaseType):
    """
    Email options

    All fields are annotated as Optional, so any of them may be None.

    Args:
        include_sections(IncludeSections): Sections of the email to include after parsing (header, body, attachments)
        unpack(bool): Unpack an email and treat it as a multi-file Submission
        preserve_body_whitespace(bool): Preserve whitespace in the body of the email
    """

    include_sections: Optional[IncludeSections]
    unpack: Optional[bool]
    preserve_body_whitespace: Optional[bool]
158+
159+
class OcrOptionsInput:
    """
    Input options for OCR engine.

    Args:
        ocr_engine(OcrEngine): One of the available OCR engines.
        omnipage_options(OmnipageOcrOptionsInput): Omnipage specific OCR options.
        readapi_options(ReadApiOcrOptionsInput): Read API OCR options.
    """

    # NOTE(review): unlike the other option classes in this module, this class
    # does not derive from BaseType -- confirm that this is intentional.
    # The lines below are bare annotations; no class attributes are assigned.
    ocr_engine: OcrEngine
    omnipage_options: OmnipageOcrOptionsInput
    readapi_options: ReadApiOcrOptionsInput
134-
0 commit comments