Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
192 changes: 190 additions & 2 deletions src/onegov/landsgemeinde/forms/agenda.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,20 @@
from __future__ import annotations
from datetime import datetime

import re
import zipfile
import os
import base64
import gzip
import pytz

from bs4 import BeautifulSoup
from datetime import datetime
from markupsafe import Markup
from io import BytesIO
from pathlib import Path
from tempfile import TemporaryDirectory

from onegov.form import Form
from onegov.form.fields import TagsField
from onegov.form.fields import TimeField
from onegov.form.fields import UploadField
Expand All @@ -27,11 +39,11 @@
from wtforms.validators import Optional
from wtforms.validators import ValidationError


from typing import Any
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from onegov.landsgemeinde.request import LandsgemeindeRequest
from onegov.landsgemeinde.collections import AgendaItemCollection


class AgendaItemForm(NamedFileForm):
Expand Down Expand Up @@ -202,3 +214,179 @@ def populate_obj(self, obj: AgendaItem) -> None: # type:ignore[override]
tz = pytz.timezone('Europe/Zurich')
now = datetime.now(tz=tz).time()
obj.start_time = now


class AgendaItemUploadForm(Form):
    """Form that imports an agenda item from an uploaded ZIP archive."""

    request: LandsgemeindeRequest

    # The upload is restricted to ``application/zip`` by the mime type
    # whitelist.
    # NOTE(review): the size limit is presumably given in bytes
    # (100 MiB) - confirm against FileSizeLimit.
    agenda_item_zip = UploadField(
        label=_('Agenda Item ZIP'),
        fieldset=_('Import'),
        validators=[
            WhitelistedMimeType({'application/zip'}),
            FileSizeLimit(100 * 1024 * 1024)
        ]
    )

def get_html_dir(self,
                 temp: TemporaryDirectory[str],
                 field: UploadField) -> str | None:
    """Unpack the uploaded archive into ``temp`` and find its html folder.

    The field data is expected to be a dict whose ``data`` entry holds the
    base64 encoded upload. If the decoded bytes start with the gzip magic
    number they are decompressed first, then the ZIP archive is extracted
    into the temporary directory.

    Returns the path of the first directory named ``html`` found while
    walking the extracted tree top-down, or ``None`` if there is no such
    directory or if the field carries no upload data at all.

    Errors raised while decoding or unpacking a corrupt archive (for
    example ``zipfile.BadZipFile``) are not handled here and propagate to
    the caller.
    """
    payload = field.data
    if not isinstance(payload, dict) or not payload.get('data'):
        # Without upload data there is no archive to open. Passing
        # ``None`` on to ZipFile would only fail with an AttributeError.
        return None

    raw = base64.b64decode(payload['data'])
    if raw[:2] == b'\x1f\x8b':
        # gzip magic number: the stored upload is gzip-compressed
        raw = gzip.decompress(raw)

    target = Path(temp.name)
    with zipfile.ZipFile(BytesIO(raw), 'r') as archive:
        archive.extractall(target)

    for root, dirs, _files in os.walk(target):
        if 'html' in dirs:
            return os.path.join(root, 'html')

    return None

def import_agenda_item(
    self, collection: AgendaItemCollection
) -> AgendaItem:
    """Create a draft agenda item from the uploaded ZIP export.

    The archive is unpacked into a temporary directory. Every ``*.html``
    file of its ``html`` folder (except ``combined_clean.html``) is read
    in natural filename order and the text of its ``_idTextSpan…`` spans
    is condensed into one simplified HTML document:

    * paragraphs whose class contains ``_01-Titel`` provide the title of
      the agenda item (the last one wins) and are not rendered,
    * other title classes become ``h2``/``h3``/``h4`` headings,
    * consecutive ``Aufz-hlung`` paragraphs are merged into one ``<ul>``,
    * paragraphs whose parent has no inline style are treated as table
      content and replaced by a single placeholder paragraph,
    * everything else becomes a plain ``<p>``.

    The new item is added to ``collection`` with the next free number of
    the assembly and returned.

    Raises ``ValueError`` if the archive contains no ``html`` directory.
    """
    from itertools import groupby

    span_id = re.compile(r'^_idTextSpan\d+')
    heading_markers = (
        ('_02-Titel', 'h2'),
        ('03-Titel', 'h3'),
        ('04-', 'h4'),
        ('Vorlage_Vorlage-Titel', 'h2'),
    )

    def natural_key(path: Path) -> list[int]:
        # Sort "2.html" before "10.html": numeric chunks compare as
        # integers, all other chunks compare equal (-1).
        # NOTE(review): a reviewer suggested using ``natsorted`` from the
        # natsort package instead of this hand-written key.
        return [
            int(chunk) if chunk.isdecimal() else -1
            for chunk in re.findall(r'\d+|\D+', path.name)
        ]

    def kind_of(paragraph: tuple[str, str]) -> str:
        p_class = paragraph[0]
        if 'Aufz-hlung' in p_class:
            return 'list'
        if p_class == 'table':
            return 'table'
        return 'text'

    combined_html = BeautifulSoup(
        '<html><head><meta charset="utf-8"></head><body></body></html>',
        'html.parser')
    body = combined_html.body
    if body is None:
        # Cannot happen with the skeleton above; narrows the type.
        raise RuntimeError('Combined document has no body.')
    title = ''

    temp = TemporaryDirectory()
    # The context manager removes the extracted files even if parsing
    # fails half-way through.
    with temp:
        html_dir = self.get_html_dir(temp, self.agenda_item_zip)
        if html_dir is None:
            raise ValueError('No html directory found in the zip file.')

        html_files = sorted(
            (
                path for path in Path(html_dir).glob('*.html')
                if path.name != 'combined_clean.html'
            ),
            key=natural_key
        )

        for file_path in html_files:
            soup = BeautifulSoup(
                file_path.read_text(encoding='utf-8'), 'html.parser')
            if soup.body is None:
                continue

            # Collect (class, text) pairs for every paragraph with text.
            paragraphs: list[tuple[str, str]] = []
            # The first top-level <div> of each file is skipped.
            for textframe in soup.body.find_all('div', recursive=False)[1:]:
                for p_tag in textframe.find_all('p'):
                    parent = p_tag.find_parent()
                    if parent is not None and parent.get('style', ''):
                        p_class = ' '.join(p_tag.get('class', []))
                    else:
                        # Paragraphs below an unstyled parent are handled
                        # as table content.
                        p_class = 'table'

                    texts = [
                        span.get_text().strip()
                        for span in p_tag.find_all('span', id=span_id)
                    ]
                    texts = [text for text in texts if text]
                    if texts:
                        paragraphs.append((p_class, ' '.join(texts)))

            runs = groupby(paragraphs, key=kind_of)
            for position, (kind, run) in enumerate(runs):
                if kind == 'list':
                    # Consecutive list items share one <ul>.
                    ul_element = combined_html.new_tag('ul')
                    for _p_class, text in run:
                        li_element = combined_html.new_tag('li')
                        li_element.string = text
                        ul_element.append(li_element)
                    body.append(ul_element)

                elif kind == 'table':
                    # One placeholder per run of table paragraphs.
                    # NOTE(review): a table run that opens a file gets no
                    # placeholder (unchanged behaviour) - confirm whether
                    # that is intended.
                    if position > 0:
                        element = combined_html.new_tag('p')
                        element.string = '--- TABELLE HIER EINFÜGEN ---'
                        body.append(element)

                else:
                    for p_class, text in run:
                        if '_01-Titel' in p_class:
                            title = text
                            continue
                        tag_type = next(
                            (
                                tag for marker, tag in heading_markers
                                if marker in p_class
                            ),
                            'p'
                        )
                        element = combined_html.new_tag(tag_type)
                        element.string = text
                        body.append(element)

    cleaned_html = Markup(combined_html.prettify())  # nosec: B704

    query = self.request.session.query(func.max(AgendaItem.number))
    query = query.filter(AgendaItem.assembly_id == self.model.assembly.id)
    next_number = (query.scalar() or 0) + 1

    return collection.add(
        text=cleaned_html,
        number=next_number,
        state='draft',
        title=title,
        assembly_id=self.model.assembly.id
    )

def validate_agenda_item_zip(
    self,
    field: UploadField
) -> None:
    """Ensure a ZIP archive containing an ``html`` directory was uploaded.

    Raises ``ValidationError`` if nothing was uploaded, if the upload can
    not be read as a (optionally gzip-compressed) ZIP archive or if the
    archive contains no ``html`` directory.
    """
    if not field.data:
        raise ValidationError(_('No file uploaded.'))

    temp = TemporaryDirectory()
    # The context manager removes the extracted files on every path,
    # including the one that raises the validation error below.
    with temp:
        try:
            html_dir = self.get_html_dir(temp, field)
        except (zipfile.BadZipFile, gzip.BadGzipFile, EOFError):
            # A corrupt archive cannot contain a usable html directory;
            # report it as a validation error instead of crashing.
            html_dir = None

    if not html_dir:
        raise ValidationError(
            _('No html directory found in the zip file.'))
7 changes: 6 additions & 1 deletion src/onegov/landsgemeinde/layouts/assembly.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,12 @@ def editbar_links(self) -> list[Link | LinkGroup] | None:
text=_('Agenda item'),
url=self.request.link(items, 'new'),
attrs={'class': 'check-list-link'}
)
),
Link(
text=_('ZIP Upload'),
url=self.request.link(items, 'new-import'),
attrs={'class': 'ticket-archive'}
),
]
)
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,18 @@ msgstr "Nummer wird bereits verwendet."
msgid "Invalid timestamp."
msgstr "Ungültiger Zeitstempel."

msgid "Agenda Item ZIP"
msgstr "Traktandum ZIP"

msgid "Import"
msgstr "Importieren"

msgid "No html directory found in the zip file."
msgstr "Keine HTML-Dateien im ZIP-Archiv gefunden."

msgid "No file uploaded."
msgstr "Keine Datei hochgeladen."

msgid "Date"
msgstr "Datum"

Expand Down Expand Up @@ -293,6 +305,9 @@ msgstr "Möchten Sie diese Landsgemeinde wirklich löschen?"
msgid "Delete assembly"
msgstr "Landsgemeinde löschen"

msgid "ZIP Upload"
msgstr "ZIP-Upload"

#, python-format
msgid "Extraodinary ${assembly_type} from ${date}"
msgstr "Ausserordentliche ${assembly_type} vom ${date}"
Expand Down Expand Up @@ -419,6 +434,9 @@ msgstr "Neu"
msgid "New agenda item"
msgstr "Neues Traktandum"

msgid "Imported a new agenda item"
msgstr "Neues Traktandum importiert"

msgid "Your changes were saved"
msgstr "Ihre Änderungen wurden gespeichert"

Expand Down Expand Up @@ -446,12 +464,6 @@ msgstr "Neue Wortmeldung hinzugefügt"
msgid "New votum"
msgstr "Neue Wortmeldung"

#~ msgid "Added a new assembly"
#~ msgstr "Neue Landsgemeinde hinzugefügt"

#~ msgid "New assembly"
#~ msgstr "Neue Landsgemeinde"

#, python-format
#~ msgid "Extraodinary assembly from ${date}"
#~ msgstr "Ausserordentliche Landsgemeinde vom ${date}"
Expand Down
30 changes: 30 additions & 0 deletions src/onegov/landsgemeinde/views/agenda.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from onegov.landsgemeinde import LandsgemeindeApp
from onegov.landsgemeinde.collections import AgendaItemCollection
from onegov.landsgemeinde.forms import AgendaItemForm
from onegov.landsgemeinde.forms.agenda import AgendaItemUploadForm
from onegov.landsgemeinde.layouts import AgendaItemCollectionLayout
from onegov.landsgemeinde.layouts import AgendaItemLayout
from onegov.landsgemeinde.models import AgendaItem
Expand Down Expand Up @@ -62,6 +63,35 @@ def add_agenda_item(
}


@LandsgemeindeApp.form(
    model=AgendaItemCollection,
    name='new-import',
    template='form.pt',
    permission=Private,
    form=AgendaItemUploadForm
)
def import_agenda_item(
    self: AgendaItemCollection,
    request: LandsgemeindeRequest,
    form: AgendaItemUploadForm
) -> RenderData | Response:
    """Show the ZIP import form or import the submitted archive.

    A submitted form creates the agenda item and redirects to its edit
    view; otherwise the form is rendered without any editbar links.
    """
    if not form.submitted(request):
        layout = AgendaItemCollectionLayout(self, request)
        layout.breadcrumbs.append(Link(_('Import'), '#'))
        layout.editbar_links = []
        return {
            'layout': layout,
            'title': _('New agenda item'),
            'form': form,
        }

    imported = form.import_agenda_item(self)
    request.success(_('Imported a new agenda item'))
    return redirect(request.link(imported, 'edit'))


@LandsgemeindeApp.html(
model=AgendaItem,
template='agenda_item.pt',
Expand Down