Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/docs/user_guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ This method takes 5 optional parameters:
not available.
* If set to `False`, `pandarallel` will use multiprocessing data transfer (pipe) to
transfer data between the main process and workers.
- `leave`: (bool, `True` by default): whether to keep the progress bar after running on a notebook or not.
* If set to `True`, will leave the notebook progress bar after running.
* If set to `False`, will delete the notebook progress bar after running.

Using memory file system reduces data transfer time between the main process and
workers, especially for big data.
Expand Down
22 changes: 13 additions & 9 deletions pandarallel/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,7 @@ def parallelize_with_memory_file_system(
nb_requested_workers: int,
data_type: Type[DataType],
progress_bars_type: ProgressBarsType,
leave: bool = True,
):
def closure(
data: Any,
Expand Down Expand Up @@ -239,7 +240,7 @@ def closure(

show_progress_bars = progress_bars_type != ProgressBarsType.No

progress_bars = get_progress_bars(progresses_length, show_progress_bars)
progress_bars = get_progress_bars(progresses_length, show_progress_bars, leave)
progresses = [0] * nb_workers
workers_status = [WorkerStatus.Running] * nb_workers

Expand Down Expand Up @@ -344,6 +345,7 @@ def parallelize_with_pipe(
nb_requested_workers: int,
data_type: Type[DataType],
progress_bars_type: ProgressBarsType,
leave: bool = True,
):
def closure(
data: Any,
Expand Down Expand Up @@ -380,7 +382,7 @@ def closure(

show_progress_bars = progress_bars_type != ProgressBarsType.No

progress_bars = get_progress_bars(progresses_length, show_progress_bars)
progress_bars = get_progress_bars(progresses_length, show_progress_bars, leave)
progresses = [0] * nb_workers
workers_status = [WorkerStatus.Running] * nb_workers

Expand Down Expand Up @@ -446,6 +448,7 @@ def initialize(
progress_bar=False,
verbose=2,
use_memory_fs: Optional[bool] = None,
leave: Optional[bool] = True,
) -> None:
show_progress_bars = progress_bar
is_memory_fs_available = Path(MEMORY_FS_ROOT).exists()
Expand Down Expand Up @@ -510,36 +513,37 @@ def initialize(

# DataFrame
pd.DataFrame.parallel_apply = parallelize(
nb_workers, DataFrame.Apply, progress_bars_in_user_defined_function
nb_workers, DataFrame.Apply, progress_bars_in_user_defined_function, leave
)
pd.DataFrame.parallel_applymap = parallelize(
nb_workers,
DataFrame.ApplyMap,
progress_bars_in_user_defined_function_multiply_by_number_of_columns,
leave,
)

# DataFrame GroupBy
PandaDataFrameGroupBy.parallel_apply = parallelize(
nb_workers, DataFrameGroupBy.Apply, progress_bars_in_user_defined_function
nb_workers, DataFrameGroupBy.Apply, progress_bars_in_user_defined_function, leave
)

# Expanding GroupBy
PandasExpandingGroupby.parallel_apply = parallelize(
nb_workers, ExpandingGroupBy.Apply, progress_bars_in_work_function
nb_workers, ExpandingGroupBy.Apply, progress_bars_in_work_function, leave
)

# Rolling GroupBy
PandasRollingGroupby.parallel_apply = parallelize(
nb_workers, RollingGroupBy.Apply, progress_bars_in_work_function
nb_workers, RollingGroupBy.Apply, progress_bars_in_work_function, leave
)

# Series
pd.Series.parallel_apply = parallelize(
nb_workers, Series.Apply, progress_bars_in_user_defined_function
nb_workers, Series.Apply, progress_bars_in_user_defined_function, leave
)
pd.Series.parallel_map = parallelize(nb_workers, Series.Map, show_progress_bars)
pd.Series.parallel_map = parallelize(nb_workers, Series.Map, show_progress_bars, leave)

# Series Rolling
pd.core.window.Rolling.parallel_apply = parallelize(
nb_workers, SeriesRolling.Apply, progress_bars_in_user_defined_function
nb_workers, SeriesRolling.Apply, progress_bars_in_user_defined_function, leave
)
10 changes: 7 additions & 3 deletions pandarallel/progress_bars.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,12 +118,13 @@ def update(self, values: List[int]) -> None:


class ProgressBarsNotebookLab(ProgressBars):
def __init__(self, maxs: List[int], show: bool) -> None:
def __init__(self, maxs: List[int], show: bool, leave: bool = True) -> None:
"""Initialization.
Positional argument:
maxs - List containing the max value of each progress bar
"""
self.__show = show
self.__leave = leave

if not show:
return
Expand Down Expand Up @@ -159,6 +160,9 @@ def update(self, values: List[int]) -> None:

if value >= bar.max:
bar.bar_style = "success"
if not self.__leave:
bar.layout.display = 'none'
label.layout.display = 'none'

label.value = "{} / {}".format(value, bar.max)

Expand All @@ -172,10 +176,10 @@ def set_error(self, index: int) -> None:


def get_progress_bars(
maxs: List[int], show
maxs: List[int], show, leave=True,
) -> Union[ProgressBarsNotebookLab, ProgressBarsConsole]:
return (
ProgressBarsNotebookLab(maxs, show)
ProgressBarsNotebookLab(maxs, show, leave)
if is_notebook_lab()
else ProgressBarsConsole(maxs, show)
)
Expand Down