Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -913,6 +913,37 @@ models/enwiki.damaging.gradient_boosting.model: \

revscoring model_info $@ > model_info/enwiki.damaging.md


# Build the auto-labeled enwiki revision dataset.
#
# Feeds the labeled 20k (2015) revision set on stdin to `./utility autolabel`,
# pointed at en.wikipedia.org, and captures its stdout as the target file.
# NOTE(review): the --trusted-* and --revert-radius options presumably control
# which edits are treated as trusted and how far to look for reverts -- confirm
# against `./utility autolabel --help`.
datasets/enwiki.autolabeled_revisions.w_cache.20k_2015.json: \
		datasets/enwiki.labeled_revisions.w_cache.20k_2015.json
	./utility autolabel --host=https://en.wikipedia.org \
		--trusted-groups=bot,bureaucrat,sysop \
		--trusted-edits=1000 \
		--revert-radius=5 \
		--verbose < $< > $@


# Train the enwiki "reverted" gradient-boosting model.
#
# Step 1: run `revscoring cv_train` with the auto-labeled dataset on stdin,
#         using the enwiki damaging feature list and the `reverted_for_damage`
#         label, and write the serialized model to the target.
# Step 2: dump the trained model's report to model_info/enwiki.reverted.md.
#
# NOTE(review): the version and label weight come from $(damaging_major_minor)
# and $(damaging_weight) even though this is the "reverted" model -- confirm
# that sharing the damaging settings is intentional.
models/enwiki.reverted.gradient_boosting.model: \
		datasets/enwiki.autolabeled_revisions.w_cache.20k_2015.json
	revscoring cv_train \
		revscoring.scoring.models.GradientBoosting \
		editquality.feature_lists.enwiki.damaging \
		reverted_for_damage \
		--version=$(damaging_major_minor).1 \
		-p 'learning_rate=0.01' \
		-p 'max_depth=7' \
		-p 'max_features="log2"' \
		-p 'n_estimators=700' \
		--label-weight $(damaging_weight) \
		--pop-rate "true=0.034163555464634586" \
		--pop-rate "false=0.9658364445353654" \
		--center --scale < $< > $@

	revscoring model_info $@ > model_info/enwiki.reverted.md


tuning_reports/enwiki.goodfaith.md: \
datasets/enwiki.labeled_revisions.w_cache.20k_2015.json
cat $< | \
Expand Down
81 changes: 81 additions & 0 deletions model_info/enwiki.reverted.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
Model Information:
- type: GradientBoosting
- version: 0.5.1
- params: {'n_iter_no_change': None, 'validation_fraction': 0.1, 'ccp_alpha': 0.0, 'min_weight_fraction_leaf': 0.0, 'n_estimators': 700, 'min_impurity_split': None, 'init': None, 'subsample': 1.0, 'multilabel': False, 'label_weights': OrderedDict([(True, 10)]), 'max_depth': 7, 'scale': True, 'max_features': 'log2', 'tol': 0.0001, 'presort': 'deprecated', 'labels': [True, False], 'min_samples_split': 2, 'population_rates': None, 'warm_start': False, 'center': True, 'criterion': 'friedman_mse', 'min_samples_leaf': 1, 'max_leaf_nodes': None, 'loss': 'deviance', 'learning_rate': 0.01, 'random_state': None, 'verbose': 0, 'min_impurity_decrease': 0.0}
Environment:
- revscoring_version: '2.8.2'
- platform: 'Linux-4.9.0-12-amd64-x86_64-with-debian-9.12'
- machine: 'x86_64'
- version: '#1 SMP Debian 4.9.210-1+deb9u1 (2020-06-07)'
- system: 'Linux'
- processor: ''
- python_build: ('default', 'Sep 27 2018 17:25:39')
- python_compiler: 'GCC 6.3.0 20170516'
- python_branch: ''
- python_implementation: 'CPython'
- python_revision: ''
- python_version: '3.5.3'
- release: '4.9.0-12-amd64'

Statistics:
counts (n=19205):
label n ~True ~False
------- ----- --- ------- --------
True 1247 --> 745 502
False 17958 --> 1692 16266
rates:
True False
---------- ------ -------
sample 0.065 0.935
population 0.034 0.966
match_rate (micro=0.862, macro=0.5):
True False
------ -------
0.111 0.889
filter_rate (micro=0.138, macro=0.5):
True False
------ -------
0.889 0.111
recall (micro=0.895, macro=0.752):
True False
------ -------
0.597 0.906
!recall (micro=0.608, macro=0.752):
True False
------ -------
0.906 0.597
precision (micro=0.957, macro=0.584):
True False
------ -------
0.183 0.985
!precision (micro=0.211, macro=0.584):
True False
------ -------
0.985 0.183
f1 (micro=0.921, macro=0.612):
True False
------ -------
0.28 0.944
!f1 (micro=0.303, macro=0.612):
True False
------ -------
0.944 0.28
accuracy (micro=0.895, macro=0.895):
True False
------ -------
0.895 0.895
fpr (micro=0.392, macro=0.248):
True False
------ -------
0.094 0.403
roc_auc (micro=0.862, macro=0.862):
True False
------ -------
0.862 0.862
pr_auc (micro=0.97, macro=0.647):
True False
------ -------
0.3 0.993

- score_schema: {'properties': {'prediction': {'description': 'The most likely label predicted by the estimator', 'type': 'boolean'}, 'probability': {'description': 'A mapping of probabilities onto each of the potential output labels', 'properties': {'false': {'type': 'number'}, 'true': {'type': 'number'}}, 'type': 'object'}}, 'title': 'Scikit learn-based classifier score with probability', 'type': 'object'}

3 changes: 3 additions & 0 deletions models/enwiki.reverted.gradient_boosting.model
Git LFS file not shown