This repository was archived by the owner on May 1, 2024. It is now read-only.

Commit e1b528a

Python 2/3 compatibility for finance tasks
This does not change any infrastructure to actually run anything under Python 3. It only captures the results of running the finance unit tests and acceptance tests under Python 3 and modernizing the code so that it is compatible with both Python 2 and Python 3.
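The per-file changes below lean on a few recurring idioms. As one illustration, here is a minimal sketch (not a module from this commit) of what the widely added `from __future__ import absolute_import` buys on Python 2:

    # Minimal sketch, not a module from this commit.
    # Without this future import, python 2 treats "import json" inside a package as
    # implicitly relative, so a sibling module named json.py could shadow the
    # standard library. With it, imports are absolute on python 2 and 3 alike.
    from __future__ import absolute_import

    import json

    print(json.dumps({'python': [2, 3]}))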
1 parent c28f775 commit e1b528a


79 files changed: 645 additions, 369 deletions

.isort.cfg

Lines changed: 1 addition & 0 deletions
@@ -1,3 +1,4 @@
 [settings]
 line_length=120
 multi_line_output=5
+known_future_library=future

.travis.yml

Lines changed: 28 additions & 4 deletions
@@ -18,6 +18,33 @@ env:
   - secure: NLqmm18NpV3JRwD4CaugXm5cMWgxjdOA88xRFocmmVrduv0QT9JxBZFGebLYmFQOoKNJ23hz6g3EHe1aWfhLYnr1iUYerrwIriSI1wzuqbXJBRN6gO2n3YW+IfG83OLMZkOIMswT8MEdT3JPWVJL3bsocjHp8bYhRCt1KTCMJjY=
   - secure: aG8l39jaLFWXB5CEOOAR9mJTT3GnqxCl/oFM/7NvTZCBoSWIPIztpFhSAkRE9xSIiKUKXakZcL5H349NLC28jdlHPVsNAaKKt2YNhB6MjmePihp3RPwZGn8c/SjslwY7DPVUKMdWsI7AVNJBH8ab30OPxKwXFAMOiJJza206CYQ=

+# TODO: re-introduce the coverage test.
+matrix:
+  # Mark travis build as finished before jobs under allow_failures complete.
+  fast_finish: true
+
+  include:
+    # Standard unit tests.
+    - name: "Python 2.7 Unit Tests"
+      env: TEST_SUITE=test-docker
+
+    # Python 3 whitelisted and full unit test jobs. Once python 3 support is
+    # complete, delete the whitelist job and remove the full job from
+    # allow_failures.
+    - name: "Python 3.x Whitelisted Unit Tests"
+      env: TEST_SUITE=test-docker-py3-whitelist
+    - name: "Python 3.x FULL Unit Tests"
+      env: TEST_SUITE=test-docker-py3
+
+    - name: "Quality Tests"
+      env: TEST_SUITE=quality-docker
+
+  # Names of jobs (defined above) that cannot fail the travis build even if
+  # they fail.
+  allow_failures:
+    - name: "Python 3.x FULL Unit Tests"
+    - name: "Quality Tests" # This is here because isort is a hot mess right now.
+
 # Do NOT install Python requirements.
 # Doing so is a waste of time since they won't be used.
 install: true
@@ -37,10 +64,7 @@ before_install:
   # Ensure we have a place to store coverage output
   - mkdir -p coverage

-script:
-  - make test-docker
-  - make quality-docker
-  - make coverage-docker
+script: make $TEST_SUITE

 after_success:
   - pip install --upgrade codecov

Makefile

Lines changed: 36 additions & 8 deletions
@@ -3,7 +3,7 @@

 uninstall:
 	pip install -r requirements/pip.txt
-	while pip uninstall -y edx.analytics.tasks; do true; done
+	pip uninstall -y edx.analytics.tasks
 	python setup.py clean

 install: requirements uninstall
@@ -28,7 +28,7 @@ docker-shell:
 system-requirements:
 ifeq (,$(wildcard /usr/bin/yum))
 	# This is not great, we can't use these libraries on slave nodes using this method.
-	sudo apt-get install -y -q libmysqlclient-dev libpq-dev python-dev libffi-dev libssl-dev libxml2-dev libxslt1-dev
+	sudo apt-get install -y -q libmysqlclient-dev libpq-dev python-dev python3-dev libffi-dev libssl-dev libxml2-dev libxslt1-dev
 else
 	sudo yum install -y -q postgresql-devel libffi-devel
 endif
@@ -56,20 +56,48 @@ upgrade: ## update the requirements/*.txt files with the latest packages satisfy
 	CUSTOM_COMPILE_COMMAND="make upgrade" pip-compile --upgrade -o requirements/docs.txt requirements/docs.in
 	CUSTOM_COMPILE_COMMAND="make upgrade" pip-compile --upgrade -o requirements/test.txt requirements/test.in

-test-docker-local:
-	docker run --rm -u root -v `(pwd)`:/edx/app/analytics_pipeline/analytics_pipeline -it edxops/analytics_pipeline:latest make develop-local test-local
-
+# Entry point for running python 2 unit tests in CI.
 test-docker:
-	docker run --rm -u root -v `(pwd)`:/edx/app/analytics_pipeline/analytics_pipeline -it edxops/analytics_pipeline:latest make reset-virtualenv test-requirements develop-local test-local
+	docker run --rm -u root -v `(pwd)`:/edx/app/analytics_pipeline/analytics_pipeline -it edxops/analytics_pipeline:latest make system-requirements reset-virtualenv test-requirements develop-local test-local

+# Entry point for running python 3 unit tests in CI.
 test-docker-py3:
-	docker run --rm -u root -v `(pwd)`:/edx/app/analytics_pipeline/analytics_pipeline -it edxops/analytics_pipeline:latest make reset-virtualenv-py3 test-requirements develop-local test-local
+	docker run --rm -u root -v `(pwd)`:/edx/app/analytics_pipeline/analytics_pipeline -it edxops/analytics_pipeline:latest make system-requirements reset-virtualenv-py3 test-requirements develop-local test-local
+
+# Entry point for running python 3 unit tests in CI. Only invokes a subset
+# (whitelist) of unit tests which are known to pass under python 3.
+test-docker-py3-whitelist:
+	docker run --rm -u root -v `(pwd)`:/edx/app/analytics_pipeline/analytics_pipeline -it edxops/analytics_pipeline:latest make system-requirements reset-virtualenv-py3 test-requirements develop-local test-local-py3-whitelist

 test-local:
 	# TODO: when we have better coverage, modify this to actually fail when coverage is too low.
 	rm -rf .coverage
 	LUIGI_CONFIG_PATH='config/test.cfg' python -m coverage run --rcfile=./.coveragerc -m nose --with-xunit --xunit-file=unittests.xml -A 'not acceptance'

+# Special test-local target specifically for running a whitelist of tests which
+# are known to pass under python 3
+test-local-py3-whitelist:
+	# TODO: when we have better coverage, modify this to actually fail when coverage is too low.
+	rm -rf .coverage
+	LUIGI_CONFIG_PATH='config/test.cfg' python -m coverage run --rcfile=./.coveragerc -m nose --with-xunit --xunit-file=unittests.xml -A 'not acceptance' \
+		edx.analytics.tasks.enterprise.tests \
+		edx.analytics.tasks.insights.tests.test_database_imports \
+		edx.analytics.tasks.insights.tests.test_grades \
+		edx.analytics.tasks.monitor.tests.test_overall_events \
+		edx.analytics.tasks.tests \
+		edx.analytics.tasks.util.tests.helpers \
+		edx.analytics.tasks.util.tests.opaque_key_mixins \
+		edx.analytics.tasks.util.tests.test_decorators \
+		edx.analytics.tasks.util.tests.test_geolocation \
+		edx.analytics.tasks.util.tests.test_hive \
+		edx.analytics.tasks.util.tests.test_retry \
+		edx.analytics.tasks.util.tests.test_s3_util \
+		edx.analytics.tasks.util.tests.test_url \
+		edx.analytics.tasks.warehouse.financial.tests \
+		edx.analytics.tasks.warehouse.tests.test_internal_reporting_active_users \
+		edx.analytics.tasks.warehouse.tests.test_internal_reporting_database \
+		edx.analytics.tasks.warehouse.tests.test_run_vertica_sql_scripts
+
 test: test-requirements develop test-local

 test-acceptance: test-requirements
@@ -98,7 +126,7 @@ quality-docker-local:
 	docker run --rm -u root -v `(pwd)`:/edx/app/analytics_pipeline/analytics_pipeline -it edxops/analytics_pipeline:latest make develop-local quality-local

 quality-docker:
-	docker run --rm -u root -v `(pwd)`:/edx/app/analytics_pipeline/analytics_pipeline -it edxops/analytics_pipeline:latest make reset-virtualenv test-requirements develop-local quality-local
+	docker run --rm -u root -v `(pwd)`:/edx/app/analytics_pipeline/analytics_pipeline -it edxops/analytics_pipeline:latest make system-requirements reset-virtualenv test-requirements develop-local quality-local

 coverage-docker:
 	docker run --rm -u root -v `(pwd)`:/edx/app/analytics_pipeline/analytics_pipeline -it edxops/analytics_pipeline:latest coverage xml

edx/analytics/tasks/common/bigquery_load.py

Lines changed: 7 additions & 3 deletions
@@ -1,10 +1,14 @@
+from __future__ import absolute_import
+from future.standard_library import install_aliases
+install_aliases()
+
 import json
 import logging
 import os
 import subprocess
 import tempfile
 import time
-import urlparse
+from urllib.parse import urlparse

 import luigi

@@ -216,7 +220,7 @@ def field_delimiter(self):

     @property
     def null_marker(self):
-        return '\N'
+        return r'\N'

     @property
     def quote_character(self):
@@ -262,7 +266,7 @@ def init_copy(self, client):
         self.output().clear_marker_table()

     def _get_destination_from_source(self, source_path):
-        parsed_url = urlparse.urlparse(source_path)
+        parsed_url = urlparse(source_path)
         destination_path = url_path_join('gs://{}'.format(parsed_url.netloc), parsed_url.path)
         return destination_path

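For reference, a minimal sketch (not code from this repository) of the aliasing idiom used above; it assumes the "future" package is installed:

    # install_aliases() makes the python 3 standard-library names importable on
    # python 2; on python 3 it is effectively a no-op.
    from future.standard_library import install_aliases
    install_aliases()

    from urllib.parse import urlparse  # works on both interpreters after the call above

    parsed = urlparse('gs://my-bucket/path/to/file')
    print(parsed.netloc)  # 'my-bucket'
    print(parsed.path)    # '/path/to/file'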

edx/analytics/tasks/common/mapreduce.py

Lines changed: 4 additions & 4 deletions
@@ -7,7 +7,7 @@
 import logging
 import logging.config
 import os
-import StringIO
+from io import StringIO
 from hashlib import md5

 import luigi
@@ -183,7 +183,7 @@ class EmulatedMapReduceJobRunner(luigi.contrib.hadoop.JobRunner):
     """

     def group(self, input):
-        output = StringIO.StringIO()
+        output = StringIO()
         lines = []
         for i, line in enumerate(input):
             parts = line.rstrip('\n').split('\t')
@@ -197,7 +197,7 @@ def group(self, input):
     def run_job(self, job):
         job.init_hadoop()
         job.init_mapper()
-        map_output = StringIO.StringIO()
+        map_output = StringIO()
         input_targets = luigi.task.flatten(job.input_hadoop())
         for input_target in input_targets:
             # if file is a directory, then assume that it's Hadoop output,
@@ -232,7 +232,7 @@ def run_job(self, job):
         try:
             reduce_output = job.output().open('w')
         except Exception:
-            reduce_output = StringIO.StringIO()
+            reduce_output = StringIO()

         try:
             job._run_reducer(reduce_input, reduce_output)
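A minimal sketch (not from this repository) of the in-memory buffer swap above: the StringIO module no longer exists in Python 3, while io.StringIO is available on both interpreters.

    # io.StringIO is a text buffer on python 2 and python 3; note that on
    # python 2 it only accepts unicode, hence the u'' literal below.
    from io import StringIO

    output = StringIO()
    output.write(u'key\tvalue\n')
    print(output.getvalue())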

edx/analytics/tasks/common/mysql_load.py

Lines changed: 1 addition & 1 deletion
@@ -422,7 +422,7 @@ def coerce_for_mysql_connect(input):
         return input
     # Hive indicates a null value with the string "\N"
     # We represent an infinite value with the string "inf", MySQL has no such representation so we use NULL
-    if input in ('None', '\\N', 'inf', '-inf'):
+    if input in ('None', r'\N', 'inf', '-inf'):
         return None
     if isinstance(input, str):
         return input.decode('utf-8')
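The raw-string change above (and the similar ones in bigquery_load.py, vertica_load.py, and test_sqoop.py) matters because Hive's null marker is the two-character sequence backslash-N. A minimal sketch, not code from this repository:

    # r'\N' spells backslash + N explicitly and is identical on python 2 and 3.
    hive_null = r'\N'
    assert hive_null == '\\N'   # equivalent escaped spelling
    # The non-raw, unescaped literal '\N' happens to work on python 2 but is a
    # SyntaxError on python 3, where \N introduces a named escape such as
    # '\N{LATIN SMALL LETTER A}'.

    def coerce(value):
        # hypothetical helper mirroring the check in coerce_for_mysql_connect:
        # Hive nulls and infinities become SQL NULL
        return None if value in ('None', r'\N', 'inf', '-inf') else value

    print(coerce(r'\N'))  # None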

edx/analytics/tasks/common/pathutil.py

Lines changed: 4 additions & 4 deletions
@@ -268,12 +268,12 @@ def get_event_and_date_string(self, line):
         """Default mapper implementation, that always outputs the log line, but with a configurable key."""
         event = eventlog.parse_json_event(line)
         if event is None:
-            self.incr_counter('Event', 'Discard Unparseable Event', 1)
+            self.incr_counter(u'Event', u'Discard Unparseable Event', 1)
             return None

         event_time = self.get_event_time(event)
         if not event_time:
-            self.incr_counter('Event', 'Discard Missing Time Field', 1)
+            self.incr_counter(u'Event', u'Discard Missing Time Field', 1)
             return None

         # Don't use strptime to parse the date, it is extremely slow
@@ -283,7 +283,7 @@ def get_event_and_date_string(self, line):
         date_string = event_time.split("T")[0]

         if date_string < self.lower_bound_date_string or date_string >= self.upper_bound_date_string:
-            # Slow: self.incr_counter('Event', 'Discard Outside Date Interval', 1)
+            # Slow: self.incr_counter(u'Event', u'Discard Outside Date Interval', 1)
             return None

         return event, date_string
@@ -307,5 +307,5 @@ def get_map_input_file(self):
             return os.environ['map_input_file']
         except KeyError:
             log.warn('mapreduce_map_input_file not defined in os.environ, unable to determine input file path')
-            self.incr_counter('Event', 'Missing map_input_file', 1)
+            self.incr_counter(u'Event', u'Missing map_input_file', 1)
             return ''

edx/analytics/tasks/common/sqoop.py

Lines changed: 8 additions & 1 deletion
@@ -1,6 +1,8 @@
 """
 Gather data using Sqoop table dumps run on RDBMS databases.
 """
+from __future__ import absolute_import
+
 import datetime
 import json
 import logging
@@ -296,7 +298,12 @@ def run_job(self, job):
         metadata['end_time'] = datetime.datetime.utcnow().isoformat()
         try:
             with job.metadata_output().open('w') as metadata_file:
-                json.dump(metadata, metadata_file)
+                # Under python 2, json.dumps() will return ascii-only bytes, so .encode('utf-8')
+                # is a no-op. Under python 3, json.dumps() will return ascii-only unicode, so
+                # .encode('utf-8') will return bytes, thus normalizing the output to bytes
+                # across all python versions.
+                metadata_file.write(json.dumps(metadata).encode('utf-8'))
+                metadata_file.flush()
         except Exception:
             log.exception("Unable to dump metadata information.")
             pass
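A standalone sketch of the normalization described in the comment above (illustrative only, not code from this repository):

    import json

    # hypothetical metadata payload for illustration
    metadata = {'end_time': '2019-01-01T00:00:00'}
    payload = json.dumps(metadata).encode('utf-8')
    # python 2: json.dumps() returns an ascii str, so encoding it is effectively a no-op
    # python 3: json.dumps() returns str (unicode), so encoding it yields bytes
    # Either way, `payload` is a byte string suitable for a target opened in binary mode.
    print(type(payload))  # <type 'str'> on python 2, <class 'bytes'> on python 3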

edx/analytics/tasks/common/tests/test_sqoop.py

Lines changed: 3 additions & 3 deletions
@@ -197,12 +197,12 @@ def test_connect_with_columns(self):
         self.assertEquals(arglist[-3], 'column1,column2')

     def test_connect_with_null_string(self):
-        self.create_and_run_mysql_task(null_string='\\\\N')
+        self.create_and_run_mysql_task(null_string=r'\\N')
         arglist = self.get_call_args_after_run()
         self.assertEquals(arglist[-6], '--null-string')
-        self.assertEquals(arglist[-5], '\\\\N')
+        self.assertEquals(arglist[-5], r'\\N')
         self.assertEquals(arglist[-4], '--null-non-string')
-        self.assertEquals(arglist[-3], '\\\\N')
+        self.assertEquals(arglist[-3], r'\\N')

     def test_connect_with_fields_terminations(self):
         self.create_and_run_mysql_task(fields_terminated_by='\x01')

edx/analytics/tasks/common/vertica_load.py

Lines changed: 4 additions & 2 deletions
@@ -1,6 +1,7 @@
 """
 Support for loading data into an HP Vertica database.
 """
+from __future__ import absolute_import

 import logging
 import traceback
@@ -12,6 +13,7 @@
 from edx.analytics.tasks.util.overwrite import OverwriteOutputMixin
 from edx.analytics.tasks.util.url import ExternalURL
 from edx.analytics.tasks.util.vertica_target import CredentialFileVerticaTarget
+import six

 log = logging.getLogger(__name__)

@@ -416,7 +418,7 @@ def copy_delimiter(self):
     @property
     def copy_null_sequence(self):
         """The null sequence in the data to be copied. Default is Hive NULL (\\N)"""
-        return "'\\N'"
+        return r"'\N'"

     @property
     def copy_enclosed_by(self):
@@ -437,7 +439,7 @@ def copy_escape_spec(self):

     def copy_data_table_from_target(self, cursor):
         """Performs the copy query from the insert source."""
-        if isinstance(self.columns[0], basestring):
+        if isinstance(self.columns[0], six.string_types):
             column_names = ','.join([name for name in self.columns])
         elif len(self.columns[0]) == 2:
             column_names = ','.join([name for name, _type in self.columns])
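A minimal sketch (not code from this repository) of the six.string_types substitution above; the helper name is hypothetical and it assumes the "six" package is installed:

    # six.string_types is (str, unicode) on python 2 and (str,) on python 3, so
    # the isinstance check replaces the python-2-only builtin `basestring`.
    import six

    def column_names(columns):
        if isinstance(columns[0], six.string_types):
            # plain list of names, e.g. ['id', 'name']
            return ','.join(name for name in columns)
        # list of (name, type) pairs, e.g. [('id', 'INT'), ('name', 'VARCHAR(255)')]
        return ','.join(name for name, _type in columns)

    print(column_names(['id', 'name']))                             # id,name
    print(column_names([('id', 'INT'), ('name', 'VARCHAR(255)')]))  # id,name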
