This repository was archived by the owner on May 1, 2024. It is now read-only.

Commit e1b528a

Python 2/3 compatibility for finance tasks
This does not change any infrastructure to actually run anything under Python 3. It only captures the results of running the finance unit tests and acceptance tests under Python 3 and modernizing the code so that it is compatible with both Python 2 and Python 3.
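The per-file changes below lean on a few recurring idioms. As one illustration, here is a minimal sketch (not a module from this commit) of what the widely added `from __future__ import absolute_import` buys on Python 2:

    # Minimal sketch, not a module from this commit.
    # Without this future import, python 2 treats "import json" inside a package as
    # implicitly relative, so a sibling module named json.py could shadow the
    # standard library. With it, imports are absolute on python 2 and 3 alike.
    from __future__ import absolute_import

    import json

    print(json.dumps({'python': [2, 3]}))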
1 parent c28f775 commit e1b528a


79 files changed: 645 additions, 369 deletions

.isort.cfg

Lines changed: 1 addition & 0 deletions
@@ -1,3 +1,4 @@
 [settings]
 line_length=120
 multi_line_output=5
+known_future_library=future

.travis.yml

Lines changed: 28 additions & 4 deletions
@@ -18,6 +18,33 @@ env:
   - secure: NLqmm18NpV3JRwD4CaugXm5cMWgxjdOA88xRFocmmVrduv0QT9JxBZFGebLYmFQOoKNJ23hz6g3EHe1aWfhLYnr1iUYerrwIriSI1wzuqbXJBRN6gO2n3YW+IfG83OLMZkOIMswT8MEdT3JPWVJL3bsocjHp8bYhRCt1KTCMJjY=
   - secure: aG8l39jaLFWXB5CEOOAR9mJTT3GnqxCl/oFM/7NvTZCBoSWIPIztpFhSAkRE9xSIiKUKXakZcL5H349NLC28jdlHPVsNAaKKt2YNhB6MjmePihp3RPwZGn8c/SjslwY7DPVUKMdWsI7AVNJBH8ab30OPxKwXFAMOiJJza206CYQ=

+# TODO: re-introduce the coverage test.
+matrix:
+  # Mark travis build as finished before jobs under allow_failures complete.
+  fast_finish: true
+
+  include:
+    # Standard unit tests.
+    - name: "Python 2.7 Unit Tests"
+      env: TEST_SUITE=test-docker
+
+    # Python 3 whitelisted and full unit test jobs. Once python 3 support is
+    # complete, delete the whitelist job and remove the full job from
+    # allow_failures.
+    - name: "Python 3.x Whitelisted Unit Tests"
+      env: TEST_SUITE=test-docker-py3-whitelist
+    - name: "Python 3.x FULL Unit Tests"
+      env: TEST_SUITE=test-docker-py3
+
+    - name: "Quality Tests"
+      env: TEST_SUITE=quality-docker
+
+  # Names of jobs (defined above) that cannot fail the travis build even if
+  # they fail.
+  allow_failures:
+    - name: "Python 3.x FULL Unit Tests"
+    - name: "Quality Tests" # This is here because isort is a hot mess right now.
+
 # Do NOT install Python requirements.
 # Doing so is a waste of time since they won't be used.
 install: true
@@ -37,10 +64,7 @@ before_install:
   # Ensure we have a place to store coverage output
   - mkdir -p coverage

-script:
-  - make test-docker
-  - make quality-docker
-  - make coverage-docker
+script: make $TEST_SUITE

 after_success:
   - pip install --upgrade codecov

Makefile

Lines changed: 36 additions & 8 deletions
@@ -3,7 +3,7 @@

 uninstall:
 	pip install -r requirements/pip.txt
-	while pip uninstall -y edx.analytics.tasks; do true; done
+	pip uninstall -y edx.analytics.tasks
 	python setup.py clean

 install: requirements uninstall
@@ -28,7 +28,7 @@ docker-shell:
 system-requirements:
 ifeq (,$(wildcard /usr/bin/yum))
 	# This is not great, we can't use these libraries on slave nodes using this method.
-	sudo apt-get install -y -q libmysqlclient-dev libpq-dev python-dev libffi-dev libssl-dev libxml2-dev libxslt1-dev
+	sudo apt-get install -y -q libmysqlclient-dev libpq-dev python-dev python3-dev libffi-dev libssl-dev libxml2-dev libxslt1-dev
 else
 	sudo yum install -y -q postgresql-devel libffi-devel
 endif
@@ -56,20 +56,48 @@ upgrade: ## update the requirements/*.txt files with the latest packages satisfy
 	CUSTOM_COMPILE_COMMAND="make upgrade" pip-compile --upgrade -o requirements/docs.txt requirements/docs.in
 	CUSTOM_COMPILE_COMMAND="make upgrade" pip-compile --upgrade -o requirements/test.txt requirements/test.in

-test-docker-local:
-	docker run --rm -u root -v `(pwd)`:/edx/app/analytics_pipeline/analytics_pipeline -it edxops/analytics_pipeline:latest make develop-local test-local
-
+# Entry point for running python 2 unit tests in CI.
 test-docker:
-	docker run --rm -u root -v `(pwd)`:/edx/app/analytics_pipeline/analytics_pipeline -it edxops/analytics_pipeline:latest make reset-virtualenv test-requirements develop-local test-local
+	docker run --rm -u root -v `(pwd)`:/edx/app/analytics_pipeline/analytics_pipeline -it edxops/analytics_pipeline:latest make system-requirements reset-virtualenv test-requirements develop-local test-local

+# Entry point for running python 3 unit tests in CI.
 test-docker-py3:
-	docker run --rm -u root -v `(pwd)`:/edx/app/analytics_pipeline/analytics_pipeline -it edxops/analytics_pipeline:latest make reset-virtualenv-py3 test-requirements develop-local test-local
+	docker run --rm -u root -v `(pwd)`:/edx/app/analytics_pipeline/analytics_pipeline -it edxops/analytics_pipeline:latest make system-requirements reset-virtualenv-py3 test-requirements develop-local test-local
+
+# Entry point for running python 3 unit tests in CI. Only invokes a subset
+# (whitelist) of unit tests which are known to pass under python 3.
+test-docker-py3-whitelist:
+	docker run --rm -u root -v `(pwd)`:/edx/app/analytics_pipeline/analytics_pipeline -it edxops/analytics_pipeline:latest make system-requirements reset-virtualenv-py3 test-requirements develop-local test-local-py3-whitelist

 test-local:
 	# TODO: when we have better coverage, modify this to actually fail when coverage is too low.
 	rm -rf .coverage
 	LUIGI_CONFIG_PATH='config/test.cfg' python -m coverage run --rcfile=./.coveragerc -m nose --with-xunit --xunit-file=unittests.xml -A 'not acceptance'

+# Special test-local target specifically for running a whitelist of tests which
+# are known to pass under python 3
+test-local-py3-whitelist:
+	# TODO: when we have better coverage, modify this to actually fail when coverage is too low.
+	rm -rf .coverage
+	LUIGI_CONFIG_PATH='config/test.cfg' python -m coverage run --rcfile=./.coveragerc -m nose --with-xunit --xunit-file=unittests.xml -A 'not acceptance' \
+		edx.analytics.tasks.enterprise.tests \
+		edx.analytics.tasks.insights.tests.test_database_imports \
+		edx.analytics.tasks.insights.tests.test_grades \
+		edx.analytics.tasks.monitor.tests.test_overall_events \
+		edx.analytics.tasks.tests \
+		edx.analytics.tasks.util.tests.helpers \
+		edx.analytics.tasks.util.tests.opaque_key_mixins \
+		edx.analytics.tasks.util.tests.test_decorators \
+		edx.analytics.tasks.util.tests.test_geolocation \
+		edx.analytics.tasks.util.tests.test_hive \
+		edx.analytics.tasks.util.tests.test_retry \
+		edx.analytics.tasks.util.tests.test_s3_util \
+		edx.analytics.tasks.util.tests.test_url \
+		edx.analytics.tasks.warehouse.financial.tests \
+		edx.analytics.tasks.warehouse.tests.test_internal_reporting_active_users \
+		edx.analytics.tasks.warehouse.tests.test_internal_reporting_database \
+		edx.analytics.tasks.warehouse.tests.test_run_vertica_sql_scripts
+
 test: test-requirements develop test-local

 test-acceptance: test-requirements
@@ -98,7 +126,7 @@ quality-docker-local:
 	docker run --rm -u root -v `(pwd)`:/edx/app/analytics_pipeline/analytics_pipeline -it edxops/analytics_pipeline:latest make develop-local quality-local

 quality-docker:
-	docker run --rm -u root -v `(pwd)`:/edx/app/analytics_pipeline/analytics_pipeline -it edxops/analytics_pipeline:latest make reset-virtualenv test-requirements develop-local quality-local
+	docker run --rm -u root -v `(pwd)`:/edx/app/analytics_pipeline/analytics_pipeline -it edxops/analytics_pipeline:latest make system-requirements reset-virtualenv test-requirements develop-local quality-local

 coverage-docker:
 	docker run --rm -u root -v `(pwd)`:/edx/app/analytics_pipeline/analytics_pipeline -it edxops/analytics_pipeline:latest coverage xml

edx/analytics/tasks/common/bigquery_load.py

Lines changed: 7 additions & 3 deletions
@@ -1,10 +1,14 @@
+from __future__ import absolute_import
+from future.standard_library import install_aliases
+install_aliases()
+
 import json
 import logging
 import os
 import subprocess
 import tempfile
 import time
-import urlparse
+from urllib.parse import urlparse

 import luigi

@@ -216,7 +220,7 @@ def field_delimiter(self):

     @property
     def null_marker(self):
-        return '\N'
+        return r'\N'

     @property
     def quote_character(self):
@@ -262,7 +266,7 @@ def init_copy(self, client):
         self.output().clear_marker_table()

     def _get_destination_from_source(self, source_path):
-        parsed_url = urlparse.urlparse(source_path)
+        parsed_url = urlparse(source_path)
         destination_path = url_path_join('gs://{}'.format(parsed_url.netloc), parsed_url.path)
         return destination_path

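For reference, a minimal sketch (not code from this repository) of the aliasing idiom used above; it assumes the "future" package is installed:

    # install_aliases() makes the python 3 standard-library names importable on
    # python 2; on python 3 it is effectively a no-op.
    from future.standard_library import install_aliases
    install_aliases()

    from urllib.parse import urlparse  # works on both interpreters after the call above

    parsed = urlparse('gs://my-bucket/path/to/file')
    print(parsed.netloc)  # 'my-bucket'
    print(parsed.path)    # '/path/to/file'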

edx/analytics/tasks/common/mapreduce.py

Lines changed: 4 additions & 4 deletions
@@ -7,7 +7,7 @@
 import logging
 import logging.config
 import os
-import StringIO
+from io import StringIO
 from hashlib import md5

 import luigi
@@ -183,7 +183,7 @@ class EmulatedMapReduceJobRunner(luigi.contrib.hadoop.JobRunner):
     """

     def group(self, input):
-        output = StringIO.StringIO()
+        output = StringIO()
         lines = []
         for i, line in enumerate(input):
             parts = line.rstrip('\n').split('\t')
@@ -197,7 +197,7 @@ def group(self, input):
     def run_job(self, job):
         job.init_hadoop()
         job.init_mapper()
-        map_output = StringIO.StringIO()
+        map_output = StringIO()
         input_targets = luigi.task.flatten(job.input_hadoop())
         for input_target in input_targets:
             # if file is a directory, then assume that it's Hadoop output,
@@ -232,7 +232,7 @@ def run_job(self, job):
         try:
             reduce_output = job.output().open('w')
         except Exception:
-            reduce_output = StringIO.StringIO()
+            reduce_output = StringIO()

         try:
             job._run_reducer(reduce_input, reduce_output)
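A minimal sketch (not from this repository) of the in-memory buffer swap above: the StringIO module no longer exists in Python 3, while io.StringIO is available on both interpreters.

    # io.StringIO is a text buffer on python 2 and python 3; note that on
    # python 2 it only accepts unicode, hence the u'' literal below.
    from io import StringIO

    output = StringIO()
    output.write(u'key\tvalue\n')
    print(output.getvalue())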

edx/analytics/tasks/common/mysql_load.py

Lines changed: 1 addition & 1 deletion
@@ -422,7 +422,7 @@ def coerce_for_mysql_connect(input):
         return input
     # Hive indicates a null value with the string "\N"
     # We represent an infinite value with the string "inf", MySQL has no such representation so we use NULL
-    if input in ('None', '\\N', 'inf', '-inf'):
+    if input in ('None', r'\N', 'inf', '-inf'):
         return None
     if isinstance(input, str):
         return input.decode('utf-8')
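The raw-string change above (and the similar ones in bigquery_load.py, vertica_load.py, and test_sqoop.py) matters because Hive's null marker is the two-character sequence backslash-N. A minimal sketch, not code from this repository:

    # r'\N' spells backslash + N explicitly and is identical on python 2 and 3.
    hive_null = r'\N'
    assert hive_null == '\\N'   # equivalent escaped spelling
    # The non-raw, unescaped literal '\N' happens to work on python 2 but is a
    # SyntaxError on python 3, where \N introduces a named escape such as
    # '\N{LATIN SMALL LETTER A}'.

    def coerce(value):
        # hypothetical helper mirroring the check in coerce_for_mysql_connect:
        # Hive nulls and infinities become SQL NULL
        return None if value in ('None', r'\N', 'inf', '-inf') else value

    print(coerce(r'\N'))  # None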

edx/analytics/tasks/common/pathutil.py

Lines changed: 4 additions & 4 deletions
@@ -268,12 +268,12 @@ def get_event_and_date_string(self, line):
         """Default mapper implementation, that always outputs the log line, but with a configurable key."""
         event = eventlog.parse_json_event(line)
         if event is None:
-            self.incr_counter('Event', 'Discard Unparseable Event', 1)
+            self.incr_counter(u'Event', u'Discard Unparseable Event', 1)
             return None

         event_time = self.get_event_time(event)
         if not event_time:
-            self.incr_counter('Event', 'Discard Missing Time Field', 1)
+            self.incr_counter(u'Event', u'Discard Missing Time Field', 1)
             return None

         # Don't use strptime to parse the date, it is extremely slow
@@ -283,7 +283,7 @@ def get_event_and_date_string(self, line):
         date_string = event_time.split("T")[0]

         if date_string < self.lower_bound_date_string or date_string >= self.upper_bound_date_string:
-            # Slow: self.incr_counter('Event', 'Discard Outside Date Interval', 1)
+            # Slow: self.incr_counter(u'Event', u'Discard Outside Date Interval', 1)
             return None

         return event, date_string
@@ -307,5 +307,5 @@ def get_map_input_file(self):
             return os.environ['map_input_file']
         except KeyError:
             log.warn('mapreduce_map_input_file not defined in os.environ, unable to determine input file path')
-            self.incr_counter('Event', 'Missing map_input_file', 1)
+            self.incr_counter(u'Event', u'Missing map_input_file', 1)
             return ''

edx/analytics/tasks/common/sqoop.py

Lines changed: 8 additions & 1 deletion
@@ -1,6 +1,8 @@
 """
 Gather data using Sqoop table dumps run on RDBMS databases.
 """
+from __future__ import absolute_import
+
 import datetime
 import json
 import logging
@@ -296,7 +298,12 @@ def run_job(self, job):
         metadata['end_time'] = datetime.datetime.utcnow().isoformat()
         try:
             with job.metadata_output().open('w') as metadata_file:
-                json.dump(metadata, metadata_file)
+                # Under python 2, json.dumps() will return ascii-only bytes, so .encode('utf-8')
+                # is a no-op. Under python 3, json.dumps() will return ascii-only unicode, so
+                # .encode('utf-8') will return bytes, thus normalizing the output to bytes
+                # across all python versions.
+                metadata_file.write(json.dumps(metadata).encode('utf-8'))
+                metadata_file.flush()
         except Exception:
             log.exception("Unable to dump metadata information.")
             pass
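A standalone sketch of the normalization described in the comment above (illustrative only, not code from this repository):

    import json

    # hypothetical metadata payload for illustration
    metadata = {'end_time': '2019-01-01T00:00:00'}
    payload = json.dumps(metadata).encode('utf-8')
    # python 2: json.dumps() returns an ascii str, so encoding it is effectively a no-op
    # python 3: json.dumps() returns str (unicode), so encoding it yields bytes
    # Either way, `payload` is a byte string suitable for a target opened in binary mode.
    print(type(payload))  # <type 'str'> on python 2, <class 'bytes'> on python 3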

edx/analytics/tasks/common/tests/test_sqoop.py

Lines changed: 3 additions & 3 deletions
@@ -197,12 +197,12 @@ def test_connect_with_columns(self):
         self.assertEquals(arglist[-3], 'column1,column2')

     def test_connect_with_null_string(self):
-        self.create_and_run_mysql_task(null_string='\\\\N')
+        self.create_and_run_mysql_task(null_string=r'\\N')
         arglist = self.get_call_args_after_run()
         self.assertEquals(arglist[-6], '--null-string')
-        self.assertEquals(arglist[-5], '\\\\N')
+        self.assertEquals(arglist[-5], r'\\N')
         self.assertEquals(arglist[-4], '--null-non-string')
-        self.assertEquals(arglist[-3], '\\\\N')
+        self.assertEquals(arglist[-3], r'\\N')

     def test_connect_with_fields_terminations(self):
         self.create_and_run_mysql_task(fields_terminated_by='\x01')

edx/analytics/tasks/common/vertica_load.py

Lines changed: 4 additions & 2 deletions
@@ -1,6 +1,7 @@
 """
 Support for loading data into an HP Vertica database.
 """
+from __future__ import absolute_import

 import logging
 import traceback
@@ -12,6 +13,7 @@
 from edx.analytics.tasks.util.overwrite import OverwriteOutputMixin
 from edx.analytics.tasks.util.url import ExternalURL
 from edx.analytics.tasks.util.vertica_target import CredentialFileVerticaTarget
+import six

 log = logging.getLogger(__name__)

@@ -416,7 +418,7 @@ def copy_delimiter(self):
     @property
     def copy_null_sequence(self):
         """The null sequence in the data to be copied. Default is Hive NULL (\\N)"""
-        return "'\\N'"
+        return r"'\N'"

     @property
     def copy_enclosed_by(self):
@@ -437,7 +439,7 @@ def copy_escape_spec(self):

     def copy_data_table_from_target(self, cursor):
         """Performs the copy query from the insert source."""
-        if isinstance(self.columns[0], basestring):
+        if isinstance(self.columns[0], six.string_types):
             column_names = ','.join([name for name in self.columns])
         elif len(self.columns[0]) == 2:
             column_names = ','.join([name for name, _type in self.columns])
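A minimal sketch (not code from this repository) of the six.string_types substitution above; the helper name is hypothetical and it assumes the "six" package is installed:

    # six.string_types is (str, unicode) on python 2 and (str,) on python 3, so
    # the isinstance check replaces the python-2-only builtin `basestring`.
    import six

    def column_names(columns):
        if isinstance(columns[0], six.string_types):
            # plain list of names, e.g. ['id', 'name']
            return ','.join(name for name in columns)
        # list of (name, type) pairs, e.g. [('id', 'INT'), ('name', 'VARCHAR(255)')]
        return ','.join(name for name, _type in columns)

    print(column_names(['id', 'name']))                             # id,name
    print(column_names([('id', 'INT'), ('name', 'VARCHAR(255)')]))  # id,name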
