-rw-r--r--  yaksh/evaluator_tests/test_python_evaluation.py       | 11
-rw-r--r--  yaksh/evaluator_tests/test_python_stdio_evaluator.py  | 62
-rw-r--r--  yaksh/python_stdio_evaluator.py                       | 62
3 files changed, 103 insertions(+), 32 deletions(-)
diff --git a/yaksh/evaluator_tests/test_python_evaluation.py b/yaksh/evaluator_tests/test_python_evaluation.py
index 1fba73e..82cf4c3 100644
--- a/yaksh/evaluator_tests/test_python_evaluation.py
+++ b/yaksh/evaluator_tests/test_python_evaluation.py
@@ -400,7 +400,7 @@ class PythonAssertionEvaluationTestCases(EvaluatorBaseTest):
         result = grader.evaluate(kwargs)
         error_as_str = ''.join(result.get("error"))
         err = error_as_str.splitlines()
-
+
         # Then
         self.assertFalse(result.get("success"))
         self.assertEqual(5, len(err))
@@ -578,7 +578,10 @@ class PythonStdIOEvaluationTestCases(EvaluatorBaseTest):

         # Then
         self.assertFalse(result.get('success'))
-        self.assert_correct_output("Incorrect answer", result.get('error'))
+        self.assert_correct_output(
+            "ERROR:\nExpected:\n3\nGiven:\n-1\n\nError in line 1 of output.",
+            result.get('error')
+        )

     def test_file_based_answer(self):
         # Given
@@ -728,7 +731,7 @@ class PythonHookEvaluationTestCases(EvaluatorBaseTest):
         # Then
         self.assertFalse(result.get('success'))
         self.assert_correct_output('Incorrect Answer', result.get('error'))
-
+
     def test_assert_with_hook(self):
         # Given
         user_answer = "def add(a,b):\n\treturn a + b"
@@ -815,7 +818,7 @@ class PythonHookEvaluationTestCases(EvaluatorBaseTest):
         # Then
         self.assertTrue(result.get('success'))
         self.assertEqual(result.get("weight"), 1.5)
-
+
     def test_infinite_loop(self):
         # Given
         user_answer = "def add(a, b):\n\twhile True:\n\t\tpass"
diff --git a/yaksh/evaluator_tests/test_python_stdio_evaluator.py b/yaksh/evaluator_tests/test_python_stdio_evaluator.py
new file mode 100644
index 0000000..db5028a
--- /dev/null
+++ b/yaksh/evaluator_tests/test_python_stdio_evaluator.py
@@ -0,0 +1,62 @@
+from textwrap import dedent
+
+from yaksh.python_stdio_evaluator import compare_outputs
+
+
+def test_compare_outputs():
+    exp = "5\n5\n"
+    given = "5\n5\n"
+    success, msg = compare_outputs(given, exp)
+    assert success
+
+    exp = "5\n5\n"
+    given = "5\n5"
+    success, msg = compare_outputs(given, exp)
+    assert success
+
+    exp = "5\r5"
+    given = "5\n5"
+    success, msg = compare_outputs(given, exp)
+    assert success
+
+    exp = " 5 \r 5 "
+    given = " 5 \n 5 "
+    success, msg = compare_outputs(given, exp)
+    assert success
+
+    exp = "5\n5\n"
+    given = "5 5"
+    success, msg = compare_outputs(given, exp)
+    assert not success
+    m = dedent("""\
+    ERROR: Got 1 lines in output, we expected 2.
+    Expected:
+    5
+    5
+
+    Given:
+    5 5
+    """)
+    assert m == msg
+
+    exp = "5\n5\n"
+    given = "5\n6"
+    success, msg = compare_outputs(given, exp)
+    assert not success
+    m = dedent("""\
+    ERROR:
+    Expected:
+    5
+    5
+
+    Given:
+    5
+    6
+
+    Error in line 2 of output.
+    Expected line 2:
+    5
+    Given line 2:
+    6
+    """)
+    assert m == msg
diff --git a/yaksh/python_stdio_evaluator.py b/yaksh/python_stdio_evaluator.py
index 67f57a9..27bf69b 100644
--- a/yaksh/python_stdio_evaluator.py
+++ b/yaksh/python_stdio_evaluator.py
@@ -1,12 +1,7 @@
-#!/usr/bin/env python
 from __future__ import unicode_literals
 import sys
-import traceback
-import os
-from os.path import join
-import importlib
 from contextlib import contextmanager
-from textwrap import dedent
+

 try:
     from StringIO import StringIO
@@ -28,6 +23,36 @@ def redirect_stdout():
         sys.stdout = old_target  # restore to the previous value


+def _show_expected_given(expected, given):
+    return "Expected:\n{0}\nGiven:\n{1}\n".format(expected, given)
+
+
+def compare_outputs(given, expected):
+    given_lines = given.splitlines()
+    ng = len(given_lines)
+    exp_lines = expected.splitlines()
+    ne = len(exp_lines)
+    if ng != ne:
+        msg = "ERROR: Got {0} lines in output, we expected {1}.\n".format(
+            ng, ne
+        )
+        msg += _show_expected_given(expected, given)
+        return False, msg
+    else:
+        for i, (given_line, expected_line) in \
+                enumerate(zip(given_lines, exp_lines)):
+            if given_line.strip() != expected_line.strip():
+                msg = "ERROR:\n"
+                msg += _show_expected_given(expected, given)
+                msg += "\nError in line %d of output.\n" % (i+1)
+                msg += "Expected line {0}:\n{1}\nGiven line {0}:\n{2}\n"\
+                    .format(
+                        i+1, expected_line, given_line
+                    )
+                return False, msg
+    return True, "Correct answer."
+
+
 class PythonStdIOEvaluator(BaseEvaluator):
     """Tests the Python code obtained from Code Server"""
     def __init__(self, metadata, test_case_data):
@@ -41,7 +66,7 @@ class PythonStdIOEvaluator(BaseEvaluator):
         # Set test case data values
         self.expected_input = test_case_data.get('expected_input')
         self.expected_output = test_case_data.get('expected_output')
-        self.weight = test_case_data.get('weight')
+        self.weight = test_case_data.get('weight')

     def teardown(self):
         # Delete the created file.
@@ -64,25 +89,6 @@ class PythonStdIOEvaluator(BaseEvaluator):
         return self.output_value

     def check_code(self):
-        success = False
-        mark_fraction = 0.0
-
-        tb = None
-        if self.output_value == self.expected_output:
-            success = True
-            err = None
-            mark_fraction = self.weight
-        else:
-            success = False
-            err = dedent("""
-            Incorrect answer:
-            Given input - {0}
-            Expected output - {1}
-            Your output - {2}
-            """.format(self.expected_input,
-                       self.expected_output,
-                       self.output_value
-                       )
-                     )
-        del tb
+        mark_fraction = self.weight
+        success, err = compare_outputs(self.output_value, self.expected_output)
         return success, err, mark_fraction
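For reference, the behaviour of the new compare_outputs helper can be seen by driving it directly. The snippet below is a minimal sketch, assuming only that the yaksh package is importable; the sample strings are illustrative.

    from yaksh.python_stdio_evaluator import compare_outputs

    # splitlines() treats "\r", "\n" and "\r\n" alike, and each line is
    # strip()-ed before comparison, so differing line endings and
    # surrounding spaces still compare equal.
    success, msg = compare_outputs("5\n5", "5\r5\n")
    assert success and msg == "Correct answer."

    # A differing number of lines fails fast with a line-count message.
    success, msg = compare_outputs("5 5", "5\n5\n")
    assert not success and msg.startswith("ERROR: Got 1 lines")

    # With equal line counts, the first mismatching line is pinpointed.
    success, msg = compare_outputs("5\n6", "5\n5\n")
    assert not success and "Error in line 2 of output." in msg

The design choice here is per-line normalisation rather than raw string equality: whitespace-only differences are forgiven, while any substantive mismatch is reported against a specific line number, which is exactly what the updated assertion in test_python_evaluation.py checks for.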