From ec5cb67b3314d05675c501cd78ce92d02ae8fde4 Mon Sep 17 00:00:00 2001 From: maheshgudi Date: Fri, 6 Jan 2017 12:33:26 +0530 Subject: added test cases for hook evaluator --- yaksh/evaluator_tests/test_bash_evaluation.py | 259 +++++++++++++++++++ yaksh/evaluator_tests/test_c_cpp_evaluation.py | 330 ++++++++++++++++++++++++ yaksh/evaluator_tests/test_code_evaluation.py | 49 ---- yaksh/evaluator_tests/test_grader_evaluation.py | 42 +++ yaksh/evaluator_tests/test_java_evaluation.py | 329 +++++++++++++++++++++++ yaksh/evaluator_tests/test_python_evaluation.py | 214 ++++++++++++++- 6 files changed, 1172 insertions(+), 51 deletions(-) delete mode 100644 yaksh/evaluator_tests/test_code_evaluation.py create mode 100644 yaksh/evaluator_tests/test_grader_evaluation.py (limited to 'yaksh') diff --git a/yaksh/evaluator_tests/test_bash_evaluation.py b/yaksh/evaluator_tests/test_bash_evaluation.py index 4b551d7..0662831 100644 --- a/yaksh/evaluator_tests/test_bash_evaluation.py +++ b/yaksh/evaluator_tests/test_bash_evaluation.py @@ -269,5 +269,264 @@ class BashStdIOEvaluationTestCases(EvaluatorBaseTest): # Then self.assertTrue(result.get('success')) + +class BashHookEvaluationTestCases(EvaluatorBaseTest): + + def setUp(self): + self.f_path = os.path.join(tempfile.gettempdir(), "test.txt") + with open(self.f_path, 'wb') as f: + f.write('2'.encode('ascii')) + self.in_dir = tempfile.mkdtemp() + self.timeout_msg = ("Code took more than {0} seconds to run. " + "You probably have an infinite loop in your" + " code.").format(SERVER_TIMEOUT) + self.file_paths = None + + def tearDown(self): + os.remove(self.f_path) + shutil.rmtree(self.in_dir) + + def test_correct_answer(self): + # Given + user_answer = dedent(""" #!/bin/bash + echo -n Hello, world! + """ + ) + hook_code = dedent("""\ + def check_answer(user_answer): + import subprocess + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + proc = subprocess.Popen(user_answer, shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + stdout,stderr = proc.communicate() + if stdout == "Hello, world!": + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + test_case_data = [{"test_case_type": "hooktestcase", + "hook_code": hook_code,"weight": 1.0 + }] + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': False, + 'language': 'bash' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertTrue(result.get('success')) + + def test_incorrect_answer(self): + # Given + user_answer = dedent(""" #!/bin/bash + echo -n Goodbye, world! + """ + ) + hook_code = dedent("""\ + def check_answer(user_answer): + import subprocess + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + proc = subprocess.Popen(user_answer, shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + stdout,stderr = proc.communicate() + if stdout == "Hello, world!": + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + test_case_data = [{"test_case_type": "hooktestcase", + "hook_code": hook_code,"weight": 1.0 + }] + + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': False, + 'language': 'bash' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertFalse(result.get('success')) + self.assert_correct_output('Incorrect Answer', result.get('error')) + + def test_assert_with_hook(self): + # Given + user_answer = ("#!/bin/bash\n[[ $# -eq 2 ]]" + " && echo $(( $1 + $2 )) && exit $(( $1 + $2 ))" + ) + assert_test_case = dedent(""" + #!/bin/bash + [[ $# -eq 2 ]] && echo $(( $1 + $2 )) && exit $(( $1 + $2 )) + """) + + assert_test_case_args = "1 2\n2 1" + + hook_code = dedent("""\ + def check_answer(user_answer): + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + if "echo $(( $1 + $2 ))" in user_answer: + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + + test_case_data = [{"test_case_type": "standardtestcase", + "test_case": assert_test_case, + "test_case_args":assert_test_case_args, + 'weight': 1.0 + }, + {"test_case_type": "hooktestcase", + "hook_code": hook_code, 'weight': 1.0}, + ] + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': True, + 'language': 'bash' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertTrue(result.get('success')) + self.assertEqual(result.get("weight"), 2.0) + + def test_multiple_hooks(self): + # Given + user_answer = dedent(""" #!/bin/bash + echo -n Hello, world! + """ + ) + + hook_code_1 = dedent("""\ + def check_answer(user_answer): + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + if "echo -n Hello, world!" in user_answer: + success, err, mark_fraction = True, "", 0.5 + return success, err, mark_fraction + """ + ) + hook_code_2 = dedent("""\ + def check_answer(user_answer): + import subprocess + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + proc = subprocess.Popen(user_answer, shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + stdout,stderr = proc.communicate() + if stdout == "Hello, world!": + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + + test_case_data = [{"test_case_type": "hooktestcase", + "hook_code": hook_code_1, 'weight': 1.0}, + {"test_case_type": "hooktestcase", + "hook_code": hook_code_2, 'weight': 1.0}, + ] + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': True, + 'language': 'bash' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertTrue(result.get('success')) + self.assertEqual(result.get("weight"), 1.5) + + def test_infinite_loop(self): + # Given + user_answer = ("#!/bin/bash\nwhile [ 1 ] ;" + " do echo "" > /dev/null ; done") + + hook_code = dedent("""\ + def check_answer(user_answer): + import subprocess + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + proc = subprocess.Popen(user_answer, shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + stdout,stderr = proc.communicate() + if stdout == "Hello, world!": + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + + test_case_data = [{"test_case_type": "hooktestcase", + "hook_code": hook_code,"weight": 1.0 + }] + + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': False, + 'language': 'bash' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertFalse(result.get('success')) + self.assert_correct_output(self.timeout_msg, result.get('error')) + + if __name__ == '__main__': unittest.main() diff --git a/yaksh/evaluator_tests/test_c_cpp_evaluation.py b/yaksh/evaluator_tests/test_c_cpp_evaluation.py index d734cf2..bba7bc7 100644 --- a/yaksh/evaluator_tests/test_c_cpp_evaluation.py +++ b/yaksh/evaluator_tests/test_c_cpp_evaluation.py @@ -639,5 +639,335 @@ class CppStdIOEvaluationTestCases(EvaluatorBaseTest): # Then self.assertTrue(result.get('success')) +class CppHookEvaluationTestCases(EvaluatorBaseTest): + + def setUp(self): + self.f_path = os.path.join(tempfile.gettempdir(), "test.txt") + with open(self.f_path, 'wb') as f: + f.write('2'.encode('ascii')) + tmp_in_dir_path = tempfile.mkdtemp() + self.in_dir = tmp_in_dir_path + self.timeout_msg = ("Code took more than {0} seconds to run. " + "You probably have an infinite loop in your" + " code.").format(SERVER_TIMEOUT) + self.file_paths = None + + def tearDown(self): + os.remove(self.f_path) + shutil.rmtree(self.in_dir) + + def test_correct_answer(self): + # Given + user_answer = dedent("""\ + #include + main() + { + printf("Hello, world!"); + } + """) + hook_code = dedent("""\ + def check_answer(user_answer): + with open("Test.c", "w+") as f: + f.write(user_answer) + import subprocess + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + def _run_command(cmd): + proc = subprocess.Popen("{}".format(cmd), + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + stdout,stderr = proc.communicate() + return stdout,stderr + cmds = ["gcc Test.c", "./a.out"] + for cmd in cmds: + stdout, stderr = _run_command(cmd) + if stdout == "Hello, world!": + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + test_case_data = [{"test_case_type": "hooktestcase", + "hook_code": hook_code,"weight": 1.0 + }] + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': False, + 'language': 'cpp' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertTrue(result.get('success')) + + def test_incorrect_answer(self): + # Given + user_answer = dedent("""\ + #include + main() + { + printf("Goodbye, world!"); + } + """) + hook_code = dedent("""\ + def check_answer(user_answer): + with open("Test.c", "w+") as f: + f.write(user_answer) + import subprocess + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + def _run_command(cmd): + proc = subprocess.Popen("{}".format(cmd), + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + stdout,stderr = proc.communicate() + return stdout,stderr + cmds = ["gcc Test.c", "./a.out"] + for cmd in cmds: + stdout, stderr = _run_command(cmd) + if stdout == "Hello, world!": + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + test_case_data = [{"test_case_type": "hooktestcase", + "hook_code": hook_code,"weight": 1.0 + }] + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': False, + 'language': 'cpp' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertFalse(result.get('success')) + self.assert_correct_output('Incorrect Answer', result.get('error')) + + def test_assert_with_hook(self): + # Given + user_answer = "int add(int a, int b)\n{return a+b;}" + + + assert_test_case = dedent("""\ + #include + #include + + extern int add(int, int); + + template + + void check(T expect, T result) + { + if (expect == result) + { + printf("Correct: Expected %d got %d ",expect,result); + } + else + { + printf("Incorrect: Expected %d got %d ",expect,result); + exit (1); + } + } + + int main(void) + { + int result; + result = add(0,0); + printf("Input submitted to the function: 0, 0"); + check(0, result); + result = add(2,3); + printf("Input submitted to the function: 2 3"); + check(5,result); + printf("All Correct"); + return 0; + } + """) + + hook_code = dedent("""\ + def check_answer(user_answer): + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + if "return a+b;" in user_answer: + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + + test_case_data = [{"test_case_type": "standardtestcase", + "test_case": assert_test_case, + 'weight': 1.0 + }, + {"test_case_type": "hooktestcase", + "hook_code": hook_code, 'weight': 1.0}, + ] + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': True, + 'language': 'cpp' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertTrue(result.get('success')) + self.assertEqual(result.get("weight"), 2.0) + + def test_multiple_hooks(self): + # Given + user_answer = dedent("""\ + #include + main() + { + printf("Hello, world!"); + } + """) + + hook_code_1 = dedent("""\ + def check_answer(user_answer): + with open("Test.c", "w+") as f: + f.write(user_answer) + import subprocess + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + def _run_command(cmd): + proc = subprocess.Popen("{}".format(cmd), + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + stdout,stderr = proc.communicate() + return stdout,stderr + cmds = ["gcc Test.c", "./a.out"] + for cmd in cmds: + stdout, stderr = _run_command(cmd) + if stdout == "Hello, world!": + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + hook_code_2 = dedent("""\ + def check_answer(user_answer): + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + if 'printf("Hello, world!");' in user_answer: + success, err, mark_fraction = True, "", 0.5 + return success, err, mark_fraction + """ + ) + + + + test_case_data = [{"test_case_type": "hooktestcase", + "hook_code": hook_code_1, 'weight': 1.0}, + {"test_case_type": "hooktestcase", + "hook_code": hook_code_2, 'weight': 1.0}, + ] + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': True, + 'language': 'cpp' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertTrue(result.get('success')) + self.assertEqual(result.get("weight"), 1.5) + + def test_infinite_loop(self): + # Given + user_answer = dedent("""\ + #include + int main(void){ + while(0==0){ + printf("abc");} + }""") + + hook_code= dedent("""\ + def check_answer(user_answer): + with open("Test.c", "w+") as f: + f.write(user_answer) + import subprocess + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + def _run_command(cmd): + proc = subprocess.Popen("{}".format(cmd), + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + stdout,stderr = proc.communicate() + return stdout,stderr + cmds = ["gcc Test.c", "./a.out"] + for cmd in cmds: + stdout, stderr = _run_command(cmd) + if stdout == "Hello, world!": + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + test_case_data = [{"test_case_type": "hooktestcase", + "hook_code": hook_code,"weight": 1.0 + }] + + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': False, + 'language': 'cpp' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertFalse(result.get('success')) + self.assert_correct_output(self.timeout_msg, result.get('error')) + + if __name__ == '__main__': unittest.main() diff --git a/yaksh/evaluator_tests/test_code_evaluation.py b/yaksh/evaluator_tests/test_code_evaluation.py deleted file mode 100644 index cb783b0..0000000 --- a/yaksh/evaluator_tests/test_code_evaluation.py +++ /dev/null @@ -1,49 +0,0 @@ -from __future__ import unicode_literals -import unittest -import os -from yaksh import python_assertion_evaluator -from yaksh.language_registry import _LanguageRegistry, get_registry -from yaksh.settings import SERVER_TIMEOUT, code_evaluators - - -class RegistryTestCase(unittest.TestCase): - def setUp(self): - self.registry_object = get_registry() - self.language_registry = _LanguageRegistry() - assertion_evaluator_path = ("yaksh.python_assertion_evaluator" - ".PythonAssertionEvaluator" - ) - stdio_evaluator_path = ("yaksh.python_stdio_evaluator." - "PythonStdIOEvaluator" - ) - code_evaluators['python'] = \ - {"standardtestcase": assertion_evaluator_path, - "stdiobasedtestcase": stdio_evaluator_path - } - - def test_set_register(self): - evaluator_class = self.registry_object.get_class("python", - "standardtestcase" - ) - assertion_evaluator_path = ("yaksh.python_assertion_evaluator" - ".PythonAssertionEvaluator" - ) - stdio_evaluator_path = ("yaksh.python_stdio_evaluator." - "PythonStdIOEvaluator" - ) - class_name = getattr(python_assertion_evaluator, - 'PythonAssertionEvaluator' - ) - self.registry_object.register("python", - {"standardtestcase": assertion_evaluator_path, - "stdiobasedtestcase": stdio_evaluator_path - } - ) - self.assertEqual(evaluator_class, class_name) - - def tearDown(self): - self.registry_object = None - - -if __name__ == '__main__': - unittest.main() diff --git a/yaksh/evaluator_tests/test_grader_evaluation.py b/yaksh/evaluator_tests/test_grader_evaluation.py new file mode 100644 index 0000000..d11f4a0 --- /dev/null +++ b/yaksh/evaluator_tests/test_grader_evaluation.py @@ -0,0 +1,42 @@ +from __future__ import unicode_literals +import unittest +import os +from yaksh import python_assertion_evaluator +from yaksh.language_registry import _LanguageRegistry, get_registry +from yaksh.settings import SERVER_TIMEOUT, code_evaluators + + +class RegistryTestCase(unittest.TestCase): + def setUp(self): + self.registry_object = get_registry() + self.language_registry = _LanguageRegistry() + assertion_evaluator_path = ("yaksh.python_assertion_evaluator" + ".PythonAssertionEvaluator" + ) + stdio_evaluator_path = ("yaksh.python_stdio_evaluator." + "PythonStdIOEvaluator" + ) + + hook_evaluator_path = ("yaksh.hook_evaluator." + "HookEvaluator" + ) + code_evaluators['python'] = \ + {"standardtestcase": assertion_evaluator_path, + "stdiobasedtestcase": stdio_evaluator_path, + "hooktestcase": hook_evaluator_path + } + + def test_set_register(self): + evaluator_class = self.registry_object.get_class("python", + "standardtestcase" + ) + class_name = getattr(python_assertion_evaluator, + 'PythonAssertionEvaluator' + ) + self.assertEqual(evaluator_class, class_name) + + def tearDown(self): + self.registry_object = None + +if __name__ == '__main__': + unittest.main() diff --git a/yaksh/evaluator_tests/test_java_evaluation.py b/yaksh/evaluator_tests/test_java_evaluation.py index b53d8aa..8c9fe90 100644 --- a/yaksh/evaluator_tests/test_java_evaluation.py +++ b/yaksh/evaluator_tests/test_java_evaluation.py @@ -507,5 +507,334 @@ class JavaStdIOEvaluationTestCases(EvaluatorBaseTest): self.assertTrue(result.get("success")) +class JavaHookEvaluationTestCases(EvaluatorBaseTest): + + def setUp(self): + self.f_path = os.path.join(tempfile.gettempdir(), "test.txt") + with open(self.f_path, 'wb') as f: + f.write('2'.encode('ascii')) + tmp_in_dir_path = tempfile.mkdtemp() + self.in_dir = tmp_in_dir_path + self.file_paths = None + gd.SERVER_TIMEOUT = 9 + self.timeout_msg = ("Code took more than {0} seconds to run. " + "You probably have an infinite loop in" + " your code.").format(gd.SERVER_TIMEOUT) + + def tearDown(self): + gd.SERVER_TIMEOUT = 4 + os.remove(self.f_path) + shutil.rmtree(self.in_dir) + + def test_correct_answer(self): + # Given + user_answer = dedent("""\ + class Test + {public static void main(String[] args){ + System.out.print("Hello, world!"); + }} + """) + hook_code = dedent("""\ + def check_answer(user_answer): + with open("Test.java", "w+") as f: + f.write(user_answer) + import subprocess + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + def _run_command(cmd): + proc = subprocess.Popen("{}".format(cmd), + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + stdout,stderr = proc.communicate() + return stdout,stderr + cmds = ["javac Test.java", "java Test"] + for cmd in cmds: + stdout, stderr = _run_command(cmd) + if stdout == "Hello, world!": + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + test_case_data = [{"test_case_type": "hooktestcase", + "hook_code": hook_code,"weight": 1.0 + }] + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': False, + 'language': 'java' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertTrue(result.get('success')) + + def test_incorrect_answer(self): + # Given + user_answer = dedent("""\ + class Test + {public static void main(String[] args){ + System.out.print("Goodbye, world!"); + }} + """) + hook_code = dedent("""\ + def check_answer(user_answer): + with open("Test.java", "w+") as f: + f.write(user_answer) + import subprocess + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + def _run_command(cmd): + proc = subprocess.Popen("{}".format(cmd), + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + stdout,stderr = proc.communicate() + return stdout,stderr + cmds = ["javac Test.java", "java Test"] + for cmd in cmds: + stdout, stderr = _run_command(cmd) + if stdout == "Hello, world!": + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + test_case_data = [{"test_case_type": "hooktestcase", + "hook_code": hook_code,"weight": 1.0 + }] + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': False, + 'language': 'java' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertFalse(result.get('success')) + self.assert_correct_output('Incorrect Answer', result.get('error')) + + def test_assert_with_hook(self): + # Given + user_answer = "class Test {\n\tint square_num(int a) {\n\treturn a*a;\n\t}\n}" + assert_test_case = dedent(""" + class main + { + public static void check(E expect, E result) + { + if(result.equals(expect)) + { + System.out.println("Correct:Output expected "+expect+" and got "+result); + } + else + { + System.out.println("Incorrect:Output expected "+expect+" but got "+result); + System.exit(1); + } + } + public static void main(String arg[]) + { + Test t = new Test(); + int result, input, output; + input = 0; output = 0; + result = t.square_num(input); + System.out.println("Input submitted to the function: "+input); + check(output, result); + input = 5; output = 25; + result = t.square_num(input); + System.out.println("Input submitted to the function: "+input); + check(output, result); + input = 6; output = 36; + result = t.square_num(input); + System.out.println("Input submitted to the function: "+input); + check(output, result); + } + } + """) + + hook_code = dedent("""\ + def check_answer(user_answer): + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + if "return a*a" in user_answer: + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + + test_case_data = [{"test_case_type": "standardtestcase", + "test_case": assert_test_case, + 'weight': 1.0 + }, + {"test_case_type": "hooktestcase", + "hook_code": hook_code, 'weight': 1.0}, + ] + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': True, + 'language': 'java' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertTrue(result.get('success')) + self.assertEqual(result.get("weight"), 2.0) + + def test_multiple_hooks(self): + # Given + user_answer = dedent("""\ + class Test + {public static void main(String[] args){ + System.out.print("Hello, world!"); + }} + """) + + hook_code_1 = dedent("""\ + def check_answer(user_answer): + with open("Test.java", "w+") as f: + f.write(user_answer) + import subprocess + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + def _run_command(cmd): + proc = subprocess.Popen("{}".format(cmd), + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + stdout,stderr = proc.communicate() + return stdout,stderr + cmds = ["javac Test.java", "java Test"] + for cmd in cmds: + stdout, stderr = _run_command(cmd) + if stdout == "Hello, world!": + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + hook_code_2 = dedent("""\ + def check_answer(user_answer): + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + if 'System.out.print("Hello, world!");' in user_answer: + success, err, mark_fraction = True, "", 0.5 + return success, err, mark_fraction + """ + ) + + + test_case_data = [{"test_case_type": "hooktestcase", + "hook_code": hook_code_1, 'weight': 1.0}, + {"test_case_type": "hooktestcase", + "hook_code": hook_code_2, 'weight': 1.0}, + ] + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': True, + 'language': 'java' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertTrue(result.get('success')) + self.assertEqual(result.get("weight"), 1.5) + + def test_infinite_loop(self): + # Given + user_answer = dedent("""\ + class Test + {public static void main(String[] args){ + while(0==0) + { + System.out.print("a");} + }}""") + + hook_code = dedent("""\ + def check_answer(user_answer): + with open("Test.java", "w+") as f: + f.write(user_answer) + import subprocess + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + def _run_command(cmd): + proc = subprocess.Popen("{}".format(cmd), + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + stdout,stderr = proc.communicate() + return stdout,stderr + cmds = ["javac Test.java", "java Test"] + for cmd in cmds: + stdout, stderr = _run_command(cmd) + if stdout == "Hello, world!": + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + + test_case_data = [{"test_case_type": "hooktestcase", + "hook_code": hook_code,"weight": 1.0 + }] + + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': False, + 'language': 'java' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertFalse(result.get('success')) + self.assert_correct_output(self.timeout_msg, result.get('error')) + + if __name__ == '__main__': unittest.main() diff --git a/yaksh/evaluator_tests/test_python_evaluation.py b/yaksh/evaluator_tests/test_python_evaluation.py index c58d7f1..a9f8122 100644 --- a/yaksh/evaluator_tests/test_python_evaluation.py +++ b/yaksh/evaluator_tests/test_python_evaluation.py @@ -7,8 +7,6 @@ from textwrap import dedent # Local import from yaksh.grader import Grader -from yaksh.python_assertion_evaluator import PythonAssertionEvaluator -from yaksh.python_stdio_evaluator import PythonStdIOEvaluator from yaksh.settings import SERVER_TIMEOUT @@ -643,5 +641,217 @@ class PythonStdIOEvaluationTestCases(EvaluatorBaseTest): self.assertFalse(result.get('success')) +class PythonHookEvaluationTestCases(EvaluatorBaseTest): + + def setUp(self): + with open('/tmp/test.txt', 'wb') as f: + f.write('2'.encode('ascii')) + tmp_in_dir_path = tempfile.mkdtemp() + self.in_dir = tmp_in_dir_path + self.timeout_msg = ("Code took more than {0} seconds to run. " + "You probably have an infinite loop in" + " your code.").format(SERVER_TIMEOUT) + self.file_paths = None + + def tearDown(self): + os.remove('/tmp/test.txt') + shutil.rmtree(self.in_dir) + + def test_correct_answer(self): + # Given + user_answer = "def add(a,b):\n\treturn a + b" + hook_code = dedent("""\ + def check_answer(user_answer): + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + exec(user_answer) + if add(1,2) == 3: + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + test_case_data = [{"test_case_type": "hooktestcase", + "hook_code": hook_code,"weight": 1.0 + }] + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': True, + 'language': 'python' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertTrue(result.get('success')) + + def test_incorrect_answer(self): + # Given + user_answer = "def add(a,b):\n\treturn a - b" + hook_code = dedent("""\ + def check_answer(user_answer): + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + exec user_answer + if add(1,2) == 3: + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + test_case_data = [{"test_case_type": "hooktestcase", + "hook_code": hook_code,"weight": 1.0 + }] + + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': False, + 'language': 'python' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertFalse(result.get('success')) + self.assert_correct_output('Incorrect Answer', result.get('error')) + + def test_assert_with_hook(self): + # Given + user_answer = "def add(a,b):\n\treturn a + b" + assert_test_case = "assert add(1,2) == 3" + hook_code = dedent("""\ + def check_answer(user_answer): + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + if "return a + b" in user_answer: + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + test_case_data = [{"test_case_type": "standardtestcase", + "test_case": assert_test_case, 'weight': 1.0}, + {"test_case_type": "hooktestcase", + "hook_code": hook_code, 'weight': 1.0}, + ] + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': True, + 'language': 'python' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertTrue(result.get('success')) + self.assertEqual(result.get("weight"), 2.0) + + def test_multiple_hooks(self): + # Given + user_answer = "def add(a,b):\n\treturn a + b" + hook_code_1 = dedent("""\ + def check_answer(user_answer): + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + if "return a + b" in user_answer: + success, err, mark_fraction = True, "", 0.5 + return success, err, mark_fraction + """ + ) + hook_code_2 = dedent("""\ + def check_answer(user_answer): + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + exec(user_answer) + if add(1,2) == 3: + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + + test_case_data = [{"test_case_type": "hooktestcase", + "hook_code": hook_code_1, 'weight': 1.0}, + {"test_case_type": "hooktestcase", + "hook_code": hook_code_2, 'weight': 1.0}, + ] + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': True, + 'language': 'python' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertTrue(result.get('success')) + self.assertEqual(result.get("weight"), 1.5) + + def test_infinite_loop(self): + # Given + user_answer = "def add(a, b):\n\twhile True:\n\t\tpass" + hook_code = dedent("""\ + def check_answer(user_answer): + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + exec(user_answer) + if add(1,2) == 3: + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + test_case_data = [{"test_case_type": "hooktestcase", + "hook_code": hook_code,"weight": 1.0 + }] + + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': False, + 'language': 'python' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertFalse(result.get('success')) + self.assert_correct_output(self.timeout_msg, result.get('error')) + + if __name__ == '__main__': unittest.main() -- cgit