diff options
Diffstat (limited to 'yaksh/evaluator_tests')
-rw-r--r-- | yaksh/evaluator_tests/test_bash_evaluation.py | 261 | ||||
-rw-r--r-- | yaksh/evaluator_tests/test_c_cpp_evaluation.py | 330 | ||||
-rw-r--r-- | yaksh/evaluator_tests/test_grader_evaluation.py (renamed from yaksh/evaluator_tests/test_code_evaluation.py) | 19 | ||||
-rw-r--r-- | yaksh/evaluator_tests/test_java_evaluation.py | 329 | ||||
-rw-r--r-- | yaksh/evaluator_tests/test_python_evaluation.py | 214 |
5 files changed, 1138 insertions, 15 deletions
diff --git a/yaksh/evaluator_tests/test_bash_evaluation.py b/yaksh/evaluator_tests/test_bash_evaluation.py index 4b551d7..482d45e 100644 --- a/yaksh/evaluator_tests/test_bash_evaluation.py +++ b/yaksh/evaluator_tests/test_bash_evaluation.py @@ -269,5 +269,266 @@ class BashStdIOEvaluationTestCases(EvaluatorBaseTest): # Then self.assertTrue(result.get('success')) + +class BashHookEvaluationTestCases(EvaluatorBaseTest): + + def setUp(self): + self.f_path = os.path.join(tempfile.gettempdir(), "test.txt") + with open(self.f_path, 'wb') as f: + f.write('2'.encode('ascii')) + self.in_dir = tempfile.mkdtemp() + self.timeout_msg = ("Code took more than {0} seconds to run. " + "You probably have an infinite loop in your" + " code.").format(SERVER_TIMEOUT) + self.file_paths = None + + def tearDown(self): + os.remove(self.f_path) + shutil.rmtree(self.in_dir) + + def test_correct_answer(self): + # Given + user_answer = dedent(""" #!/bin/bash + echo -n Hello, world! + """ + ) + hook_code = dedent("""\ + def check_answer(user_answer): + import subprocess + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + proc = subprocess.Popen(user_answer, shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + stdout,stderr = proc.communicate() + if stdout.decode("utf-8") == "Hello, world!": + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + test_case_data = [{"test_case_type": "hooktestcase", + "hook_code": hook_code,"weight": 1.0 + }] + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': False, + 'language': 'bash' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertTrue(result.get('success')) + + def test_incorrect_answer(self): + # Given + user_answer = dedent(""" #!/bin/bash + echo -n Goodbye, world! + """ + ) + hook_code = dedent("""\ + def check_answer(user_answer): + import subprocess + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + proc = subprocess.Popen(user_answer, shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + stdout,stderr = proc.communicate() + if stdout.decode("utf-8") == "Hello, world!": + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + test_case_data = [{"test_case_type": "hooktestcase", + "hook_code": hook_code,"weight": 1.0 + }] + + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': False, + 'language': 'bash' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertFalse(result.get('success')) + self.assert_correct_output('Incorrect Answer', result.get('error')) + + def test_assert_with_hook(self): + # Given + user_answer = ("#!/bin/bash\n[[ $# -eq 2 ]]" + " && echo $(( $1 + $2 )) && exit $(( $1 + $2 ))" + ) + assert_test_case = dedent(""" + #!/bin/bash + [[ $# -eq 2 ]] && echo $(( $1 + $2 )) && exit $(( $1 + $2 )) + """) + + assert_test_case_args = "1 2\n2 1" + + hook_code = dedent("""\ + def check_answer(user_answer): + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + if "echo $(( $1 + $2 ))" in user_answer: + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + + test_case_data = [{"test_case_type": "standardtestcase", + "test_case": assert_test_case, + "test_case_args":assert_test_case_args, + 'weight': 1.0 + }, + {"test_case_type": "hooktestcase", + "hook_code": hook_code, 'weight': 1.0}, + ] + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': True, + 'language': 'bash' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertTrue(result.get('success')) + self.assertEqual(result.get("weight"), 2.0) + + def test_multiple_hooks(self): + # Given + user_answer = dedent(""" #!/bin/bash + echo -n Hello, world! + """ + ) + + hook_code_1 = dedent("""\ + def check_answer(user_answer): + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + if "echo -n Hello, world!" in user_answer: + success, err, mark_fraction = True, "", 0.5 + return success, err, mark_fraction + """ + ) + hook_code_2 = dedent("""\ + def check_answer(user_answer): + import subprocess + import sys + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + proc = subprocess.Popen(user_answer, shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + stdout,stderr = proc.communicate() + + if stdout.decode('utf-8') == "Hello, world!": + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + + test_case_data = [{"test_case_type": "hooktestcase", + "hook_code": hook_code_1, 'weight': 1.0}, + {"test_case_type": "hooktestcase", + "hook_code": hook_code_2, 'weight': 1.0}, + ] + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': True, + 'language': 'bash' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertTrue(result.get('success')) + self.assertEqual(result.get("weight"), 1.5) + + def test_infinite_loop(self): + # Given + user_answer = ("#!/bin/bash\nwhile [ 1 ] ;" + " do echo "" > /dev/null ; done") + + hook_code = dedent("""\ + def check_answer(user_answer): + import subprocess + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + proc = subprocess.Popen(user_answer, shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + stdout,stderr = proc.communicate() + if stdout.decode("utf-8") == "Hello, world!": + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + + test_case_data = [{"test_case_type": "hooktestcase", + "hook_code": hook_code,"weight": 1.0 + }] + + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': False, + 'language': 'bash' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertFalse(result.get('success')) + self.assert_correct_output(self.timeout_msg, result.get('error')) + + if __name__ == '__main__': unittest.main() diff --git a/yaksh/evaluator_tests/test_c_cpp_evaluation.py b/yaksh/evaluator_tests/test_c_cpp_evaluation.py index d734cf2..304f1cb 100644 --- a/yaksh/evaluator_tests/test_c_cpp_evaluation.py +++ b/yaksh/evaluator_tests/test_c_cpp_evaluation.py @@ -639,5 +639,335 @@ class CppStdIOEvaluationTestCases(EvaluatorBaseTest): # Then self.assertTrue(result.get('success')) +class CppHookEvaluationTestCases(EvaluatorBaseTest): + + def setUp(self): + self.f_path = os.path.join(tempfile.gettempdir(), "test.txt") + with open(self.f_path, 'wb') as f: + f.write('2'.encode('ascii')) + tmp_in_dir_path = tempfile.mkdtemp() + self.in_dir = tmp_in_dir_path + self.timeout_msg = ("Code took more than {0} seconds to run. " + "You probably have an infinite loop in your" + " code.").format(SERVER_TIMEOUT) + self.file_paths = None + + def tearDown(self): + os.remove(self.f_path) + shutil.rmtree(self.in_dir) + + def test_correct_answer(self): + # Given + user_answer = dedent("""\ + #include<stdio.h> + main() + { + printf("Hello, world!"); + } + """) + hook_code = dedent("""\ + def check_answer(user_answer): + with open("Test.c", "w+") as f: + f.write(user_answer) + import subprocess + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + def _run_command(cmd): + proc = subprocess.Popen("{}".format(cmd), + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + stdout,stderr = proc.communicate() + return stdout,stderr + cmds = ["gcc Test.c", "./a.out"] + for cmd in cmds: + stdout, stderr = _run_command(cmd) + if stdout.decode("utf-8") == "Hello, world!": + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + test_case_data = [{"test_case_type": "hooktestcase", + "hook_code": hook_code,"weight": 1.0 + }] + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': False, + 'language': 'cpp' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertTrue(result.get('success')) + + def test_incorrect_answer(self): + # Given + user_answer = dedent("""\ + #include<stdio.h> + main() + { + printf("Goodbye, world!"); + } + """) + hook_code = dedent("""\ + def check_answer(user_answer): + with open("Test.c", "w+") as f: + f.write(user_answer) + import subprocess + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + def _run_command(cmd): + proc = subprocess.Popen("{}".format(cmd), + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + stdout,stderr = proc.communicate() + return stdout,stderr + cmds = ["gcc Test.c", "./a.out"] + for cmd in cmds: + stdout, stderr = _run_command(cmd) + if stdout.decode("utf-8") == "Hello, world!": + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + test_case_data = [{"test_case_type": "hooktestcase", + "hook_code": hook_code,"weight": 1.0 + }] + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': False, + 'language': 'cpp' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertFalse(result.get('success')) + self.assert_correct_output('Incorrect Answer', result.get('error')) + + def test_assert_with_hook(self): + # Given + user_answer = "int add(int a, int b)\n{return a+b;}" + + + assert_test_case = dedent("""\ + #include <stdio.h> + #include <stdlib.h> + + extern int add(int, int); + + template <class T> + + void check(T expect, T result) + { + if (expect == result) + { + printf("Correct: Expected %d got %d ",expect,result); + } + else + { + printf("Incorrect: Expected %d got %d ",expect,result); + exit (1); + } + } + + int main(void) + { + int result; + result = add(0,0); + printf("Input submitted to the function: 0, 0"); + check(0, result); + result = add(2,3); + printf("Input submitted to the function: 2 3"); + check(5,result); + printf("All Correct"); + return 0; + } + """) + + hook_code = dedent("""\ + def check_answer(user_answer): + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + if "return a+b;" in user_answer: + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + + test_case_data = [{"test_case_type": "standardtestcase", + "test_case": assert_test_case, + 'weight': 1.0 + }, + {"test_case_type": "hooktestcase", + "hook_code": hook_code, 'weight': 1.0}, + ] + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': True, + 'language': 'cpp' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertTrue(result.get('success')) + self.assertEqual(result.get("weight"), 2.0) + + def test_multiple_hooks(self): + # Given + user_answer = dedent("""\ + #include<stdio.h> + main() + { + printf("Hello, world!"); + } + """) + + hook_code_1 = dedent("""\ + def check_answer(user_answer): + with open("Test.c", "w+") as f: + f.write(user_answer) + import subprocess + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + def _run_command(cmd): + proc = subprocess.Popen("{}".format(cmd), + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + stdout,stderr = proc.communicate() + return stdout,stderr + cmds = ["gcc Test.c", "./a.out"] + for cmd in cmds: + stdout, stderr = _run_command(cmd) + if stdout.decode("utf-8") == "Hello, world!": + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + hook_code_2 = dedent("""\ + def check_answer(user_answer): + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + if 'printf("Hello, world!");' in user_answer: + success, err, mark_fraction = True, "", 0.5 + return success, err, mark_fraction + """ + ) + + + + test_case_data = [{"test_case_type": "hooktestcase", + "hook_code": hook_code_1, 'weight': 1.0}, + {"test_case_type": "hooktestcase", + "hook_code": hook_code_2, 'weight': 1.0}, + ] + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': True, + 'language': 'cpp' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertTrue(result.get('success')) + self.assertEqual(result.get("weight"), 1.5) + + def test_infinite_loop(self): + # Given + user_answer = dedent("""\ + #include<stdio.h> + int main(void){ + while(0==0){ + printf("abc");} + }""") + + hook_code= dedent("""\ + def check_answer(user_answer): + with open("Test.c", "w+") as f: + f.write(user_answer) + import subprocess + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + def _run_command(cmd): + proc = subprocess.Popen("{}".format(cmd), + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + stdout,stderr = proc.communicate() + return stdout,stderr + cmds = ["gcc Test.c", "./a.out"] + for cmd in cmds: + stdout, stderr = _run_command(cmd) + if stdout.decode("utf-8") == "Hello, world!": + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + test_case_data = [{"test_case_type": "hooktestcase", + "hook_code": hook_code,"weight": 1.0 + }] + + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': False, + 'language': 'cpp' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertFalse(result.get('success')) + self.assert_correct_output(self.timeout_msg, result.get('error')) + + if __name__ == '__main__': unittest.main() diff --git a/yaksh/evaluator_tests/test_code_evaluation.py b/yaksh/evaluator_tests/test_grader_evaluation.py index cb783b0..d11f4a0 100644 --- a/yaksh/evaluator_tests/test_code_evaluation.py +++ b/yaksh/evaluator_tests/test_grader_evaluation.py @@ -16,34 +16,27 @@ class RegistryTestCase(unittest.TestCase): stdio_evaluator_path = ("yaksh.python_stdio_evaluator." "PythonStdIOEvaluator" ) + + hook_evaluator_path = ("yaksh.hook_evaluator." + "HookEvaluator" + ) code_evaluators['python'] = \ {"standardtestcase": assertion_evaluator_path, - "stdiobasedtestcase": stdio_evaluator_path + "stdiobasedtestcase": stdio_evaluator_path, + "hooktestcase": hook_evaluator_path } def test_set_register(self): evaluator_class = self.registry_object.get_class("python", "standardtestcase" ) - assertion_evaluator_path = ("yaksh.python_assertion_evaluator" - ".PythonAssertionEvaluator" - ) - stdio_evaluator_path = ("yaksh.python_stdio_evaluator." - "PythonStdIOEvaluator" - ) class_name = getattr(python_assertion_evaluator, 'PythonAssertionEvaluator' ) - self.registry_object.register("python", - {"standardtestcase": assertion_evaluator_path, - "stdiobasedtestcase": stdio_evaluator_path - } - ) self.assertEqual(evaluator_class, class_name) def tearDown(self): self.registry_object = None - if __name__ == '__main__': unittest.main() diff --git a/yaksh/evaluator_tests/test_java_evaluation.py b/yaksh/evaluator_tests/test_java_evaluation.py index b53d8aa..3d127af 100644 --- a/yaksh/evaluator_tests/test_java_evaluation.py +++ b/yaksh/evaluator_tests/test_java_evaluation.py @@ -507,5 +507,334 @@ class JavaStdIOEvaluationTestCases(EvaluatorBaseTest): self.assertTrue(result.get("success")) +class JavaHookEvaluationTestCases(EvaluatorBaseTest): + + def setUp(self): + self.f_path = os.path.join(tempfile.gettempdir(), "test.txt") + with open(self.f_path, 'wb') as f: + f.write('2'.encode('ascii')) + tmp_in_dir_path = tempfile.mkdtemp() + self.in_dir = tmp_in_dir_path + self.file_paths = None + gd.SERVER_TIMEOUT = 9 + self.timeout_msg = ("Code took more than {0} seconds to run. " + "You probably have an infinite loop in" + " your code.").format(gd.SERVER_TIMEOUT) + + def tearDown(self): + gd.SERVER_TIMEOUT = 4 + os.remove(self.f_path) + shutil.rmtree(self.in_dir) + + def test_correct_answer(self): + # Given + user_answer = dedent("""\ + class Test + {public static void main(String[] args){ + System.out.print("Hello, world!"); + }} + """) + hook_code = dedent("""\ + def check_answer(user_answer): + with open("Test.java", "w+") as f: + f.write(user_answer) + import subprocess + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + def _run_command(cmd): + proc = subprocess.Popen("{}".format(cmd), + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + stdout,stderr = proc.communicate() + return stdout,stderr + cmds = ["javac Test.java", "java Test"] + for cmd in cmds: + stdout, stderr = _run_command(cmd) + if stdout.decode("utf-8") == "Hello, world!": + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + test_case_data = [{"test_case_type": "hooktestcase", + "hook_code": hook_code,"weight": 1.0 + }] + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': False, + 'language': 'java' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertTrue(result.get('success')) + + def test_incorrect_answer(self): + # Given + user_answer = dedent("""\ + class Test + {public static void main(String[] args){ + System.out.print("Goodbye, world!"); + }} + """) + hook_code = dedent("""\ + def check_answer(user_answer): + with open("Test.java", "w+") as f: + f.write(user_answer) + import subprocess + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + def _run_command(cmd): + proc = subprocess.Popen("{}".format(cmd), + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + stdout,stderr = proc.communicate() + return stdout,stderr + cmds = ["javac Test.java", "java Test"] + for cmd in cmds: + stdout, stderr = _run_command(cmd) + if stdout.decode("utf-8") == "Hello, world!": + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + test_case_data = [{"test_case_type": "hooktestcase", + "hook_code": hook_code,"weight": 1.0 + }] + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': False, + 'language': 'java' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertFalse(result.get('success')) + self.assert_correct_output('Incorrect Answer', result.get('error')) + + def test_assert_with_hook(self): + # Given + user_answer = "class Test {\n\tint square_num(int a) {\n\treturn a*a;\n\t}\n}" + assert_test_case = dedent(""" + class main + { + public static <E> void check(E expect, E result) + { + if(result.equals(expect)) + { + System.out.println("Correct:Output expected "+expect+" and got "+result); + } + else + { + System.out.println("Incorrect:Output expected "+expect+" but got "+result); + System.exit(1); + } + } + public static void main(String arg[]) + { + Test t = new Test(); + int result, input, output; + input = 0; output = 0; + result = t.square_num(input); + System.out.println("Input submitted to the function: "+input); + check(output, result); + input = 5; output = 25; + result = t.square_num(input); + System.out.println("Input submitted to the function: "+input); + check(output, result); + input = 6; output = 36; + result = t.square_num(input); + System.out.println("Input submitted to the function: "+input); + check(output, result); + } + } + """) + + hook_code = dedent("""\ + def check_answer(user_answer): + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + if "return a*a" in user_answer: + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + + test_case_data = [{"test_case_type": "standardtestcase", + "test_case": assert_test_case, + 'weight': 1.0 + }, + {"test_case_type": "hooktestcase", + "hook_code": hook_code, 'weight': 1.0}, + ] + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': True, + 'language': 'java' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertTrue(result.get('success')) + self.assertEqual(result.get("weight"), 2.0) + + def test_multiple_hooks(self): + # Given + user_answer = dedent("""\ + class Test + {public static void main(String[] args){ + System.out.print("Hello, world!"); + }} + """) + + hook_code_1 = dedent("""\ + def check_answer(user_answer): + with open("Test.java", "w+") as f: + f.write(user_answer) + import subprocess + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + def _run_command(cmd): + proc = subprocess.Popen("{}".format(cmd), + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + stdout,stderr = proc.communicate() + return stdout,stderr + cmds = ["javac Test.java", "java Test"] + for cmd in cmds: + stdout, stderr = _run_command(cmd) + if stdout.decode("utf-8") == "Hello, world!": + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + hook_code_2 = dedent("""\ + def check_answer(user_answer): + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + if 'System.out.print("Hello, world!");' in user_answer: + success, err, mark_fraction = True, "", 0.5 + return success, err, mark_fraction + """ + ) + + + test_case_data = [{"test_case_type": "hooktestcase", + "hook_code": hook_code_1, 'weight': 1.0}, + {"test_case_type": "hooktestcase", + "hook_code": hook_code_2, 'weight': 1.0}, + ] + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': True, + 'language': 'java' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertTrue(result.get('success')) + self.assertEqual(result.get("weight"), 1.5) + + def test_infinite_loop(self): + # Given + user_answer = dedent("""\ + class Test + {public static void main(String[] args){ + while(0==0) + { + System.out.print("a");} + }}""") + + hook_code = dedent("""\ + def check_answer(user_answer): + with open("Test.java", "w+") as f: + f.write(user_answer) + import subprocess + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + def _run_command(cmd): + proc = subprocess.Popen("{}".format(cmd), + shell=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE + ) + stdout,stderr = proc.communicate() + return stdout,stderr + cmds = ["javac Test.java", "java Test"] + for cmd in cmds: + stdout, stderr = _run_command(cmd) + if stdout.decode("utf-8") == "Hello, world!": + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + + test_case_data = [{"test_case_type": "hooktestcase", + "hook_code": hook_code,"weight": 1.0 + }] + + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': False, + 'language': 'java' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertFalse(result.get('success')) + self.assert_correct_output(self.timeout_msg, result.get('error')) + + if __name__ == '__main__': unittest.main() diff --git a/yaksh/evaluator_tests/test_python_evaluation.py b/yaksh/evaluator_tests/test_python_evaluation.py index 43dfe6b..1fba73e 100644 --- a/yaksh/evaluator_tests/test_python_evaluation.py +++ b/yaksh/evaluator_tests/test_python_evaluation.py @@ -7,8 +7,6 @@ from textwrap import dedent # Local import from yaksh.grader import Grader -from yaksh.python_assertion_evaluator import PythonAssertionEvaluator -from yaksh.python_stdio_evaluator import PythonStdIOEvaluator from yaksh.settings import SERVER_TIMEOUT @@ -643,5 +641,217 @@ class PythonStdIOEvaluationTestCases(EvaluatorBaseTest): self.assertFalse(result.get('success')) +class PythonHookEvaluationTestCases(EvaluatorBaseTest): + + def setUp(self): + with open('/tmp/test.txt', 'wb') as f: + f.write('2'.encode('ascii')) + tmp_in_dir_path = tempfile.mkdtemp() + self.in_dir = tmp_in_dir_path + self.timeout_msg = ("Code took more than {0} seconds to run. " + "You probably have an infinite loop in" + " your code.").format(SERVER_TIMEOUT) + self.file_paths = None + + def tearDown(self): + os.remove('/tmp/test.txt') + shutil.rmtree(self.in_dir) + + def test_correct_answer(self): + # Given + user_answer = "def add(a,b):\n\treturn a + b" + hook_code = dedent("""\ + def check_answer(user_answer): + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + exec(user_answer, globals()) + if add(1,2) == 3: + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + test_case_data = [{"test_case_type": "hooktestcase", + "hook_code": hook_code,"weight": 1.0 + }] + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': True, + 'language': 'python' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertTrue(result.get('success')) + + def test_incorrect_answer(self): + # Given + user_answer = "def add(a,b):\n\treturn a - b" + hook_code = dedent("""\ + def check_answer(user_answer): + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + exec(user_answer, globals()) + if add(1,2) == 3: + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + test_case_data = [{"test_case_type": "hooktestcase", + "hook_code": hook_code,"weight": 1.0 + }] + + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': False, + 'language': 'python' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertFalse(result.get('success')) + self.assert_correct_output('Incorrect Answer', result.get('error')) + + def test_assert_with_hook(self): + # Given + user_answer = "def add(a,b):\n\treturn a + b" + assert_test_case = "assert add(1,2) == 3" + hook_code = dedent("""\ + def check_answer(user_answer): + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + if "return a + b" in user_answer: + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + test_case_data = [{"test_case_type": "standardtestcase", + "test_case": assert_test_case, 'weight': 1.0}, + {"test_case_type": "hooktestcase", + "hook_code": hook_code, 'weight': 1.0}, + ] + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': True, + 'language': 'python' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertTrue(result.get('success')) + self.assertEqual(result.get("weight"), 2.0) + + def test_multiple_hooks(self): + # Given + user_answer = "def add(a,b):\n\treturn a + b" + hook_code_1 = dedent("""\ + def check_answer(user_answer): + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + if "return a + b" in user_answer: + success, err, mark_fraction = True, "", 0.5 + return success, err, mark_fraction + """ + ) + hook_code_2 = dedent("""\ + def check_answer(user_answer): + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + exec(user_answer, globals()) + if add(1,2) == 3: + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + + + test_case_data = [{"test_case_type": "hooktestcase", + "hook_code": hook_code_1, 'weight': 1.0}, + {"test_case_type": "hooktestcase", + "hook_code": hook_code_2, 'weight': 1.0}, + ] + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': True, + 'language': 'python' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertTrue(result.get('success')) + self.assertEqual(result.get("weight"), 1.5) + + def test_infinite_loop(self): + # Given + user_answer = "def add(a, b):\n\twhile True:\n\t\tpass" + hook_code = dedent("""\ + def check_answer(user_answer): + success = False + err = "Incorrect Answer" + mark_fraction = 0.0 + exec(user_answer, globals()) + if add(1,2) == 3: + success, err, mark_fraction = True, "", 1.0 + return success, err, mark_fraction + """ + ) + test_case_data = [{"test_case_type": "hooktestcase", + "hook_code": hook_code,"weight": 1.0 + }] + + kwargs = { + 'metadata': { + 'user_answer': user_answer, + 'file_paths': self.file_paths, + 'partial_grading': False, + 'language': 'python' + }, + 'test_case_data': test_case_data, + } + + # When + grader = Grader(self.in_dir) + result = grader.evaluate(kwargs) + + # Then + self.assertFalse(result.get('success')) + self.assert_correct_output(self.timeout_msg, result.get('error')) + + if __name__ == '__main__': unittest.main() |