Assignment Validator
1
I am validating GenAI Engineer assignments completed by candidates.
You should understand the assignment (provided as a PDF) and review the code to rate the submission.
You also have to give feedback on the assignment.
Write the code in LangChain and OpenAI for the assignment validator.
I'll help you create code for validating GenAI Engineer assignments. However, I notice you haven't shared the PDF document containing the assignment details yet. To provide accurate validation and feedback, I'll need:
- The assignment PDF document to understand the requirements
- The candidate's submitted code to evaluate
Once you provide these, I can help create a validation system using LangChain and OpenAI. In the meantime, I can outline a general framework for what such a validator might look like:
I've created a framework for an assignment validator that includes:
Assignment Requirement Analysis:
- PDF loading and processing using PyPDFLoader
- Text chunking for better context management
- Vector storage using FAISS for requirement comparison
Code Analysis:
- Static code analysis using Python's ast module
- Metrics collection (functions, classes, documentation)
- Syntax validation
Evaluation System:
- Requirement matching using embeddings
- Scoring based on multiple criteria
- Structured feedback generation
Feedback Generation:
- Detailed, constructive feedback
- Strengths and areas for improvement
- Best practices recommendations
To use this system effectively, you would need to:
- Share the assignment PDF containing requirements
- Provide the candidate's submitted code
- Set up your OpenAI API key
Would you please share these materials so I can help you customize the validator for your specific needs? This will allow me to:
- Adjust the evaluation criteria
- Fine-tune the prompts
- Add specific metrics relevant to your assignment
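Before the full code below, a minimal environment setup sketch; the package list is an assumption inferred from the imports in v1, not something specified in the original exchange:
# Assumed dependencies (inferred from the imports used in v1 below):
#   pip install langchain openai faiss-cpu pypdf
import os

# The validator reads the API key from the environment; set OPENAI_API_KEY
# in your shell before running.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError("OPENAI_API_KEY is not set")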
v1:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
import os
from typing import Dict, List
import ast


class AssignmentValidator:
    def __init__(self, openai_api_key: str):
        self.llm = ChatOpenAI(
            temperature=0.1,
            model_name="gpt-4",
            openai_api_key=openai_api_key
        )
        self.embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

    def load_assignment_requirements(self, pdf_path: str):
        """Load and process assignment requirements from PDF"""
        loader = PyPDFLoader(pdf_path)
        pages = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=100
        )
        texts = text_splitter.split_documents(pages)
        self.requirements_db = FAISS.from_documents(texts, self.embeddings)

    def analyze_code(self, code_path: str) -> Dict:
        """Analyze submitted code for structure and patterns"""
        with open(code_path, 'r') as file:
            code = file.read()
        try:
            tree = ast.parse(code)
            # Basic code metrics
            metrics = {
                'num_functions': len([node for node in ast.walk(tree) if isinstance(node, ast.FunctionDef)]),
                'num_classes': len([node for node in ast.walk(tree) if isinstance(node, ast.ClassDef)]),
                'has_main': any(isinstance(node, ast.If) and
                                hasattr(node.test, 'left') and
                                hasattr(node.test.left, 'id') and
                                node.test.left.id == '__name__'
                                for node in ast.walk(tree)),
                'has_docstrings': any(isinstance(node, ast.Expr) and
                                      isinstance(node.value, ast.Constant) and
                                      isinstance(node.value.value, str)
                                      for node in ast.walk(tree))
            }
            return metrics
        except SyntaxError as e:
            return {'error': f'Invalid Python syntax: {str(e)}'}

    def evaluate_requirements(self, code_metrics: Dict) -> str:
        """Compare code against assignment requirements"""
        evaluation_prompt = PromptTemplate(
            input_variables=["requirements", "code_metrics"],
            template="""
            Based on the following assignment requirements:
            {requirements}
            And the code metrics:
            {code_metrics}
            Evaluate the submission and provide:
            1. A score out of 10
            2. List of strengths
            3. List of areas for improvement
            4. Specific feedback on code structure
            Format your response as a JSON object with keys:
            score, strengths, improvements, feedback
            """
        )
        # Get relevant requirements context
        requirements_context = self.requirements_db.similarity_search(
            str(code_metrics), k=3
        )
        requirements_text = "\n".join([doc.page_content for doc in requirements_context])
        # Create and run evaluation chain
        chain = LLMChain(llm=self.llm, prompt=evaluation_prompt)
        result = chain.run(
            requirements=requirements_text,
            code_metrics=str(code_metrics)
        )
        return result

    def generate_feedback(self, evaluation_result: str) -> str:
        """Generate detailed feedback from evaluation results"""
        feedback_prompt = PromptTemplate(
            input_variables=["evaluation"],
            template="""
            Based on the following evaluation:
            {evaluation}
            Provide constructive feedback for the candidate that:
            1. Acknowledges their strengths
            2. Suggests specific improvements
            3. Offers guidance on best practices
            4. Maintains an encouraging tone
            Format the feedback as a well-structured markdown document.
            """
        )
        chain = LLMChain(llm=self.llm, prompt=feedback_prompt)
        feedback = chain.run(evaluation=str(evaluation_result))
        return feedback


# Usage Example
def main():
    validator = AssignmentValidator(os.getenv('OPENAI_API_KEY'))
    # Load assignment requirements
    validator.load_assignment_requirements('assignment.pdf')
    # Analyze submitted code
    code_metrics = validator.analyze_code('submission.py')
    # Evaluate submission
    evaluation = validator.evaluate_requirements(code_metrics)
    # Generate feedback
    feedback = validator.generate_feedback(evaluation)
    print("Evaluation Results:", evaluation)
    print("\nDetailed Feedback:", feedback)


if __name__ == "__main__":
    main()
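One gap worth noting in v1: the evaluation prompt asks the model for a JSON object, but chain.run returns a raw string. A minimal parsing sketch, assuming the model follows the requested format; the helper name is hypothetical:
import json

def parse_evaluation(raw_result: str) -> dict:
    """Best-effort parse of the model's JSON evaluation (hypothetical helper)."""
    try:
        return json.loads(raw_result)
    except json.JSONDecodeError:
        # Fall back to the first {...} span in case the model wrapped the JSON in prose
        start, end = raw_result.find("{"), raw_result.rfind("}")
        if start != -1 and end > start:
            return json.loads(raw_result[start:end + 1])
        raise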
2
Also, I need another metric called "automated_code_meter", scored from 1 to 10.
A score of 1 means the candidate wrote the code by hand with little automation; 10 means complete automation.
I'll add an automated code meter to measure the level of automation in the candidate's code. Here's the updated version:
I've added the automated code meter functionality with the following features:
Pattern Analysis (_analyze_automation_patterns):
- Chain patterns (LangChain, etc.)
- Asynchronous code patterns
- Loop and iteration patterns
- API integration
- Configuration management
- Error handling
- Data processing
- Pipeline patterns
Scoring System (_calculate_automation_score):
- Weighted scoring for different automation patterns
- Normalized score from 1 to 10
- Uses a logistic function for more nuanced scoring
- Considers pattern frequency and complexity
The scoring is based on:
- Score 1-3: Mostly manual code with basic loops and conditions
- Score 4-6: Mixed approach with some automation features
- Score 7-8: Good automation with multiple patterns
- Score 9-10: Extensive automation with advanced patterns
The metrics now include:
metrics = {
    'automation_score': int,        # Overall automation score (1-10)
    'automation_patterns': {        # Detailed pattern counts
        'chain_patterns': int,
        'async_patterns': int,
        'loop_patterns': int,
        'api_calls': int,
        'config_patterns': int,
        'error_handling': int,
        'data_processing': int,
        'pipeline_patterns': int
    }
}
Would you like me to add any specific automation patterns or adjust the scoring weights for your particular assignment requirements?
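To make the scoring concrete, here is a small worked example; the pattern counts are invented for illustration, while the weights and logistic normalization are the ones defined in _calculate_automation_score in v4 below:
import math

# Invented pattern counts for a moderately automated submission (illustrative only)
patterns = {
    'chain_patterns': 2, 'async_patterns': 0, 'loop_patterns': 5,
    'api_calls': 3, 'config_patterns': 1, 'error_handling': 2,
    'data_processing': 4, 'pipeline_patterns': 1,
}
weights = {
    'chain_patterns': 1.5, 'async_patterns': 1.2, 'loop_patterns': 0.8,
    'api_calls': 1.0, 'config_patterns': 0.7, 'error_handling': 0.6,
    'data_processing': 1.3, 'pipeline_patterns': 1.4,
}
weighted_sum = sum(patterns[k] * weights[k] for k in patterns)  # 18.5
max_expected_score = 30
score = 1 + 9 / (1 + math.exp(-weighted_sum / max_expected_score * 6 + 3))
print(round(score))  # 7 -> falls in the "good automation with multiple patterns" band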
v2:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
import os
from typing import Dict, List
import ast
class AssignmentValidator:
def __init__(self, openai_api_key: str):
self.llm = ChatOpenAI(
temperature=0.1,
model_name="gpt-4",
openai_api_key=openai_api_key
)
self.embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
def load_assignment_requirements(self, pdf_path: str):
"""Load and process assignment requirements from PDF"""
loader = PyPDFLoader(pdf_path)
pages = loader.load()
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=1000,
chunk_overlap=100
)
texts = text_splitter.split_documents(pages)
self.requirements_db = FAISS.from_documents(texts, self.embeddings)
def analyze_code(self, code_path: str) -> Dict:
"""Analyze submitted code for structure and patterns"""
with open(code_path, 'r') as file:
code = file.read()
try:
tree = ast.parse(code)
# Count automation-related patterns
automation_patterns = self._analyze_automation_patterns(tree)
automation_score = self._calculate_automation_score(automation_patterns)
# Basic code metrics
metrics = {
'num_functions': len([node for node in ast.walk(tree) if isinstance(node, ast.FunctionDef)]),
'num_classes': len([node for node in ast.walk(tree) if isinstance(node, ast.ClassDef)]),
'has_main': any(isinstance(node, ast.If) and
hasattr(node.test, 'left') and
hasattr(node.test.left, 'id') and
node.test.left.id == '__name__'
for node in ast.walk(tree)),
'has_docstrings': any(isinstance(node, ast.Expr) and
isinstance(node.value, ast.Constant) and isinstance(node.value.value, str)
for node in ast.walk(tree)),
'automation_score': automation_score,
'automation_patterns': automation_patterns
}
return metrics
except SyntaxError as e:
return {'error': f'Invalid Python syntax: {str(e)}'}
def evaluate_requirements(self, code_metrics: Dict) -> List[Dict]:
"""Compare code against assignment requirements"""
evaluation_prompt = PromptTemplate(
input_variables=["requirements", "code_metrics"],
template="""
Based on the following assignment requirements:
{requirements}
And the code metrics:
{code_metrics}
Evaluate the submission and provide:
1. A score out of 10
2. List of strengths
3. List of areas for improvement
4. Specific feedback on code structure
Format your response as a JSON object with keys:
score, strengths, improvements, feedback
"""
)
# Get relevant requirements context
requirements_context = self.requirements_db.similarity_search(
str(code_metrics), k=3
)
requirements_text = "\n".join([doc.page_content for doc in requirements_context])
# Create and run evaluation chain
chain = LLMChain(llm=self.llm, prompt=evaluation_prompt)
result = chain.run(
requirements=requirements_text,
code_metrics=str(code_metrics)
)
return result
def generate_feedback(self, evaluation_result: Dict) -> str:
"""Generate detailed feedback from evaluation results"""
feedback_prompt = PromptTemplate(
input_variables=["evaluation"],
template="""
Based on the following evaluation:
{evaluation}
Provide constructive feedback for the candidate that:
1. Acknowledges their strengths
2. Suggests specific improvements
3. Offers guidance on best practices
4. Maintains an encouraging tone
Format the feedback as a well-structured markdown document.
"""
)
chain = LLMChain(llm=self.llm, prompt=feedback_prompt)
feedback = chain.run(evaluation=str(evaluation_result))
return feedback
# Usage Example
def main():
validator = AssignmentValidator(os.getenv('OPENAI_API_KEY'))
# Load assignment requirements
validator.load_assignment_requirements('assignment.pdf')
# Analyze submitted code
code_metrics = validator.analyze_code('submission.py')
# Evaluate submission
evaluation = validator.evaluate_requirements(code_metrics)
# Generate feedback
feedback = validator.generate_feedback(evaluation)
print("Evaluation Results:", evaluation)
print("\nDetailed Feedback:", feedback)
if __name__ == "__main__":
main()
v3:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
import os
from typing import Dict, List, Set
import re
import ast
class AssignmentValidator:
def __init__(self, openai_api_key: str):
self.llm = ChatOpenAI(
temperature=0.1,
model_name="gpt-4",
openai_api_key=openai_api_key
)
self.embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
def load_assignment_requirements(self, pdf_path: str):
"""Load and process assignment requirements from PDF"""
loader = PyPDFLoader(pdf_path)
pages = loader.load()
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=1000,
chunk_overlap=100
)
texts = text_splitter.split_documents(pages)
self.requirements_db = FAISS.from_documents(texts, self.embeddings)
def analyze_code(self, code_path: str) -> Dict:
"""Analyze submitted code for structure and patterns"""
with open(code_path, 'r') as file:
code = file.read()
try:
tree = ast.parse(code)
# Count automation-related patterns
automation_patterns = self._analyze_automation_patterns(tree)
automation_score = self._calculate_automation_score(automation_patterns)
# Basic code metrics
metrics = {
'num_functions': len([node for node in ast.walk(tree) if isinstance(node, ast.FunctionDef)]),
'num_classes': len([node for node in ast.walk(tree) if isinstance(node, ast.ClassDef)]),
'has_main': any(isinstance(node, ast.If) and
hasattr(node.test, 'left') and
hasattr(node.test.left, 'id') and
node.test.left.id == '__name__'
for node in ast.walk(tree)),
'has_docstrings': any(isinstance(node, ast.Expr) and
isinstance(node.value, ast.Constant) and isinstance(node.value.value, str)
for node in ast.walk(tree)),
'automation_score': automation_score,
'automation_patterns': automation_patterns
}
return metrics
except SyntaxError as e:
return {'error': f'Invalid Python syntax: {str(e)}'}
def evaluate_requirements(self, code_metrics: Dict) -> List[Dict]:
"""Compare code against assignment requirements"""
evaluation_prompt = PromptTemplate(
input_variables=["requirements", "code_metrics"],
template="""
Based on the following assignment requirements:
{requirements}
And the code metrics:
{code_metrics}
Evaluate the submission and provide:
1. A score out of 10
2. List of strengths
3. List of areas for improvement
4. Specific feedback on code structure
Format your response as a JSON object with keys:
score, strengths, improvements, feedback
"""
)
# Get relevant requirements context
requirements_context = self.requirements_db.similarity_search(
str(code_metrics), k=3
)
requirements_text = "\n".join([doc.page_content for doc in requirements_context])
# Create and run evaluation chain
chain = LLMChain(llm=self.llm, prompt=evaluation_prompt)
result = chain.run(
requirements=requirements_text,
code_metrics=str(code_metrics)
)
return result
def generate_feedback(self, evaluation_result: Dict) -> str:
"""Generate detailed feedback from evaluation results"""
feedback_prompt = PromptTemplate(
input_variables=["evaluation"],
template="""
Based on the following evaluation:
{evaluation}
Provide constructive feedback for the candidate that:
1. Acknowledges their strengths
2. Suggests specific improvements
3. Offers guidance on best practices
4. Maintains an encouraging tone
Format the feedback as a well-structured markdown document.
"""
)
chain = LLMChain(llm=self.llm, prompt=feedback_prompt)
feedback = chain.run(evaluation=str(evaluation_result))
return feedback
# Usage Example
def main():
validator = AssignmentValidator(os.getenv('OPENAI_API_KEY'))
# Load assignment requirements
validator.load_assignment_requirements('assignment.pdf')
# Analyze submitted code
code_metrics = validator.analyze_code('submission.py')
# Evaluate submission
evaluation = validator.evaluate_requirements(code_metrics)
# Generate feedback
feedback = validator.generate_feedback(evaluation)
print("Evaluation Results:", evaluation)
print("\nDetailed Feedback:", feedback)
if __name__ == "__main__":
main()
3
v4:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
import os
import math
from typing import Dict, List, Set
import re
import ast


class AssignmentValidator:
    def __init__(self, openai_api_key: str):
        self.llm = ChatOpenAI(
            temperature=0.1,
            model_name="gpt-4",
            openai_api_key=openai_api_key
        )
        self.embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

    def load_assignment_requirements(self, pdf_path: str):
        """Load and process assignment requirements from PDF"""
        loader = PyPDFLoader(pdf_path)
        pages = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=100
        )
        texts = text_splitter.split_documents(pages)
        self.requirements_db = FAISS.from_documents(texts, self.embeddings)

    def analyze_code(self, code_path: str) -> Dict:
        """Analyze submitted code for structure and patterns"""
        with open(code_path, 'r') as file:
            code = file.read()
        try:
            tree = ast.parse(code)
            # Count automation-related patterns
            automation_patterns = self._analyze_automation_patterns(tree)
            automation_score = self._calculate_automation_score(automation_patterns)
            # Basic code metrics
            metrics = {
                'num_functions': len([node for node in ast.walk(tree) if isinstance(node, ast.FunctionDef)]),
                'num_classes': len([node for node in ast.walk(tree) if isinstance(node, ast.ClassDef)]),
                'has_main': any(isinstance(node, ast.If) and
                                hasattr(node.test, 'left') and
                                hasattr(node.test.left, 'id') and
                                node.test.left.id == '__name__'
                                for node in ast.walk(tree)),
                'has_docstrings': any(isinstance(node, ast.Expr) and
                                      isinstance(node.value, ast.Constant) and
                                      isinstance(node.value.value, str)
                                      for node in ast.walk(tree)),
                'automation_score': automation_score,
                'automation_patterns': automation_patterns
            }
            return metrics
        except SyntaxError as e:
            return {'error': f'Invalid Python syntax: {str(e)}'}

    def evaluate_requirements(self, code_metrics: Dict) -> str:
        """Compare code against assignment requirements"""
        evaluation_prompt = PromptTemplate(
            input_variables=["requirements", "code_metrics"],
            template="""
            Based on the following assignment requirements:
            {requirements}
            And the code metrics:
            {code_metrics}
            Evaluate the submission and provide:
            1. A score out of 10
            2. List of strengths
            3. List of areas for improvement
            4. Specific feedback on code structure
            Format your response as a JSON object with keys:
            score, strengths, improvements, feedback
            """
        )
        # Get relevant requirements context
        requirements_context = self.requirements_db.similarity_search(
            str(code_metrics), k=3
        )
        requirements_text = "\n".join([doc.page_content for doc in requirements_context])
        # Create and run evaluation chain
        chain = LLMChain(llm=self.llm, prompt=evaluation_prompt)
        result = chain.run(
            requirements=requirements_text,
            code_metrics=str(code_metrics)
        )
        return result

    def _analyze_automation_patterns(self, tree: ast.AST) -> Dict:
        """Analyze code for automation patterns"""
        patterns = {
            'chain_patterns': 0,     # LangChain or similar chain patterns
            'async_patterns': 0,     # Asynchronous code patterns
            'loop_patterns': 0,      # Automated iteration patterns
            'api_calls': 0,          # API integration patterns
            'config_patterns': 0,    # Configuration management
            'error_handling': 0,     # Automated error handling
            'data_processing': 0,    # Automated data processing
            'pipeline_patterns': 0   # Data/processing pipeline patterns
        }
        for node in ast.walk(tree):
            # Check for chain patterns (LangChain, etc.)
            if isinstance(node, ast.Call) and hasattr(node.func, 'attr'):
                if 'chain' in node.func.attr.lower():
                    patterns['chain_patterns'] += 1
            # Check for async patterns
            if isinstance(node, (ast.AsyncFunctionDef, ast.Await, ast.AsyncFor, ast.AsyncWith)):
                patterns['async_patterns'] += 1
            # Check for loop patterns
            if isinstance(node, (ast.For, ast.While, ast.ListComp, ast.DictComp, ast.SetComp)):
                patterns['loop_patterns'] += 1
            # Check for API calls
            if isinstance(node, ast.Call) and hasattr(node.func, 'attr'):
                if any(api in node.func.attr.lower() for api in ['get', 'post', 'put', 'delete', 'request']):
                    patterns['api_calls'] += 1
            # Check for configuration management
            if isinstance(node, ast.Call) and hasattr(node.func, 'attr'):
                if any(config in node.func.attr.lower() for config in ['config', 'setup', 'initialize']):
                    patterns['config_patterns'] += 1
            # Check for error handling
            if isinstance(node, (ast.Try, ast.ExceptHandler)):
                patterns['error_handling'] += 1
            # Check for data processing
            if isinstance(node, ast.Call) and hasattr(node.func, 'attr'):
                if any(data in node.func.attr.lower() for data in ['process', 'transform', 'convert', 'parse']):
                    patterns['data_processing'] += 1
            # Check for pipeline patterns
            if isinstance(node, ast.Call) and hasattr(node.func, 'attr'):
                if any(pipeline in node.func.attr.lower() for pipeline in ['pipeline', 'flow', 'stream']):
                    patterns['pipeline_patterns'] += 1
        return patterns

    def _calculate_automation_score(self, patterns: Dict) -> int:
        """Calculate automation score from 1-10 based on patterns"""
        # Weights for different patterns
        weights = {
            'chain_patterns': 1.5,
            'async_patterns': 1.2,
            'loop_patterns': 0.8,
            'api_calls': 1.0,
            'config_patterns': 0.7,
            'error_handling': 0.6,
            'data_processing': 1.3,
            'pipeline_patterns': 1.4
        }
        # Calculate weighted sum
        weighted_sum = sum(patterns[key] * weights[key] for key in patterns)
        # Normalize to a 1-10 scale using a logistic function for a more nuanced scale
        max_expected_score = 30  # Adjust based on expected maximum weighted sum
        normalized_score = 1 + 9 / (1 + math.exp(-weighted_sum / max_expected_score * 6 + 3))
        return round(normalized_score)

    def generate_feedback(self, evaluation_result: str) -> str:
        """Generate detailed feedback from evaluation results"""
        feedback_prompt = PromptTemplate(
            input_variables=["evaluation"],
            template="""
            Based on the following evaluation:
            {evaluation}
            Provide constructive feedback for the candidate that:
            1. Acknowledges their strengths
            2. Suggests specific improvements
            3. Offers guidance on best practices
            4. Maintains an encouraging tone
            Format the feedback as a well-structured markdown document.
            """
        )
        chain = LLMChain(llm=self.llm, prompt=feedback_prompt)
        feedback = chain.run(evaluation=str(evaluation_result))
        return feedback


# Usage Example
def main():
    validator = AssignmentValidator(os.getenv('OPENAI_API_KEY'))
    # Load assignment requirements
    validator.load_assignment_requirements('assignment.pdf')
    # Analyze submitted code
    code_metrics = validator.analyze_code('submission.py')
    # Evaluate submission
    evaluation = validator.evaluate_requirements(code_metrics)
    # Generate feedback
    feedback = validator.generate_feedback(evaluation)
    print("Evaluation Results:", evaluation)
    print("\nDetailed Feedback:", feedback)


if __name__ == "__main__":
    main()
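Beyond the original conversation, a small batch-validation sketch, assuming the v4 AssignmentValidator above; the folder layout and helper name are hypothetical:
import os
from pathlib import Path

def validate_all(submissions_dir: str, assignment_pdf: str) -> dict:
    """Run the validator over every .py submission in a folder (hypothetical helper)."""
    validator = AssignmentValidator(os.getenv('OPENAI_API_KEY'))
    validator.load_assignment_requirements(assignment_pdf)
    results = {}
    for submission in sorted(Path(submissions_dir).glob('*.py')):
        metrics = validator.analyze_code(str(submission))
        evaluation = validator.evaluate_requirements(metrics)
        results[submission.name] = {
            'metrics': metrics,
            'evaluation': evaluation,
            'feedback': validator.generate_feedback(evaluation),
        }
    return results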