Assignment Validator
1
I am validating GenAI Engineer assignments completed by candidates.
You should understand the assignment (provided as a PDF) and review the code to rate the submission.
You also have to give feedback on the assignment.
Write the code in LangChain and OpenAI for the assignment validator.
I'll help you create code for validating GenAI Engineer assignments. However, I notice you haven't shared the PDF document containing the assignment details yet. To provide accurate validation and feedback, I'll need:
- The assignment PDF document to understand the requirements
- The candidate's submitted code to evaluate
Once you provide these, I can help create a validation system using LangChain and OpenAI. In the meantime, I can outline a general framework for what such a validator might look like:
I've created a framework for an assignment validator that includes:
Assignment Requirement Analysis:
- PDF loading and processing using PyPDFLoader
- Text chunking for better context management
- Vector storage using FAISS for requirement comparison
Code Analysis:
- Static code analysis using Python's ast module
- Metrics collection (functions, classes, documentation)
- Syntax validation
Evaluation System:
- Requirement matching using embeddings
- Scoring based on multiple criteria
- Structured feedback generation
Feedback Generation:
- Detailed, constructive feedback
- Strengths and areas for improvement
- Best practices recommendations
To use this system effectively, you would need to:
- Share the assignment PDF containing requirements
- Provide the candidate's submitted code
- Set up your OpenAI API key
Would you please share these materials so I can help you customize the validator for your specific needs? This will allow me to:
- Adjust the evaluation criteria
- Fine-tune the prompts
- Add specific metrics relevant to your assignment
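Before the full code below, a minimal environment setup sketch; the package list is an assumption inferred from the imports in v1, not something specified in the original exchange:
# Assumed dependencies (inferred from the imports used in v1 below):
#   pip install langchain openai faiss-cpu pypdf
import os

# The validator reads the API key from the environment; set OPENAI_API_KEY
# in your shell before running.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError("OPENAI_API_KEY is not set")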
v1:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
import os
from typing import Dict, List
import ast


class AssignmentValidator:
    def __init__(self, openai_api_key: str):
        self.llm = ChatOpenAI(
            temperature=0.1,
            model_name="gpt-4",
            openai_api_key=openai_api_key
        )
        self.embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

    def load_assignment_requirements(self, pdf_path: str):
        """Load and process assignment requirements from PDF"""
        loader = PyPDFLoader(pdf_path)
        pages = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=100
        )
        texts = text_splitter.split_documents(pages)
        self.requirements_db = FAISS.from_documents(texts, self.embeddings)

    def analyze_code(self, code_path: str) -> Dict:
        """Analyze submitted code for structure and patterns"""
        with open(code_path, 'r') as file:
            code = file.read()
        try:
            tree = ast.parse(code)
            # Basic code metrics
            metrics = {
                'num_functions': len([node for node in ast.walk(tree) if isinstance(node, ast.FunctionDef)]),
                'num_classes': len([node for node in ast.walk(tree) if isinstance(node, ast.ClassDef)]),
                'has_main': any(isinstance(node, ast.If) and
                                hasattr(node.test, 'left') and
                                hasattr(node.test.left, 'id') and
                                node.test.left.id == '__name__'
                                for node in ast.walk(tree)),
                'has_docstrings': any(isinstance(node, ast.Expr) and
                                      isinstance(node.value, ast.Constant) and
                                      isinstance(node.value.value, str)
                                      for node in ast.walk(tree))
            }
            return metrics
        except SyntaxError as e:
            return {'error': f'Invalid Python syntax: {str(e)}'}

    def evaluate_requirements(self, code_metrics: Dict) -> str:
        """Compare code against assignment requirements"""
        evaluation_prompt = PromptTemplate(
            input_variables=["requirements", "code_metrics"],
            template="""
            Based on the following assignment requirements:
            {requirements}
            And the code metrics:
            {code_metrics}
            Evaluate the submission and provide:
            1. A score out of 10
            2. List of strengths
            3. List of areas for improvement
            4. Specific feedback on code structure
            Format your response as a JSON object with keys:
            score, strengths, improvements, feedback
            """
        )
        # Get relevant requirements context
        requirements_context = self.requirements_db.similarity_search(
            str(code_metrics), k=3
        )
        requirements_text = "\n".join([doc.page_content for doc in requirements_context])
        # Create and run evaluation chain
        chain = LLMChain(llm=self.llm, prompt=evaluation_prompt)
        result = chain.run(
            requirements=requirements_text,
            code_metrics=str(code_metrics)
        )
        return result

    def generate_feedback(self, evaluation_result: str) -> str:
        """Generate detailed feedback from evaluation results"""
        feedback_prompt = PromptTemplate(
            input_variables=["evaluation"],
            template="""
            Based on the following evaluation:
            {evaluation}
            Provide constructive feedback for the candidate that:
            1. Acknowledges their strengths
            2. Suggests specific improvements
            3. Offers guidance on best practices
            4. Maintains an encouraging tone
            Format the feedback as a well-structured markdown document.
            """
        )
        chain = LLMChain(llm=self.llm, prompt=feedback_prompt)
        feedback = chain.run(evaluation=str(evaluation_result))
        return feedback


# Usage Example
def main():
    validator = AssignmentValidator(os.getenv('OPENAI_API_KEY'))
    # Load assignment requirements
    validator.load_assignment_requirements('assignment.pdf')
    # Analyze submitted code
    code_metrics = validator.analyze_code('submission.py')
    # Evaluate submission
    evaluation = validator.evaluate_requirements(code_metrics)
    # Generate feedback
    feedback = validator.generate_feedback(evaluation)
    print("Evaluation Results:", evaluation)
    print("\nDetailed Feedback:", feedback)


if __name__ == "__main__":
    main()
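One gap worth noting in v1: the evaluation prompt asks the model for a JSON object, but chain.run returns a raw string. A minimal parsing sketch, assuming the model follows the requested format; the helper name is hypothetical:
import json

def parse_evaluation(raw_result: str) -> dict:
    """Best-effort parse of the model's JSON evaluation (hypothetical helper)."""
    try:
        return json.loads(raw_result)
    except json.JSONDecodeError:
        # Fall back to the first {...} span in case the model wrapped the JSON in prose
        start, end = raw_result.find("{"), raw_result.rfind("}")
        if start != -1 and end > start:
            return json.loads(raw_result[start:end + 1])
        raise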
2
Also, I need another metric called "automated_code_meter", scored from 1 to 10.
A score of 1 means the candidate wrote the code by hand with little automation; 10 means complete automation.
I'll add an automated code meter to measure the level of automation in the candidate's code. Here's the updated version:
I've added the automated code meter functionality with the following features:
Pattern Analysis (_analyze_automation_patterns):
- Chain patterns (LangChain, etc.)
- Asynchronous code patterns
- Loop and iteration patterns
- API integration
- Configuration management
- Error handling
- Data processing
- Pipeline patterns
Scoring System (_calculate_automation_score):
- Weighted scoring for different automation patterns
- Normalized score from 1 to 10
- Uses a logistic function for more nuanced scoring
- Considers pattern frequency and complexity
The scoring is based on:
- Score 1-3: Mostly manual code with basic loops and conditions
- Score 4-6: Mixed approach with some automation features
- Score 7-8: Good automation with multiple patterns
- Score 9-10: Extensive automation with advanced patterns
The metrics now include:
metrics = {
    'automation_score': int,        # Overall automation score (1-10)
    'automation_patterns': {        # Detailed pattern counts
        'chain_patterns': int,
        'async_patterns': int,
        'loop_patterns': int,
        'api_calls': int,
        'config_patterns': int,
        'error_handling': int,
        'data_processing': int,
        'pipeline_patterns': int
    }
}
Would you like me to add any specific automation patterns or adjust the scoring weights for your particular assignment requirements?
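To make the scoring concrete, here is a small worked example; the pattern counts are invented for illustration, while the weights and logistic normalization are the ones defined in _calculate_automation_score in v4 below:
import math

# Invented pattern counts for a moderately automated submission (illustrative only)
patterns = {
    'chain_patterns': 2, 'async_patterns': 0, 'loop_patterns': 5,
    'api_calls': 3, 'config_patterns': 1, 'error_handling': 2,
    'data_processing': 4, 'pipeline_patterns': 1,
}
weights = {
    'chain_patterns': 1.5, 'async_patterns': 1.2, 'loop_patterns': 0.8,
    'api_calls': 1.0, 'config_patterns': 0.7, 'error_handling': 0.6,
    'data_processing': 1.3, 'pipeline_patterns': 1.4,
}
weighted_sum = sum(patterns[k] * weights[k] for k in patterns)  # 18.5
max_expected_score = 30
score = 1 + 9 / (1 + math.exp(-weighted_sum / max_expected_score * 6 + 3))
print(round(score))  # 7 -> falls in the "good automation with multiple patterns" band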
v2:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
import os
from typing import Dict, List
import ast
class AssignmentValidator:
def __init__(self, openai_api_key: str):
self.llm = ChatOpenAI(
temperature=0.1,
model_name="gpt-4",
openai_api_key=openai_api_key
)
self.embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
def load_assignment_requirements(self, pdf_path: str):
"""Load and process assignment requirements from PDF"""
loader = PyPDFLoader(pdf_path)
pages = loader.load()
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=1000,
chunk_overlap=100
)
texts = text_splitter.split_documents(pages)
self.requirements_db = FAISS.from_documents(texts, self.embeddings)
def analyze_code(self, code_path: str) -> Dict:
"""Analyze submitted code for structure and patterns"""
with open(code_path, 'r') as file:
code = file.read()
try:
tree = ast.parse(code)
# Count automation-related patterns
automation_patterns = self._analyze_automation_patterns(tree)
automation_score = self._calculate_automation_score(automation_patterns)
# Basic code metrics
metrics = {
'num_functions': len([node for node in ast.walk(tree) if isinstance(node, ast.FunctionDef)]),
'num_classes': len([node for node in ast.walk(tree) if isinstance(node, ast.ClassDef)]),
'has_main': any(isinstance(node, ast.If) and
hasattr(node.test, 'left') and
hasattr(node.test.left, 'id') and
node.test.left.id == '__name__'
for node in ast.walk(tree)),
'has_docstrings': any(isinstance(node, ast.Expr) and
isinstance(node.value, ast.Constant) and isinstance(node.value.value, str)
for node in ast.walk(tree)),
'automation_score': automation_score,
'automation_patterns': automation_patterns
}
return metrics
except SyntaxError as e:
return {'error': f'Invalid Python syntax: {str(e)}'}
def evaluate_requirements(self, code_metrics: Dict) -> List[Dict]:
"""Compare code against assignment requirements"""
evaluation_prompt = PromptTemplate(
input_variables=["requirements", "code_metrics"],
template="""
Based on the following assignment requirements:
{requirements}
And the code metrics:
{code_metrics}
Evaluate the submission and provide:
1. A score out of 10
2. List of strengths
3. List of areas for improvement
4. Specific feedback on code structure
Format your response as a JSON object with keys:
score, strengths, improvements, feedback
"""
)
# Get relevant requirements context
requirements_context = self.requirements_db.similarity_search(
str(code_metrics), k=3
)
requirements_text = "\n".join([doc.page_content for doc in requirements_context])
# Create and run evaluation chain
chain = LLMChain(llm=self.llm, prompt=evaluation_prompt)
result = chain.run(
requirements=requirements_text,
code_metrics=str(code_metrics)
)
return result
def generate_feedback(self, evaluation_result: Dict) -> str:
"""Generate detailed feedback from evaluation results"""
feedback_prompt = PromptTemplate(
input_variables=["evaluation"],
template="""
Based on the following evaluation:
{evaluation}
Provide constructive feedback for the candidate that:
1. Acknowledges their strengths
2. Suggests specific improvements
3. Offers guidance on best practices
4. Maintains an encouraging tone
Format the feedback as a well-structured markdown document.
"""
)
chain = LLMChain(llm=self.llm, prompt=feedback_prompt)
feedback = chain.run(evaluation=str(evaluation_result))
return feedback
# Usage Example
def main():
validator = AssignmentValidator(os.getenv('OPENAI_API_KEY'))
# Load assignment requirements
validator.load_assignment_requirements('assignment.pdf')
# Analyze submitted code
code_metrics = validator.analyze_code('submission.py')
# Evaluate submission
evaluation = validator.evaluate_requirements(code_metrics)
# Generate feedback
feedback = validator.generate_feedback(evaluation)
print("Evaluation Results:", evaluation)
print("\nDetailed Feedback:", feedback)
if __name__ == "__main__":
main()
v3:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
import os
from typing import Dict, List, Set
import re
import ast
class AssignmentValidator:
def __init__(self, openai_api_key: str):
self.llm = ChatOpenAI(
temperature=0.1,
model_name="gpt-4",
openai_api_key=openai_api_key
)
self.embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
def load_assignment_requirements(self, pdf_path: str):
"""Load and process assignment requirements from PDF"""
loader = PyPDFLoader(pdf_path)
pages = loader.load()
text_splitter = RecursiveCharacterTextSplitter(
chunk_size=1000,
chunk_overlap=100
)
texts = text_splitter.split_documents(pages)
self.requirements_db = FAISS.from_documents(texts, self.embeddings)
def analyze_code(self, code_path: str) -> Dict:
"""Analyze submitted code for structure and patterns"""
with open(code_path, 'r') as file:
code = file.read()
try:
tree = ast.parse(code)
# Count automation-related patterns
automation_patterns = self._analyze_automation_patterns(tree)
automation_score = self._calculate_automation_score(automation_patterns)
# Basic code metrics
metrics = {
'num_functions': len([node for node in ast.walk(tree) if isinstance(node, ast.FunctionDef)]),
'num_classes': len([node for node in ast.walk(tree) if isinstance(node, ast.ClassDef)]),
'has_main': any(isinstance(node, ast.If) and
hasattr(node.test, 'left') and
hasattr(node.test.left, 'id') and
node.test.left.id == '__name__'
for node in ast.walk(tree)),
'has_docstrings': any(isinstance(node, ast.Expr) and
isinstance(node.value, ast.Constant) and isinstance(node.value.value, str)
for node in ast.walk(tree)),
'automation_score': automation_score,
'automation_patterns': automation_patterns
}
return metrics
except SyntaxError as e:
return {'error': f'Invalid Python syntax: {str(e)}'}
def evaluate_requirements(self, code_metrics: Dict) -> List[Dict]:
"""Compare code against assignment requirements"""
evaluation_prompt = PromptTemplate(
input_variables=["requirements", "code_metrics"],
template="""
Based on the following assignment requirements:
{requirements}
And the code metrics:
{code_metrics}
Evaluate the submission and provide:
1. A score out of 10
2. List of strengths
3. List of areas for improvement
4. Specific feedback on code structure
Format your response as a JSON object with keys:
score, strengths, improvements, feedback
"""
)
# Get relevant requirements context
requirements_context = self.requirements_db.similarity_search(
str(code_metrics), k=3
)
requirements_text = "\n".join([doc.page_content for doc in requirements_context])
# Create and run evaluation chain
chain = LLMChain(llm=self.llm, prompt=evaluation_prompt)
result = chain.run(
requirements=requirements_text,
code_metrics=str(code_metrics)
)
return result
def generate_feedback(self, evaluation_result: Dict) -> str:
"""Generate detailed feedback from evaluation results"""
feedback_prompt = PromptTemplate(
input_variables=["evaluation"],
template="""
Based on the following evaluation:
{evaluation}
Provide constructive feedback for the candidate that:
1. Acknowledges their strengths
2. Suggests specific improvements
3. Offers guidance on best practices
4. Maintains an encouraging tone
Format the feedback as a well-structured markdown document.
"""
)
chain = LLMChain(llm=self.llm, prompt=feedback_prompt)
feedback = chain.run(evaluation=str(evaluation_result))
return feedback
# Usage Example
def main():
validator = AssignmentValidator(os.getenv('OPENAI_API_KEY'))
# Load assignment requirements
validator.load_assignment_requirements('assignment.pdf')
# Analyze submitted code
code_metrics = validator.analyze_code('submission.py')
# Evaluate submission
evaluation = validator.evaluate_requirements(code_metrics)
# Generate feedback
feedback = validator.generate_feedback(evaluation)
print("Evaluation Results:", evaluation)
print("\nDetailed Feedback:", feedback)
if __name__ == "__main__":
main()
3
v4:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
import os
import math
from typing import Dict, List, Set
import re
import ast


class AssignmentValidator:
    def __init__(self, openai_api_key: str):
        self.llm = ChatOpenAI(
            temperature=0.1,
            model_name="gpt-4",
            openai_api_key=openai_api_key
        )
        self.embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

    def load_assignment_requirements(self, pdf_path: str):
        """Load and process assignment requirements from PDF"""
        loader = PyPDFLoader(pdf_path)
        pages = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=100
        )
        texts = text_splitter.split_documents(pages)
        self.requirements_db = FAISS.from_documents(texts, self.embeddings)

    def analyze_code(self, code_path: str) -> Dict:
        """Analyze submitted code for structure and patterns"""
        with open(code_path, 'r') as file:
            code = file.read()
        try:
            tree = ast.parse(code)
            # Count automation-related patterns
            automation_patterns = self._analyze_automation_patterns(tree)
            automation_score = self._calculate_automation_score(automation_patterns)
            # Basic code metrics
            metrics = {
                'num_functions': len([node for node in ast.walk(tree) if isinstance(node, ast.FunctionDef)]),
                'num_classes': len([node for node in ast.walk(tree) if isinstance(node, ast.ClassDef)]),
                'has_main': any(isinstance(node, ast.If) and
                                hasattr(node.test, 'left') and
                                hasattr(node.test.left, 'id') and
                                node.test.left.id == '__name__'
                                for node in ast.walk(tree)),
                'has_docstrings': any(isinstance(node, ast.Expr) and
                                      isinstance(node.value, ast.Constant) and
                                      isinstance(node.value.value, str)
                                      for node in ast.walk(tree)),
                'automation_score': automation_score,
                'automation_patterns': automation_patterns
            }
            return metrics
        except SyntaxError as e:
            return {'error': f'Invalid Python syntax: {str(e)}'}

    def evaluate_requirements(self, code_metrics: Dict) -> str:
        """Compare code against assignment requirements"""
        evaluation_prompt = PromptTemplate(
            input_variables=["requirements", "code_metrics"],
            template="""
            Based on the following assignment requirements:
            {requirements}
            And the code metrics:
            {code_metrics}
            Evaluate the submission and provide:
            1. A score out of 10
            2. List of strengths
            3. List of areas for improvement
            4. Specific feedback on code structure
            Format your response as a JSON object with keys:
            score, strengths, improvements, feedback
            """
        )
        # Get relevant requirements context
        requirements_context = self.requirements_db.similarity_search(
            str(code_metrics), k=3
        )
        requirements_text = "\n".join([doc.page_content for doc in requirements_context])
        # Create and run evaluation chain
        chain = LLMChain(llm=self.llm, prompt=evaluation_prompt)
        result = chain.run(
            requirements=requirements_text,
            code_metrics=str(code_metrics)
        )
        return result

    def _analyze_automation_patterns(self, tree: ast.AST) -> Dict:
        """Analyze code for automation patterns"""
        patterns = {
            'chain_patterns': 0,     # LangChain or similar chain patterns
            'async_patterns': 0,     # Asynchronous code patterns
            'loop_patterns': 0,      # Automated iteration patterns
            'api_calls': 0,          # API integration patterns
            'config_patterns': 0,    # Configuration management
            'error_handling': 0,     # Automated error handling
            'data_processing': 0,    # Automated data processing
            'pipeline_patterns': 0   # Data/processing pipeline patterns
        }
        for node in ast.walk(tree):
            # Check for chain patterns (LangChain, etc.)
            if isinstance(node, ast.Call) and hasattr(node.func, 'attr'):
                if 'chain' in node.func.attr.lower():
                    patterns['chain_patterns'] += 1
            # Check for async patterns
            if isinstance(node, (ast.AsyncFunctionDef, ast.Await, ast.AsyncFor, ast.AsyncWith)):
                patterns['async_patterns'] += 1
            # Check for loop patterns
            if isinstance(node, (ast.For, ast.While, ast.ListComp, ast.DictComp, ast.SetComp)):
                patterns['loop_patterns'] += 1
            # Check for API calls
            if isinstance(node, ast.Call) and hasattr(node.func, 'attr'):
                if any(api in node.func.attr.lower() for api in ['get', 'post', 'put', 'delete', 'request']):
                    patterns['api_calls'] += 1
            # Check for configuration management
            if isinstance(node, ast.Call) and hasattr(node.func, 'attr'):
                if any(config in node.func.attr.lower() for config in ['config', 'setup', 'initialize']):
                    patterns['config_patterns'] += 1
            # Check for error handling
            if isinstance(node, (ast.Try, ast.ExceptHandler)):
                patterns['error_handling'] += 1
            # Check for data processing
            if isinstance(node, ast.Call) and hasattr(node.func, 'attr'):
                if any(data in node.func.attr.lower() for data in ['process', 'transform', 'convert', 'parse']):
                    patterns['data_processing'] += 1
            # Check for pipeline patterns
            if isinstance(node, ast.Call) and hasattr(node.func, 'attr'):
                if any(pipeline in node.func.attr.lower() for pipeline in ['pipeline', 'flow', 'stream']):
                    patterns['pipeline_patterns'] += 1
        return patterns

    def _calculate_automation_score(self, patterns: Dict) -> int:
        """Calculate automation score from 1-10 based on patterns"""
        # Weights for different patterns
        weights = {
            'chain_patterns': 1.5,
            'async_patterns': 1.2,
            'loop_patterns': 0.8,
            'api_calls': 1.0,
            'config_patterns': 0.7,
            'error_handling': 0.6,
            'data_processing': 1.3,
            'pipeline_patterns': 1.4
        }
        # Calculate weighted sum
        weighted_sum = sum(patterns[key] * weights[key] for key in patterns)
        # Normalize to a 1-10 scale using a logistic function for a more nuanced scale
        max_expected_score = 30  # Adjust based on expected maximum weighted sum
        normalized_score = 1 + 9 / (1 + math.exp(-weighted_sum / max_expected_score * 6 + 3))
        return round(normalized_score)

    def generate_feedback(self, evaluation_result: str) -> str:
        """Generate detailed feedback from evaluation results"""
        feedback_prompt = PromptTemplate(
            input_variables=["evaluation"],
            template="""
            Based on the following evaluation:
            {evaluation}
            Provide constructive feedback for the candidate that:
            1. Acknowledges their strengths
            2. Suggests specific improvements
            3. Offers guidance on best practices
            4. Maintains an encouraging tone
            Format the feedback as a well-structured markdown document.
            """
        )
        chain = LLMChain(llm=self.llm, prompt=feedback_prompt)
        feedback = chain.run(evaluation=str(evaluation_result))
        return feedback


# Usage Example
def main():
    validator = AssignmentValidator(os.getenv('OPENAI_API_KEY'))
    # Load assignment requirements
    validator.load_assignment_requirements('assignment.pdf')
    # Analyze submitted code
    code_metrics = validator.analyze_code('submission.py')
    # Evaluate submission
    evaluation = validator.evaluate_requirements(code_metrics)
    # Generate feedback
    feedback = validator.generate_feedback(evaluation)
    print("Evaluation Results:", evaluation)
    print("\nDetailed Feedback:", feedback)


if __name__ == "__main__":
    main()
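Beyond the original conversation, a small batch-validation sketch, assuming the v4 AssignmentValidator above; the folder layout and helper name are hypothetical:
import os
from pathlib import Path

def validate_all(submissions_dir: str, assignment_pdf: str) -> dict:
    """Run the validator over every .py submission in a folder (hypothetical helper)."""
    validator = AssignmentValidator(os.getenv('OPENAI_API_KEY'))
    validator.load_assignment_requirements(assignment_pdf)
    results = {}
    for submission in sorted(Path(submissions_dir).glob('*.py')):
        metrics = validator.analyze_code(str(submission))
        evaluation = validator.evaluate_requirements(metrics)
        results[submission.name] = {
            'metrics': metrics,
            'evaluation': evaluation,
            'feedback': validator.generate_feedback(evaluation),
        }
    return results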