You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 
 
 
 

347 lines
13 KiB

from flask import Blueprint, render_template, request, jsonify
import requests
import json
import logging
# Blueprint that the page route and the JSON API routes in this module attach to.
feature_engineering_bp = Blueprint('feature_engineering', __name__)
# Configure logging
# NOTE(review): basicConfig runs at import time and targets the root logger;
# it is a no-op if the root logger already has handlers -- confirm that the
# hosting application does not expect to own logging configuration.
logging.basicConfig(level=logging.INFO)
# Module-level logger used by the route handlers for error reporting.
logger = logging.getLogger(__name__)
@feature_engineering_bp.route('/')
def feature_engineering():
    """Serve the feature engineering page by rendering its HTML template."""
    page = render_template('feature_engineering.html')
    return page
@feature_engineering_bp.route('/api/test-deepseek', methods=['POST'])
def test_deepseek_api():
    """Test API connection for both Deepseek and Kimi.

    Request:
        Header ``X-API-Key``: the provider API key (required).
        Optional JSON object body with ``provider`` (``'kimi'`` selects the
        Moonshot endpoint, any other value the Deepseek endpoint; default
        ``'deepseek'``) and ``model_name`` (default ``'deepseek-chat'``).

    Returns:
        A JSON response containing ``success`` plus ``message`` or ``error``.
        Status 400 when the key is missing, the body is not a JSON object, or
        the provider answers with a non-200 status; status 500 on network or
        unexpected errors.
    """
    try:
        api_key = request.headers.get('X-API-Key')
        if not api_key:
            return jsonify({'success': False, 'error': 'API key is required'}), 400

        # The body is optional for this endpoint. silent=True stops a missing
        # or non-JSON payload from raising, so the defaults below apply
        # instead of the request ending up in the generic 500 handler.
        data = request.get_json(silent=True) or {}
        if not isinstance(data, dict):
            return jsonify({'success': False, 'error': 'Request body must be a JSON object'}), 400

        provider = data.get('provider', 'deepseek')
        model_name = data.get('model_name', 'deepseek-chat')
        # str() guards the label against a non-string provider value.
        provider_label = str(provider).capitalize()

        # Set up API endpoint based on provider; both use the same headers.
        if provider == 'kimi':
            api_url = 'https://api.moonshot.cn/v1/chat/completions'
        else:  # deepseek
            api_url = 'https://api.deepseek.com/chat/completions'
        headers = {
            'Authorization': f'Bearer {api_key}',
            'Content-Type': 'application/json',
        }

        # Minimal chat request: just enough to verify the key and model.
        test_data = {
            'model': model_name,
            'messages': [
                {'role': 'user', 'content': 'Hello, this is a test message.'}
            ],
            'max_tokens': 10,
        }
        response = requests.post(
            api_url,
            headers=headers,
            json=test_data,
            timeout=10,
        )

        if response.status_code == 200:
            return jsonify({'success': True, 'message': f'{provider_label} API connection successful'})

        error_detail = response.text
        return jsonify({'success': False, 'error': f'API returned status {response.status_code}: {error_detail}'}), 400
    except requests.exceptions.RequestException as e:
        logger.error("API test error: %s", e)
        return jsonify({'success': False, 'error': f'Network error: {str(e)}'}), 500
    except Exception as e:
        # Last-resort boundary so the client always receives a JSON error.
        logger.error("Unexpected error in API test: %s", e)
        return jsonify({'success': False, 'error': f'Unexpected error: {str(e)}'}), 500
@feature_engineering_bp.route('/api/get-recommendations', methods=['POST'])
def get_feature_engineering_recommendations():
    """Get feature engineering recommendations from the chat API.

    Request:
        Header ``X-API-Key``: the provider API key (required).
        JSON object body with ``data_field`` (required) and optional
        ``current_step`` (default 1), ``previous_steps`` (default ``[]``),
        ``current_data_state`` (default ``'raw data'``), ``provider``
        (``'kimi'`` selects the Moonshot endpoint, anything else Deepseek)
        and ``model_name`` (default ``'deepseek-chat'``).

    Returns:
        On success, JSON with ``success``, ``recommendations`` and the echoed
        context fields. Status 400 when the key, body, or ``data_field`` is
        missing/invalid or the provider answers with a non-200 status;
        status 500 on network errors, malformed provider responses, or other
        unexpected errors.
    """
    try:
        api_key = request.headers.get('X-API-Key')
        if not api_key:
            return jsonify({'success': False, 'error': 'API key is required'}), 400

        # silent=True yields None instead of raising on a missing/invalid
        # JSON body, so a bad request gets a clear 400 rather than a 500
        # caused by calling .get on None.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'success': False, 'error': 'Request body must be a JSON object'}), 400

        current_step = data.get('current_step', 1)
        data_field = data.get('data_field', '')
        previous_steps = data.get('previous_steps', [])
        current_data_state = data.get('current_data_state', 'raw data')
        provider = data.get('provider', 'deepseek')
        model_name = data.get('model_name', 'deepseek-chat')
        # str() guards the log label against a non-string provider value.
        provider_label = str(provider).capitalize()

        if not data_field:
            return jsonify({'success': False, 'error': 'Data field description is required'}), 400

        # Build the system prompt
        system_prompt = get_default_system_prompt_text()

        # Build the user prompt
        if previous_steps:
            previous_steps_text = ", ".join(
                f"Step {i+1}: {step}" for i, step in enumerate(previous_steps)
            )
        else:
            previous_steps_text = "None"
        user_prompt = (
            "Context:\n"
            f"Current step: {current_step}\n"
            f"Current data field: {data_field}\n"
            f"Previous steps and categories used: {previous_steps_text}\n"
            f"Current data state: {current_data_state}"
        )

        # Set up API endpoint based on provider; both use the same headers.
        if provider == 'kimi':
            api_url = 'https://api.moonshot.cn/v1/chat/completions'
        else:  # deepseek
            api_url = 'https://api.deepseek.com/chat/completions'
        headers = {
            'Authorization': f'Bearer {api_key}',
            'Content-Type': 'application/json',
        }

        api_data = {
            'model': model_name,
            'messages': [
                {'role': 'system', 'content': system_prompt},
                {'role': 'user', 'content': user_prompt},
            ],
            'max_tokens': 8192,
            'temperature': 0.7,
        }
        response = requests.post(
            api_url,
            headers=headers,
            json=api_data,
            timeout=120,
        )

        if response.status_code != 200:
            error_detail = response.text
            logger.error("%s API error: %s - %s", provider_label, response.status_code, error_detail)
            return jsonify({'success': False, 'error': f'API returned status {response.status_code}: {error_detail}'}), 400

        # Parse separately so that a malformed 200 response is reported as
        # such rather than as a "Network error" (requests' JSON decode error
        # is a RequestException) or a bare KeyError message.
        try:
            recommendations = response.json()['choices'][0]['message']['content']
        except (ValueError, KeyError, IndexError, TypeError) as e:
            logger.error("%s API returned an unexpected payload: %r", provider_label, e)
            return jsonify({'success': False, 'error': f'Unexpected response format from {provider_label} API'}), 500

        return jsonify({
            'success': True,
            'recommendations': recommendations,
            'current_step': current_step,
            'data_field': data_field,
            'previous_steps': previous_steps,
            'current_data_state': current_data_state,
        })
    except requests.exceptions.RequestException as e:
        logger.error("API request error: %s", e)
        return jsonify({'success': False, 'error': f'Network error: {str(e)}'}), 500
    except Exception as e:
        # Last-resort boundary so the client always receives a JSON error.
        logger.error("Unexpected error in get recommendations: %s", e)
        return jsonify({'success': False, 'error': f'Unexpected error: {str(e)}'}), 500
@feature_engineering_bp.route('/api/continue-conversation', methods=['POST'])
def continue_conversation():
    """Continue the conversation with follow-up questions.

    Request:
        Header ``X-API-Key``: the provider API key (required).
        JSON object body with ``user_message`` (required) and optional
        ``conversation_history`` (list of chat messages, default ``[]``),
        ``custom_system_prompt`` (falls back to the default system prompt
        when empty), ``provider`` (``'kimi'`` selects the Moonshot endpoint,
        anything else Deepseek) and ``model_name``
        (default ``'deepseek-chat'``).

    Returns:
        On success, JSON with ``success`` and ``response`` (the assistant's
        reply). Status 400 when the key, body, ``user_message`` or
        ``conversation_history`` is missing/invalid or the provider answers
        with a non-200 status; status 500 on network errors, malformed
        provider responses, or other unexpected errors.
    """
    try:
        api_key = request.headers.get('X-API-Key')
        if not api_key:
            return jsonify({'success': False, 'error': 'API key is required'}), 400

        # silent=True yields None instead of raising on a missing/invalid
        # JSON body, so a bad request gets a clear 400 rather than a 500
        # caused by calling .get on None.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({'success': False, 'error': 'Request body must be a JSON object'}), 400

        conversation_history = data.get('conversation_history', [])
        user_message = data.get('user_message', '')
        custom_system_prompt = data.get('custom_system_prompt', None)
        provider = data.get('provider', 'deepseek')
        model_name = data.get('model_name', 'deepseek-chat')
        # str() guards the log label against a non-string provider value.
        provider_label = str(provider).capitalize()

        if not user_message:
            return jsonify({'success': False, 'error': 'User message is required'}), 400
        # Iterating a dict or string here would silently send keys or single
        # characters upstream as "messages", so reject anything but a list.
        if not isinstance(conversation_history, list):
            return jsonify({'success': False, 'error': 'Conversation history must be a list'}), 400

        # Use custom system prompt if provided, otherwise use default
        system_prompt = custom_system_prompt or get_default_system_prompt_text()

        # System prompt first, then prior turns, then the new user message.
        messages = [
            {'role': 'system', 'content': system_prompt},
            *conversation_history,
            {'role': 'user', 'content': user_message},
        ]
        # Log only the size: the message content itself is user data.
        logger.debug("Continuing conversation with %d prior messages", len(conversation_history))

        # Set up API endpoint based on provider; both use the same headers.
        if provider == 'kimi':
            api_url = 'https://api.moonshot.cn/v1/chat/completions'
        else:  # deepseek
            api_url = 'https://api.deepseek.com/chat/completions'
        headers = {
            'Authorization': f'Bearer {api_key}',
            'Content-Type': 'application/json',
        }

        api_data = {
            'model': model_name,
            'messages': messages,
            'max_tokens': 8192,
            'temperature': 0.7,
        }
        response = requests.post(
            api_url,
            headers=headers,
            json=api_data,
            timeout=120,
        )

        if response.status_code != 200:
            error_detail = response.text
            logger.error("%s API error: %s - %s", provider_label, response.status_code, error_detail)
            return jsonify({'success': False, 'error': f'API returned status {response.status_code}: {error_detail}'}), 400

        # Parse separately so that a malformed 200 response is reported as
        # such rather than as a "Network error" (requests' JSON decode error
        # is a RequestException) or a bare KeyError message.
        try:
            assistant_response = response.json()['choices'][0]['message']['content']
        except (ValueError, KeyError, IndexError, TypeError) as e:
            logger.error("%s API returned an unexpected payload: %r", provider_label, e)
            return jsonify({'success': False, 'error': f'Unexpected response format from {provider_label} API'}), 500

        return jsonify({
            'success': True,
            'response': assistant_response,
        })
    except requests.exceptions.RequestException as e:
        logger.error("API request error: %s", e)
        return jsonify({'success': False, 'error': f'Network error: {str(e)}'}), 500
    except Exception as e:
        # Last-resort boundary so the client always receives a JSON error.
        logger.error("Unexpected error in continue conversation: %s", e)
        return jsonify({'success': False, 'error': f'Unexpected error: {str(e)}'}), 500
def get_default_system_prompt_text():
    """Return the built-in system prompt used when no custom prompt is supplied.

    The prompt instructs the model to act as a feature engineering assistant
    and to answer with a list of viable next-step categories in a fixed
    output format.
    """
    default_prompt = """You are an expert feature engineering assistant. Your job is to help design a multi-step feature engineering pipeline, with up to 6 steps, for a given data field. At each step, you will recommend the most viable feature engineering category (from a set of 15 categories) based on the current data state, the previous steps, and the high-level goal.
Instructions:
At each step, you will be given:
The current step number.
The current data field and its description.
The previous steps and categories used (if any).
The current data state (e.g., normalized, filtered, etc.).
Your task is to:
List the most viable feature engineering categories for the next step, choosing from the following 15 categories:
Basic Arithmetic & Mathematical Operations
Logical & Conditional Operations
Time Series: Change Detection & Value Comparison
Time Series: Statistical Feature Engineering
Time Series: Ranking, Scaling, and Normalization
Time Series: Decay, Smoothing, and Turnover Control
Time Series: Extremes & Position Identification
Cross-Sectional: Ranking, Scaling, and Normalization
Cross-Sectional: Regression & Neutralization
Cross-Sectional: Distributional Transformation & Truncation
Transformational & Filtering Operations
Group Aggregation & Statistical Summary
Group Ranking, Scaling, and Normalization
Group Regression & Neutralization
Group Imputation & Backfilling
For each recommended category, present your answer in the following format:
Repeat the full context for each option.
Explicitly state the chosen next step category.
Give a concise reason for the choice.
Output Format:
Viable categories for Step X:
option 1 for Step X: Context: Current step: [number] Current data field: [description] Previous steps and categories used: [list] Current data state: [description in very detail of how the data is transformed to the current state by the previous steps and its logic] Choose next step: [Category Name] Reason: [explanation]
option 2 for Step X: Context: Current step: [number] Current data field: [description] Previous steps and categories used: [list] Current data state: [description in very detail of how the data is transformed to the current state by the previous steps and its logic] Choose next step: [Category Name] Reason: [explanation]
... (continue for all viable options, Only recommend categories that are logical and meaningful given the current data state and previous steps.)
Additional Instructions:
If certain categories are not appropriate at this step, do not list them.
Be concise and clear in your explanations.
Do not suggest operators unless specifically requested.
You will receive the following input at each step:
Context:
Current step:
Current data field:
Previous steps and categories used:
Current data state:
When you receive the input, respond in the format above.
IMPORTANT: Do NOT include any summary, recommendations, rationale, or additional explanations after the options. Only provide the options in the exact format above. Do NOT add sections like "Most recommended choice", "Rationale", "Best Choice", or "Would you like to proceed". Stop immediately after listing all options.
"""
    return default_prompt
@feature_engineering_bp.route('/api/get-default-system-prompt', methods=['GET'])
def get_default_system_prompt():
    """Expose the default system prompt text as a JSON response.

    Responds with ``success`` and ``default_system_prompt`` on success, or
    with ``success: False`` and an ``error`` message (status 500) if
    building the response raises.
    """
    try:
        payload = {
            'success': True,
            'default_system_prompt': get_default_system_prompt_text(),
        }
        return jsonify(payload)
    except Exception as exc:
        logger.error(f"Error getting default system prompt: {str(exc)}")
        failure = {'success': False, 'error': f'Unexpected error: {str(exc)}'}
        return jsonify(failure), 500