"""HTTP service that scrapes a student's academic record and stores it in PostgreSQL.

A single Flask endpoint (``POST /calificaciones``) receives a student key and
password, drives a headless Chromium session against the university portal,
and upserts the scraped HTML / JSON into ``public.alumno_extraccion``.
"""
import os
from io import StringIO
from urllib.parse import quote

import pandas as pd
import psycopg2
from psycopg2 import pool
from flask import Flask, request, jsonify
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from waitress import serve

# Global connection pool, configured from environment variables.
connection_pool = psycopg2.pool.ThreadedConnectionPool(
    minconn=1,
    maxconn=10,  # Maximum pool size
    dbname=os.getenv("DBNAME"),
    user=os.getenv("DBUSER"),
    password=os.getenv("DBPASSWORD"),
    host=os.getenv("DBHOST"),
    port=os.getenv("DBPORT")
)


def get_db_connection():
    """Get a connection from the pool."""
    return connection_pool.getconn()


def release_db_connection(conn):
    """Release a connection and return it to the pool."""
    connection_pool.putconn(conn)


# Set options for the Chromium browser
chrome_options = Options()
chrome_options.add_argument("--headless")  # Optional: Run Chromium in headless mode
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--disable-dev-shm-usage")

# Specify the path to the Chromium driver
service = Service('/usr/bin/chromedriver')
driver = None


def insert_alumno_extraccion(datos_html: str, materias_html: str, username_integer: int,
                             historial_html: str = 'error',
                             materias_actuales_html: str = 'error'):
    """Upsert the scraped data for one student into ``public.alumno_extraccion``.

    Args:
        datos_html: HTML of the student's general-information container.
        materias_html: HTML of the academic-history section.
        username_integer: Numeric student key used as the conflict target.
        historial_html: JSON text cast to JSONB by the query (``None`` is
            stored as NULL).
        materias_actuales_html: Only written on first insert.

    Returns:
        The SQL text as sent to the server on success, or ``None`` when a
        database error was caught (the error is printed).

    NOTE(review): the ``'error'`` default for ``historial_html`` is not valid
    JSON, so relying on it would presumably make the ``::JSONB`` cast fail.
    NOTE(review): ``materias_actuales_html`` is not refreshed by the
    ``ON CONFLICT`` branch; confirm whether that is intended.
    """
    insert_query = """
        INSERT INTO public.alumno_extraccion
            ("Usuario_claveULSA", datos_html, materias_html, historial_html, materias_actuales_html, updated_at)
        VALUES (%s, TRIM(%s), TRIM(%s), TRIM(%s)::JSONB, TRIM(%s), NOW())
        ON CONFLICT ("Usuario_claveULSA") DO UPDATE SET
            datos_html = EXCLUDED.datos_html,
            materias_html = EXCLUDED.materias_html,
            error_message = NULL,
            registrado = DEFAULT,
            historial_html = EXCLUDED.historial_html,
            updated_at = NOW();
    """
    params = (username_integer, datos_html, materias_html, historial_html, materias_actuales_html)

    conn = get_db_connection()  # Borrow a connection from the pool
    try:
        # The cursor context manager closes the cursor even on failure and,
        # unlike a bare ``cur.close()`` in ``finally``, cannot hit an unbound
        # name when ``conn.cursor()`` itself raises.
        with conn.cursor() as cur:
            cur.execute(insert_query, params)
            conn.commit()
            return cur.query.decode('utf-8')
    except psycopg2.ProgrammingError as e:
        print(f"Error de sintaxis: {e}")
    except psycopg2.IntegrityError as e:
        print(f"Error de integridad: {e}")
    except Exception as e:
        print(f"Error: {e}")
    finally:
        release_db_connection(conn)  # Always hand the connection back
    return None


def update_alumno_extraccion_error(username_integer: int, error: str):
    """Record an extraction failure for one student.

    Upserts ``error_message`` and resets ``materias_html`` / ``registrado`` to
    their column defaults. Database errors are printed rather than raised,
    because this function is called from an exception handler.

    Args:
        username_integer: Numeric student key used as the conflict target.
        error: Error text to store.
    """
    update_query = """
        INSERT INTO public.alumno_extraccion ("Usuario_claveULSA", error_message, updated_at)
        VALUES (%s, %s, NOW())
        ON CONFLICT ("Usuario_claveULSA") DO UPDATE SET
            error_message = EXCLUDED.error_message,
            materias_html = DEFAULT,
            registrado = DEFAULT,
            updated_at = NOW();
    """
    conn = get_db_connection()  # Borrow a connection from the pool
    try:
        with conn.cursor() as cur:
            cur.execute(update_query, (username_integer, error))
            conn.commit()
        print("Data updated successfully")
    except psycopg2.ProgrammingError as e:
        print(f"Error de sintaxis: {e}")
    except psycopg2.Error as e:
        # Any other database failure must not escape the caller's handler.
        print(f"Error en la base de datos: {e}")
    finally:
        release_db_connection(conn)  # Always hand the connection back


def se_puede_extraer():
    """Tell whether today falls inside a configured extraction window.

    Returns:
        ``True`` when ``alumno_extraccion_fecha`` has a row whose
        ``fecha_inicio``..``fecha_fin`` range contains the current date,
        ``False`` otherwise or when the lookup fails (the error is printed).
    """
    query = """
        SELECT 1
        FROM alumno_extraccion_fecha
        WHERE CURRENT_DATE BETWEEN fecha_inicio AND fecha_fin
        ORDER BY CREATED_AT DESC
        LIMIT 1;
    """
    conn = get_db_connection()  # Borrow a connection from the pool
    try:
        with conn.cursor() as cursor:
            cursor.execute(query)
            return cursor.fetchone() is not None
    except psycopg2.Error as e:
        print(f"Error en la base de datos: {e}")
    except Exception as e:
        print(f"Error general: {e}")
    finally:
        release_db_connection(conn)  # Always hand the connection back
    return False


def extract(driver, username: str, password: str):
    """Scrape one student's record from the portal and persist it.

    Args:
        driver: A Selenium WebDriver instance; the caller owns its lifetime.
        username: Student key; everything after the first two characters must
            parse as an integer.
        password: Portal password.

    Returns:
        The academic history as a JSON string (``orient='records'``), or
        ``None`` when the history table has no ``GRUPO`` column or when the
        extraction failed (the failure is stored via
        ``update_alumno_extraccion_error``).

    Raises:
        ValueError: If ``username[2:]`` is not an integer.
    """
    host_path = 'sgu.ulsa.edu.mx/psulsa/alumnos/consultainformacionalumnos/consultainformacion.aspx'
    # Percent-encode the credentials so characters such as '@', ':' or '/'
    # cannot corrupt the userinfo part of the URL.
    url_credentials = f"https://{quote(username, safe='')}:{quote(password, safe='')}@{host_path}"
    url = f'https://{host_path}'
    username_integer = int(username[2:])
    history_button_id = 'ctl00_contenedor_HistorialAlumno1_lblBtnSeccionHAcademico'

    try:
        # First request carries the credentials; second loads the clean URL.
        driver.get(url_credentials)
        driver.get(url)

        # Raises if the container 'ctl00_contenedor_control' does not exist.
        datos_html = driver.find_element(By.ID, 'ctl00_contenedor_control').get_attribute('innerHTML')

        # os.getenv returns a string or None; convert explicitly and fall
        # back to a default so an unset variable does not break every run.
        # NOTE(review): the 10-second default is an assumption — confirm.
        wait_seconds = float(os.getenv("WAIT_TIME", "10"))
        elemento = WebDriverWait(driver, wait_seconds).until(
            EC.presence_of_element_located((By.ID, history_button_id))
        )

        # Retry a few times in case of a StaleElementReferenceException.
        for _ in range(3):
            try:
                elemento.click()
                break  # Click succeeded, leave the loop
            except StaleElementReferenceException:
                print("Elemento 'stale', intentando de nuevo...")
                elemento = driver.find_element(By.ID, history_button_id)

        # Grab the HTML of the subjects section and of the history grid.
        materias_html = driver.find_element(
            By.ID, 'ctl00_contenedor_HistorialAlumno1_divHAcademico'
        ).get_attribute('innerHTML')
        historial_html = driver.find_element(
            By.ID, 'ctl00_contenedor_HistorialAlumno1_gvMaterias'
        ).get_attribute('innerHTML')

        # Parse the history into a DataFrame.
        # NOTE(review): in the reviewed copy this literal contained a line
        # break right after the placeholder (reproduced here as "\n");
        # wrapper markup such as a <table> tag may have been lost — confirm
        # against the original file.
        historial_html_io = StringIO(f"{historial_html}\n")
        df = pd.read_html(historial_html_io)[0]

        json_result = None
        if 'GRUPO' in df.columns:
            # Numeric periods are rendered without a trailing ".0".
            df['PERIODO'] = df['PERIODO'].apply(
                lambda x: str(x).replace('.0', '') if isinstance(x, (float, int)) else x
            )
            # Drop the summary row(s) whose GRUPO cell is 'Promedio:'.
            json_result = df[df['GRUPO'] != 'Promedio:'].to_json(orient='records')

        insert_alumno_extraccion(datos_html, materias_html, username_integer, json_result)
        print("Data extracted successfully")
        return json_result
    except Exception as e:
        update_alumno_extraccion_error(username_integer, str(e))
    return None


def _clave_is_valid(username) -> bool:
    """Return True when ``username`` is a string whose tail (from index 2) is an integer."""
    if not username:
        return False
    try:
        int(username[2:])
    except ValueError:
        return False
    return True


app = Flask(__name__)


@app.route('/calificaciones', methods=['POST'])
def main():
    """Handle ``POST /calificaciones``: run an extraction for the posted credentials.

    Form fields: ``clave`` (student key) and ``password``.

    Returns:
        A JSON response with ``message`` and ``en-fecha`` (whether extraction
        is currently allowed), or a 400 response when the form fields are
        missing or ``clave`` is malformed.
    """
    username = request.form.get('clave')
    password = request.form.get('password')
    if password is None or not _clave_is_valid(username):
        return jsonify({"message": "Missing or invalid 'clave' or 'password'"}), 400

    se_puede = se_puede_extraer()
    if se_puede:
        # Bind the name before the try so the cleanup below is safe even when
        # the WebDriver constructor itself raises.
        driver = None
        try:
            driver = webdriver.Chrome(service=service, options=chrome_options)
            extract(driver, username, password)
        finally:
            if driver is not None:
                driver.quit()  # Make sure the browser is closed

    return jsonify({"message": "Data extracted successfully", "en-fecha": se_puede})


if __name__ == '__main__':
    serve(app, host='0.0.0.0', port=5000)