diff --git a/app.py b/app.py index c608310..106b08e 100644 --- a/app.py +++ b/app.py @@ -7,6 +7,7 @@ from selenium.webdriver.chrome.options import Options from selenium.common.exceptions import NoSuchElementException from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC +from selenium.common.exceptions import StaleElementReferenceException import os from flask import Flask, request, jsonify @@ -26,69 +27,68 @@ chrome_options.add_argument("--disable-dev-shm-usage") service = Service('/usr/bin/chromedriver') driver = None + +def insert_alumno_extraccion(datos_html: str, materias_html: str, historial_html: str = 'error', materias_actuales_html: str = 'error'): + try: + conn = psycopg2.connect( + dbname=os.getenv("DBNAME"), + user=os.getenv("DBUSER"), + password=os.getenv("DBPASSWORD"), + host=os.getenv("DBHOST"), + port=os.getenv("DBPORT") + ) + cur = conn.cursor() + + insert_query = """ + INSERT INTO public.alumno_extraccion ("Usuario_claveULSA", datos_html, materias_html, historial_html, materias_actuales_html) VALUES (%s, TRIM(%s), TRIM(%s), TRIM(%s)::JSONB, TRIM(%s)) + ON CONFLICT ("Usuario_claveULSA") DO UPDATE SET datos_html = EXCLUDED.datos_html, materias_html = EXCLUDED.materias_html, error_message = NULL, registrado = DEFAULT, historial_html = EXCLUDED.historial_html; + """ + cur.execute(insert_query, (username_integer, datos_html, materias_html, historial_html, materias_actuales_html)) + + conn.commit() + return cur.query.decode('utf-8') + except psycopg2.ProgrammingError as e: + print(f"Error de sintaxis: {e}") + except psycopg2.IntegrityError as e: + print(f"Error de integridad: {e}") + except Exception as e: + print(f"Error: {e}") + finally: + cur.close() + conn.close() +def update_alumno_extraccion_error(error: str): + try: + + conn = psycopg2.connect( + dbname=os.getenv("DBNAME"), + user=os.getenv("DBUSER"), + password=os.getenv("DBPASSWORD"), + host=os.getenv("DBHOST"), + port=os.getenv("DBPORT") + ) + cur = conn.cursor() + + update_query = """ + INSERT INTO public.alumno_extraccion ("Usuario_claveULSA", error_message) VALUES (%s, %s) + ON CONFLICT ("Usuario_claveULSA") DO UPDATE SET error_message = EXCLUDED.error_message, + materias_html = DEFAULT, registrado = DEFAULT; + """ + cur.execute(update_query, (username_integer, error[:255])) + + conn.commit() + print("Data updated successfully") + except psycopg2.ProgrammingError as e: + print(f"Error de sintaxis: {e}") + + finally: + cur.close() + conn.close() def extract(username: str, password: str): url_credentials = f'https://{username}:{password}@sgu.ulsa.edu.mx/psulsa/alumnos/consultainformacionalumnos/consultainformacion.aspx' url = 'https://sgu.ulsa.edu.mx/psulsa/alumnos/consultainformacionalumnos/consultainformacion.aspx' username_integer = int(username[2:]) - def insert_alumno_extraccion(datos_html: str, materias_html: str, historial_html: str = 'error', materias_actuales_html: str = 'error'): - try: - conn = psycopg2.connect( - dbname=os.getenv("DBNAME"), - user=os.getenv("DBUSER"), - password=os.getenv("DBPASSWORD"), - host=os.getenv("DBHOST"), - port=os.getenv("DBPORT") - ) - cur = conn.cursor() - - insert_query = """ - INSERT INTO public.alumno_extraccion ("Usuario_claveULSA", datos_html, materias_html, historial_html, materias_actuales_html) VALUES (%s, TRIM(%s), TRIM(%s), TRIM(%s)::JSONB, TRIM(%s)) - ON CONFLICT ("Usuario_claveULSA") DO UPDATE SET datos_html = EXCLUDED.datos_html, materias_html = EXCLUDED.materias_html, error_message = NULL, registrado = DEFAULT, historial_html = EXCLUDED.historial_html; - """ - cur.execute(insert_query, (username_integer, datos_html, materias_html, historial_html, materias_actuales_html)) - - conn.commit() - return cur.query.decode('utf-8') - except psycopg2.ProgrammingError as e: - print(f"Error de sintaxis: {e}") - except psycopg2.IntegrityError as e: - print(f"Error de integridad: {e}") - except Exception as e: - print(f"Error: {e}") - finally: - cur.close() - conn.close() - - def update_alumno_extraccion_error(error: str): - try: - - conn = psycopg2.connect( - dbname=os.getenv("DBNAME"), - user=os.getenv("DBUSER"), - password=os.getenv("DBPASSWORD"), - host=os.getenv("DBHOST"), - port=os.getenv("DBPORT") - ) - cur = conn.cursor() - - update_query = """ - INSERT INTO public.alumno_extraccion ("Usuario_claveULSA", error_message) VALUES (%s, %s) - ON CONFLICT ("Usuario_claveULSA") DO UPDATE SET error_message = EXCLUDED.error_message, - materias_html = DEFAULT, registrado = DEFAULT; - """ - cur.execute(update_query, (username_integer, error[:255])) - - conn.commit() - print("Data updated successfully") - except psycopg2.ProgrammingError as e: - print(f"Error de sintaxis: {e}") - - finally: - cur.close() - conn.close() - try: driver.get(url_credentials) driver.get(url) @@ -99,71 +99,70 @@ def extract(username: str, password: str): elemento = WebDriverWait(driver, 3.5).until( EC.presence_of_element_located((By.ID, 'ctl00_contenedor_HistorialAlumno1_lblBtnSeccionHAcademico')) ) - elemento.click() - # Get the HTML content of the materias element + # Intentar varias veces en caso de un `StaleElementReferenceException` + for _ in range(3): + try: + elemento.click() + break # Si se hace clic correctamente, salir del bucle + except StaleElementReferenceException: + print("Elemento 'stale', intentando de nuevo...") + elemento = driver.find_element(By.ID, 'ctl00_contenedor_HistorialAlumno1_lblBtnSeccionHAcademico') + + # Obtener el HTML de las materias materias_html = driver.find_element(By.ID, 'ctl00_contenedor_HistorialAlumno1_divHAcademico').get_attribute('innerHTML') historial_html = driver.find_element(By.ID, 'ctl00_contenedor_HistorialAlumno1_gvMaterias').get_attribute('innerHTML') - # materias_actuales_html = driver.find_element(By.ID, 'ctl00_contenedor_HistorialAlumno1_div13').get_attribute('innerHTML') + # Manejar el historial como DataFrame historial_html_io = StringIO(f"{historial_html}
") - # Read the HTML table into a DataFrame df = pd.read_html(historial_html_io)[0] - if 'GRUPO' not in df.columns: - raise KeyError("Column 'GRUPO' not found in the DataFrame") + raise KeyError("Column 'GRUPO' not found in the DataFrame") - # Remove decimals from the 'PERIODO' column (if present) df['PERIODO'] = df['PERIODO'].apply(lambda x: str(x).replace('.0', '') if isinstance(x, (float, int)) else x) - # Convert the DataFrame to JSON json_result = df[df['GRUPO'] != 'Promedio:'].to_json(orient='records') - # Connect to PostgreSQL database query = insert_alumno_extraccion(datos_html, materias_html, json_result) - print("Data extracted successfully") return json_result except NoSuchElementException as e: update_alumno_extraccion_error(str(e)) -def se_puede_extraer(): - try: - conn = conn = psycopg2.connect( + +def se_puede(): + try: + # Establece la conexión a la base de datos usando with para gestionar automáticamente el cierre + with psycopg2.connect( dbname=os.getenv("DBNAME"), user=os.getenv("DBUSER"), password=os.getenv("DBPASSWORD"), host=os.getenv("DBHOST"), port=os.getenv("DBPORT") - ) - cursor = conn.cursor() - - # SELECCIONAR ULTIMA planeacion - - - query = """ - SELECT 1 - FROM alumno_extraccion_fecha - WHERE CURRENT_DATE BETWEEN fecha_inicio AND fecha_fin - ORDER BY CREATED_AT DESC - LIMIT 1; + ) as conn: + with conn.cursor() as cursor: + # SELECCIONAR ÚLTIMA planeacion + query = """ + SELECT 1 + FROM alumno_extraccion_fecha + WHERE CURRENT_DATE BETWEEN fecha_inicio AND fecha_fin + ORDER BY CREATED_AT DESC + LIMIT 1; """ - # Ejecuta la consulta - cursor.execute(query) - result = cursor.fetchone() - - # Verifica si se obtuvo algún resultado - exists = result is not None - - # Cierra el cursor y la conexión - cursor.close() - conn.close() - - return exists + # Ejecuta la consulta + cursor.execute(query) + result = cursor.fetchone() + + # Verifica si se obtuvo algún resultado + return result is not None + except psycopg2.Error as e: + # Maneja errores específicos de la base de datos + print(f"Error en la base de datos: {e}") except Exception as e: - print(f"Error: {e}") - return False + # Maneja otros tipos de errores + print(f"Error general: {e}") + return False app = Flask(__name__)