diff --git a/app.py b/app.py index c608310..106b08e 100644 --- a/app.py +++ b/app.py @@ -7,6 +7,7 @@ from selenium.webdriver.chrome.options import Options from selenium.common.exceptions import NoSuchElementException from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC +from selenium.common.exceptions import StaleElementReferenceException import os from flask import Flask, request, jsonify @@ -26,69 +27,68 @@ chrome_options.add_argument("--disable-dev-shm-usage") service = Service('/usr/bin/chromedriver') driver = None + +def insert_alumno_extraccion(datos_html: str, materias_html: str, historial_html: str = 'error', materias_actuales_html: str = 'error'): + try: + conn = psycopg2.connect( + dbname=os.getenv("DBNAME"), + user=os.getenv("DBUSER"), + password=os.getenv("DBPASSWORD"), + host=os.getenv("DBHOST"), + port=os.getenv("DBPORT") + ) + cur = conn.cursor() + + insert_query = """ + INSERT INTO public.alumno_extraccion ("Usuario_claveULSA", datos_html, materias_html, historial_html, materias_actuales_html) VALUES (%s, TRIM(%s), TRIM(%s), TRIM(%s)::JSONB, TRIM(%s)) + ON CONFLICT ("Usuario_claveULSA") DO UPDATE SET datos_html = EXCLUDED.datos_html, materias_html = EXCLUDED.materias_html, error_message = NULL, registrado = DEFAULT, historial_html = EXCLUDED.historial_html; + """ + cur.execute(insert_query, (username_integer, datos_html, materias_html, historial_html, materias_actuales_html)) + + conn.commit() + return cur.query.decode('utf-8') + except psycopg2.ProgrammingError as e: + print(f"Error de sintaxis: {e}") + except psycopg2.IntegrityError as e: + print(f"Error de integridad: {e}") + except Exception as e: + print(f"Error: {e}") + finally: + cur.close() + conn.close() +def update_alumno_extraccion_error(error: str): + try: + + conn = psycopg2.connect( + dbname=os.getenv("DBNAME"), + user=os.getenv("DBUSER"), + password=os.getenv("DBPASSWORD"), + host=os.getenv("DBHOST"), + port=os.getenv("DBPORT") + ) + cur = conn.cursor() + + update_query = """ + INSERT INTO public.alumno_extraccion ("Usuario_claveULSA", error_message) VALUES (%s, %s) + ON CONFLICT ("Usuario_claveULSA") DO UPDATE SET error_message = EXCLUDED.error_message, + materias_html = DEFAULT, registrado = DEFAULT; + """ + cur.execute(update_query, (username_integer, error[:255])) + + conn.commit() + print("Data updated successfully") + except psycopg2.ProgrammingError as e: + print(f"Error de sintaxis: {e}") + + finally: + cur.close() + conn.close() def extract(username: str, password: str): url_credentials = f'https://{username}:{password}@sgu.ulsa.edu.mx/psulsa/alumnos/consultainformacionalumnos/consultainformacion.aspx' url = 'https://sgu.ulsa.edu.mx/psulsa/alumnos/consultainformacionalumnos/consultainformacion.aspx' username_integer = int(username[2:]) - def insert_alumno_extraccion(datos_html: str, materias_html: str, historial_html: str = 'error', materias_actuales_html: str = 'error'): - try: - conn = psycopg2.connect( - dbname=os.getenv("DBNAME"), - user=os.getenv("DBUSER"), - password=os.getenv("DBPASSWORD"), - host=os.getenv("DBHOST"), - port=os.getenv("DBPORT") - ) - cur = conn.cursor() - - insert_query = """ - INSERT INTO public.alumno_extraccion ("Usuario_claveULSA", datos_html, materias_html, historial_html, materias_actuales_html) VALUES (%s, TRIM(%s), TRIM(%s), TRIM(%s)::JSONB, TRIM(%s)) - ON CONFLICT ("Usuario_claveULSA") DO UPDATE SET datos_html = EXCLUDED.datos_html, materias_html = EXCLUDED.materias_html, error_message = NULL, registrado = DEFAULT, historial_html = EXCLUDED.historial_html; - """ - cur.execute(insert_query, (username_integer, datos_html, materias_html, historial_html, materias_actuales_html)) - - conn.commit() - return cur.query.decode('utf-8') - except psycopg2.ProgrammingError as e: - print(f"Error de sintaxis: {e}") - except psycopg2.IntegrityError as e: - print(f"Error de integridad: {e}") - except Exception as e: - print(f"Error: {e}") - finally: - cur.close() - conn.close() - - def update_alumno_extraccion_error(error: str): - try: - - conn = psycopg2.connect( - dbname=os.getenv("DBNAME"), - user=os.getenv("DBUSER"), - password=os.getenv("DBPASSWORD"), - host=os.getenv("DBHOST"), - port=os.getenv("DBPORT") - ) - cur = conn.cursor() - - update_query = """ - INSERT INTO public.alumno_extraccion ("Usuario_claveULSA", error_message) VALUES (%s, %s) - ON CONFLICT ("Usuario_claveULSA") DO UPDATE SET error_message = EXCLUDED.error_message, - materias_html = DEFAULT, registrado = DEFAULT; - """ - cur.execute(update_query, (username_integer, error[:255])) - - conn.commit() - print("Data updated successfully") - except psycopg2.ProgrammingError as e: - print(f"Error de sintaxis: {e}") - - finally: - cur.close() - conn.close() - try: driver.get(url_credentials) driver.get(url) @@ -99,71 +99,70 @@ def extract(username: str, password: str): elemento = WebDriverWait(driver, 3.5).until( EC.presence_of_element_located((By.ID, 'ctl00_contenedor_HistorialAlumno1_lblBtnSeccionHAcademico')) ) - elemento.click() - # Get the HTML content of the materias element + # Intentar varias veces en caso de un `StaleElementReferenceException` + for _ in range(3): + try: + elemento.click() + break # Si se hace clic correctamente, salir del bucle + except StaleElementReferenceException: + print("Elemento 'stale', intentando de nuevo...") + elemento = driver.find_element(By.ID, 'ctl00_contenedor_HistorialAlumno1_lblBtnSeccionHAcademico') + + # Obtener el HTML de las materias materias_html = driver.find_element(By.ID, 'ctl00_contenedor_HistorialAlumno1_divHAcademico').get_attribute('innerHTML') historial_html = driver.find_element(By.ID, 'ctl00_contenedor_HistorialAlumno1_gvMaterias').get_attribute('innerHTML') - # materias_actuales_html = driver.find_element(By.ID, 'ctl00_contenedor_HistorialAlumno1_div13').get_attribute('innerHTML') + # Manejar el historial como DataFrame historial_html_io = StringIO(f"