Optimización de código

This commit is contained in:
Your Name
2024-09-10 09:48:07 -06:00
parent 06feae543a
commit cfd666f70b

191
app.py
View File

@@ -7,6 +7,7 @@ from selenium.webdriver.chrome.options import Options
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException
import os
from flask import Flask, request, jsonify
@@ -26,69 +27,68 @@ chrome_options.add_argument("--disable-dev-shm-usage")
service = Service('/usr/bin/chromedriver')
driver = None
def insert_alumno_extraccion(datos_html: str, materias_html: str, historial_html: str = 'error', materias_actuales_html: str = 'error'):
    """Upsert one student's scraped content into public.alumno_extraccion.

    The row is keyed by "Usuario_claveULSA", whose value comes from
    ``username_integer``. That name is not a parameter: it is looked up in
    the surrounding scope when the function runs.
    NOTE(review): confirm ``username_integer`` is visible from where this
    function is defined.

    On a key conflict the existing row receives the new datos_html,
    materias_html and historial_html, error_message is set to NULL and
    registrado is reset to its column default. materias_actuales_html is
    written only when the row is first inserted.
    NOTE(review): confirm that leaving it out of the UPDATE is intentional.

    Args:
        datos_html: Value for the datos_html column (trimmed by the query).
        materias_html: Value for the materias_html column (trimmed).
        historial_html: Text that the query trims and casts to JSONB.
            NOTE(review): the default 'error' does not look like valid JSON
            text, so the cast would presumably reject it -- confirm.
        materias_actuales_html: Value for the materias_actuales_html column
            (trimmed).

    Returns:
        The SQL text of the executed statement (``cursor.query`` decoded as
        UTF-8) on success, or None when an exception was caught and printed.
    """
    # The SQL literal is kept exactly as it was; it is also what the
    # function hands back through cursor.query.
    insert_query = """
INSERT INTO public.alumno_extraccion ("Usuario_claveULSA", datos_html, materias_html, historial_html, materias_actuales_html) VALUES (%s, TRIM(%s), TRIM(%s), TRIM(%s)::JSONB, TRIM(%s))
ON CONFLICT ("Usuario_claveULSA") DO UPDATE SET datos_html = EXCLUDED.datos_html, materias_html = EXCLUDED.materias_html, error_message = NULL, registrado = DEFAULT, historial_html = EXCLUDED.historial_html;
"""
    # Start as None so the cleanup below cannot fail with an unbound name
    # (and hide the real error) when connect() itself raises.
    conn = None
    try:
        conn = psycopg2.connect(
            dbname=os.getenv("DBNAME"),
            user=os.getenv("DBUSER"),
            password=os.getenv("DBPASSWORD"),
            host=os.getenv("DBHOST"),
            port=os.getenv("DBPORT")
        )
        # The cursor context manager closes the cursor on every exit path.
        with conn.cursor() as cur:
            cur.execute(insert_query, (username_integer, datos_html, materias_html, historial_html, materias_actuales_html))
            conn.commit()
            return cur.query.decode('utf-8')
    except psycopg2.ProgrammingError as e:
        print(f"Error de sintaxis: {e}")
    except psycopg2.IntegrityError as e:
        print(f"Error de integridad: {e}")
    except Exception as e:
        print(f"Error: {e}")
    finally:
        if conn is not None:
            conn.close()
def update_alumno_extraccion_error(error: str):
    """Record an extraction error for a student in public.alumno_extraccion.

    The row is keyed by "Usuario_claveULSA", whose value comes from
    ``username_integer``. That name is not a parameter: it is looked up in
    the surrounding scope when the function runs.
    NOTE(review): confirm ``username_integer`` is visible from where this
    function is defined.

    On a key conflict the existing row gets the new error_message, and
    materias_html and registrado are reset to their column defaults.

    Args:
        error: Error text; only its first 255 characters are stored.

    Only psycopg2.ProgrammingError is caught (and printed); any other
    exception propagates to the caller after the connection is closed.
    """
    update_query = """
INSERT INTO public.alumno_extraccion ("Usuario_claveULSA", error_message) VALUES (%s, %s)
ON CONFLICT ("Usuario_claveULSA") DO UPDATE SET error_message = EXCLUDED.error_message,
materias_html = DEFAULT, registrado = DEFAULT;
"""
    # Start as None so the cleanup below cannot fail with an unbound name
    # (and hide the real error) when connect() itself raises.
    conn = None
    try:
        conn = psycopg2.connect(
            dbname=os.getenv("DBNAME"),
            user=os.getenv("DBUSER"),
            password=os.getenv("DBPASSWORD"),
            host=os.getenv("DBHOST"),
            port=os.getenv("DBPORT")
        )
        # The cursor context manager closes the cursor on every exit path.
        with conn.cursor() as cur:
            cur.execute(update_query, (username_integer, error[:255]))
            conn.commit()
        print("Data updated successfully")
    except psycopg2.ProgrammingError as e:
        print(f"Error de sintaxis: {e}")
    finally:
        if conn is not None:
            conn.close()
def extract(username: str, password: str):
url_credentials = f'https://{username}:{password}@sgu.ulsa.edu.mx/psulsa/alumnos/consultainformacionalumnos/consultainformacion.aspx'
url = 'https://sgu.ulsa.edu.mx/psulsa/alumnos/consultainformacionalumnos/consultainformacion.aspx'
username_integer = int(username[2:])
def insert_alumno_extraccion(datos_html: str, materias_html: str, historial_html: str = 'error', materias_actuales_html: str = 'error'):
    """Upsert one student's scraped content into public.alumno_extraccion.

    The row is keyed by "Usuario_claveULSA", whose value comes from
    ``username_integer``. That name is not a parameter: it is looked up in
    the surrounding scope when the function runs.
    NOTE(review): confirm ``username_integer`` is visible from where this
    function is defined.

    On a key conflict the existing row receives the new datos_html,
    materias_html and historial_html, error_message is set to NULL and
    registrado is reset to its column default. materias_actuales_html is
    written only when the row is first inserted.
    NOTE(review): confirm that leaving it out of the UPDATE is intentional.

    Args:
        datos_html: Value for the datos_html column (trimmed by the query).
        materias_html: Value for the materias_html column (trimmed).
        historial_html: Text that the query trims and casts to JSONB.
            NOTE(review): the default 'error' does not look like valid JSON
            text, so the cast would presumably reject it -- confirm.
        materias_actuales_html: Value for the materias_actuales_html column
            (trimmed).

    Returns:
        The SQL text of the executed statement (``cursor.query`` decoded as
        UTF-8) on success, or None when an exception was caught and printed.
    """
    # The SQL literal is kept exactly as it was; it is also what the
    # function hands back through cursor.query.
    insert_query = """
INSERT INTO public.alumno_extraccion ("Usuario_claveULSA", datos_html, materias_html, historial_html, materias_actuales_html) VALUES (%s, TRIM(%s), TRIM(%s), TRIM(%s)::JSONB, TRIM(%s))
ON CONFLICT ("Usuario_claveULSA") DO UPDATE SET datos_html = EXCLUDED.datos_html, materias_html = EXCLUDED.materias_html, error_message = NULL, registrado = DEFAULT, historial_html = EXCLUDED.historial_html;
"""
    # Start as None so the cleanup below cannot fail with an unbound name
    # (and hide the real error) when connect() itself raises.
    conn = None
    try:
        conn = psycopg2.connect(
            dbname=os.getenv("DBNAME"),
            user=os.getenv("DBUSER"),
            password=os.getenv("DBPASSWORD"),
            host=os.getenv("DBHOST"),
            port=os.getenv("DBPORT")
        )
        # The cursor context manager closes the cursor on every exit path.
        with conn.cursor() as cur:
            cur.execute(insert_query, (username_integer, datos_html, materias_html, historial_html, materias_actuales_html))
            conn.commit()
            return cur.query.decode('utf-8')
    except psycopg2.ProgrammingError as e:
        print(f"Error de sintaxis: {e}")
    except psycopg2.IntegrityError as e:
        print(f"Error de integridad: {e}")
    except Exception as e:
        print(f"Error: {e}")
    finally:
        if conn is not None:
            conn.close()
def update_alumno_extraccion_error(error: str):
    """Record an extraction error for a student in public.alumno_extraccion.

    The row is keyed by "Usuario_claveULSA", whose value comes from
    ``username_integer``. That name is not a parameter: it is looked up in
    the surrounding scope when the function runs.
    NOTE(review): confirm ``username_integer`` is visible from where this
    function is defined.

    On a key conflict the existing row gets the new error_message, and
    materias_html and registrado are reset to their column defaults.

    Args:
        error: Error text; only its first 255 characters are stored.

    Only psycopg2.ProgrammingError is caught (and printed); any other
    exception propagates to the caller after the connection is closed.
    """
    update_query = """
INSERT INTO public.alumno_extraccion ("Usuario_claveULSA", error_message) VALUES (%s, %s)
ON CONFLICT ("Usuario_claveULSA") DO UPDATE SET error_message = EXCLUDED.error_message,
materias_html = DEFAULT, registrado = DEFAULT;
"""
    # Start as None so the cleanup below cannot fail with an unbound name
    # (and hide the real error) when connect() itself raises.
    conn = None
    try:
        conn = psycopg2.connect(
            dbname=os.getenv("DBNAME"),
            user=os.getenv("DBUSER"),
            password=os.getenv("DBPASSWORD"),
            host=os.getenv("DBHOST"),
            port=os.getenv("DBPORT")
        )
        # The cursor context manager closes the cursor on every exit path.
        with conn.cursor() as cur:
            cur.execute(update_query, (username_integer, error[:255]))
            conn.commit()
        print("Data updated successfully")
    except psycopg2.ProgrammingError as e:
        print(f"Error de sintaxis: {e}")
    finally:
        if conn is not None:
            conn.close()
try:
driver.get(url_credentials)
driver.get(url)
@@ -99,71 +99,70 @@ def extract(username: str, password: str):
elemento = WebDriverWait(driver, 3.5).until(
EC.presence_of_element_located((By.ID, 'ctl00_contenedor_HistorialAlumno1_lblBtnSeccionHAcademico'))
)
elemento.click()
# Get the HTML content of the materias element
# Intentar varias veces en caso de un `StaleElementReferenceException`
for _ in range(3):
try:
elemento.click()
break # Si se hace clic correctamente, salir del bucle
except StaleElementReferenceException:
print("Elemento 'stale', intentando de nuevo...")
elemento = driver.find_element(By.ID, 'ctl00_contenedor_HistorialAlumno1_lblBtnSeccionHAcademico')
# Obtener el HTML de las materias
materias_html = driver.find_element(By.ID, 'ctl00_contenedor_HistorialAlumno1_divHAcademico').get_attribute('innerHTML')
historial_html = driver.find_element(By.ID, 'ctl00_contenedor_HistorialAlumno1_gvMaterias').get_attribute('innerHTML')
# materias_actuales_html = driver.find_element(By.ID, 'ctl00_contenedor_HistorialAlumno1_div13').get_attribute('innerHTML')
# Manejar el historial como DataFrame
historial_html_io = StringIO(f"<table>{historial_html}</table>")
# Read the HTML table into a DataFrame
df = pd.read_html(historial_html_io)[0]
if 'GRUPO' not in df.columns:
raise KeyError("Column 'GRUPO' not found in the DataFrame")
raise KeyError("Column 'GRUPO' not found in the DataFrame")
# Remove decimals from the 'PERIODO' column (if present)
df['PERIODO'] = df['PERIODO'].apply(lambda x: str(x).replace('.0', '') if isinstance(x, (float, int)) else x)
# Convert the DataFrame to JSON
json_result = df[df['GRUPO'] != 'Promedio:'].to_json(orient='records')
# Connect to PostgreSQL database
query = insert_alumno_extraccion(datos_html, materias_html, json_result)
print("Data extracted successfully")
return json_result
except NoSuchElementException as e:
update_alumno_extraccion_error(str(e))
def se_puede():
    """Report whether today falls inside a configured extraction window.

    Runs a SELECT against alumno_extraccion_fecha looking for a row whose
    [fecha_inicio, fecha_fin] range contains CURRENT_DATE.

    Returns:
        True when such a row exists; False when none exists or when any
        error occurred (the error is printed, not raised).
    """
    # Pick the most recently created matching row.
    query = """
SELECT 1
FROM alumno_extraccion_fecha
WHERE CURRENT_DATE BETWEEN fecha_inicio AND fecha_fin
ORDER BY CREATED_AT DESC
LIMIT 1;
"""
    try:
        conn = psycopg2.connect(
            dbname=os.getenv("DBNAME"),
            user=os.getenv("DBUSER"),
            password=os.getenv("DBPASSWORD"),
            host=os.getenv("DBHOST"),
            port=os.getenv("DBPORT")
        )
        # Close explicitly: using a psycopg2 connection as a context manager
        # only ends the transaction, it does not close the connection.
        try:
            with conn.cursor() as cursor:
                cursor.execute(query)
                # Any returned row means the window is open.
                return cursor.fetchone() is not None
        finally:
            conn.close()
    except psycopg2.Error as e:
        # Database-specific failures (connection, query, ...).
        print(f"Error en la base de datos: {e}")
    except Exception as e:
        # Anything else.
        print(f"Error general: {e}")
    # Reached only after an error was printed.
    return False


# NOTE(review): this function appears under both names; the alias keeps
# callers of either one working -- drop it once callers are confirmed.
se_puede_extraer = se_puede
app = Flask(__name__)