187 lines
6.7 KiB
Python
187 lines
6.7 KiB
Python
import psycopg2
|
|
from selenium import webdriver
|
|
from selenium.webdriver.chrome.service import Service
|
|
from selenium.webdriver.common.by import By
|
|
from selenium.webdriver.common.keys import Keys
|
|
from selenium.webdriver.chrome.options import Options
|
|
from selenium.common.exceptions import NoSuchElementException
|
|
from selenium.webdriver.support.ui import WebDriverWait
|
|
from selenium.webdriver.support import expected_conditions as EC
|
|
|
|
import os
|
|
from flask import Flask, request, jsonify
|
|
from waitress import serve
|
|
import pandas as pd
|
|
from io import StringIO
|
|
|
|
|
|
|
|
# Set options for the Chromium browser
|
|
chrome_options = Options()
|
|
chrome_options.add_argument("--headless") # Optional: Run Chromium in headless mode
|
|
chrome_options.add_argument("--no-sandbox")
|
|
chrome_options.add_argument("--disable-dev-shm-usage")
|
|
|
|
# Specify the path to the Chromium driver
|
|
service = Service('/usr/bin/chromedriver')
|
|
|
|
driver = None
|
|
|
|
def extract(username: str, password: str):
|
|
url_credentials = f'https://{username}:{password}@sgu.ulsa.edu.mx/psulsa/alumnos/consultainformacionalumnos/consultainformacion.aspx'
|
|
url = 'https://sgu.ulsa.edu.mx/psulsa/alumnos/consultainformacionalumnos/consultainformacion.aspx'
|
|
username_integer = int(username[2:])
|
|
|
|
def insert_alumno_extraccion(datos_html: str, materias_html: str, historial_html: str = 'error', materias_actuales_html: str = 'error'):
|
|
try:
|
|
conn = psycopg2.connect(
|
|
dbname=os.getenv("DBNAME"),
|
|
user=os.getenv("DBUSER"),
|
|
password=os.getenv("DBPASSWORD"),
|
|
host=os.getenv("DBHOST"),
|
|
port=os.getenv("DBPORT")
|
|
)
|
|
cur = conn.cursor()
|
|
|
|
insert_query = """
|
|
INSERT INTO public.alumno_extraccion ("Usuario_claveULSA", datos_html, materias_html, historial_html, materias_actuales_html) VALUES (%s, TRIM(%s), TRIM(%s), TRIM(%s)::JSONB, TRIM(%s))
|
|
ON CONFLICT ("Usuario_claveULSA") DO UPDATE SET datos_html = EXCLUDED.datos_html, materias_html = EXCLUDED.materias_html, error_message = NULL, registrado = DEFAULT;
|
|
"""
|
|
cur.execute(insert_query, (username_integer, datos_html, materias_html, historial_html, materias_actuales_html))
|
|
|
|
conn.commit()
|
|
return cur.query.decode('utf-8')
|
|
except psycopg2.ProgrammingError as e:
|
|
print(f"Error de sintaxis: {e}")
|
|
except psycopg2.IntegrityError as e:
|
|
print(f"Error de integridad: {e}")
|
|
except Exception as e:
|
|
print(f"Error: {e}")
|
|
finally:
|
|
cur.close()
|
|
conn.close()
|
|
|
|
def update_alumno_extraccion_error(error: str):
|
|
try:
|
|
|
|
conn = psycopg2.connect(
|
|
dbname=os.getenv("DBNAME"),
|
|
user=os.getenv("DBUSER"),
|
|
password=os.getenv("DBPASSWORD"),
|
|
host=os.getenv("DBHOST"),
|
|
port=os.getenv("DBPORT")
|
|
)
|
|
cur = conn.cursor()
|
|
|
|
update_query = """
|
|
INSERT INTO public.alumno_extraccion ("Usuario_claveULSA", error_message) VALUES (%s, %s)
|
|
ON CONFLICT ("Usuario_claveULSA") DO UPDATE SET error_message = EXCLUDED.error_message,
|
|
materias_html = DEFAULT, registrado = DEFAULT;
|
|
"""
|
|
cur.execute(update_query, (username_integer, error[:255]))
|
|
|
|
conn.commit()
|
|
print("Data updated successfully")
|
|
except psycopg2.ProgrammingError as e:
|
|
print(f"Error de sintaxis: {e}")
|
|
|
|
finally:
|
|
cur.close()
|
|
conn.close()
|
|
|
|
try:
|
|
driver.get(url_credentials)
|
|
driver.get(url)
|
|
|
|
# si no existe el elemento, ctl00_contenedor_control
|
|
datos_html = driver.find_element(By.ID, 'ctl00_contenedor_control').get_attribute('innerHTML')
|
|
|
|
elemento = WebDriverWait(driver, 3.5).until(
|
|
EC.presence_of_element_located((By.ID, 'ctl00_contenedor_HistorialAlumno1_lblBtnSeccionHAcademico'))
|
|
)
|
|
elemento.click()
|
|
|
|
# Get the HTML content of the materias element
|
|
materias_html = driver.find_element(By.ID, 'ctl00_contenedor_HistorialAlumno1_divHAcademico').get_attribute('innerHTML')
|
|
historial_html = driver.find_element(By.ID, 'ctl00_contenedor_HistorialAlumno1_gvMaterias').get_attribute('innerHTML')
|
|
# materias_actuales_html = driver.find_element(By.ID, 'ctl00_contenedor_HistorialAlumno1_div13').get_attribute('innerHTML')
|
|
|
|
historial_html_io = StringIO(f"<table>{historial_html}</table>")
|
|
# Read the HTML table into a DataFrame
|
|
df = pd.read_html(historial_html_io)[0]
|
|
|
|
|
|
if 'GRUPO' not in df.columns:
|
|
raise KeyError("Column 'GRUPO' not found in the DataFrame")
|
|
|
|
# Convert the DataFrame to JSON
|
|
json_result = df[df['GRUPO'] != 'Promedio:'].to_json(orient='records')
|
|
|
|
# Connect to PostgreSQL database
|
|
query = insert_alumno_extraccion(datos_html, materias_html, json_result)
|
|
|
|
print("Data extracted successfully")
|
|
return json_result
|
|
|
|
except NoSuchElementException as e:
|
|
update_alumno_extraccion_error(str(e))
|
|
def se_puede_extraer():
|
|
try:
|
|
conn = conn = psycopg2.connect(
|
|
dbname=os.getenv("DBNAME"),
|
|
user=os.getenv("DBUSER"),
|
|
password=os.getenv("DBPASSWORD"),
|
|
host=os.getenv("DBHOST"),
|
|
port=os.getenv("DBPORT")
|
|
)
|
|
cursor = conn.cursor()
|
|
|
|
# SELECCIONAR ULTIMA planeacion
|
|
|
|
|
|
query = """
|
|
SELECT 1
|
|
FROM alumno_extraccion_fecha
|
|
WHERE CURRENT_DATE BETWEEN fecha_inicio AND fecha_fin
|
|
ORDER BY CREATED_AT DESC
|
|
LIMIT 1;
|
|
"""
|
|
# Ejecuta la consulta
|
|
cursor.execute(query)
|
|
result = cursor.fetchone()
|
|
|
|
# Verifica si se obtuvo algún resultado
|
|
exists = result is not None
|
|
|
|
# Cierra el cursor y la conexión
|
|
cursor.close()
|
|
conn.close()
|
|
|
|
return exists
|
|
except Exception as e:
|
|
print(f"Error: {e}")
|
|
return False
|
|
|
|
app = Flask(__name__)
|
|
|
|
@app.route('/calificaciones', methods=['POST'])
|
|
def main():
|
|
global driver
|
|
# Initialize the WebDriver
|
|
driver = webdriver.Chrome(service=service, options=chrome_options)
|
|
|
|
username = request.form.get('clave')
|
|
password = request.form.get('password')
|
|
se_puede = se_puede_extraer()
|
|
if se_puede:
|
|
query = extract(username, password)
|
|
|
|
# Close the session
|
|
driver.quit()
|
|
|
|
return jsonify({"message": "Data extracted successfully", "en-fecha": se_puede})
|
|
|
|
if __name__ == '__main__':
|
|
serve(app, host='0.0.0.0', port=5000)
|
|
|