---
title: "CASD : Test de l'utilisation de Spark"
keywords: fastai
sidebar: home_sidebar
nb_path: "notebooks/extractions_base_des_impots/test_spark.ipynb"
---
# Smoke test of Spark: start a session, read a CSV file and a parquet dataset,
# then count the parquet rows through Spark SQL.

import findspark

# Locate the Spark installation and make pyspark importable; this has to run
# before the pyspark import below.
findspark.init()
from pyspark.sql import SparkSession

# Reuse the active session if there is one, otherwise create it.
spark = SparkSession.builder.appName("Basics").getOrCreate()
print(spark.version)

# CSV input.
calib = r"C:\Users\Public\Documents\TRAVAIL\csg\data_out\CalibPOTE_2019.csv"
# Read the CSV from `calib`. The previous code passed `parquet_path` here,
# which is only assigned further down (NameError on a top-to-bottom run) and
# points at a parquet dataset rather than a CSV file.
# NOTE(review): no reader options are passed, so Spark's defaults apply
# (presumably no header row, no schema inference) — confirm this is intended.
df = spark.read.csv(calib)

# Glob pattern for HDF extraction files; not used in the visible part of this
# script.
hdf_pote = (
r"C:\Users\Public\Documents\TRAVAIL\csg\data_in\extraction_assiettes_csg\*.hdf"
)
hdf_pote  # notebook cell display; a no-op when run as a plain script

parquet_path = r"C:\Users\Public\Documents\TRAVAIL\csg\data_in\assiettes_csg.parquet"
parquet_path  # notebook cell display; a no-op when run as a plain script

# Load the parquet dataset (this rebinds `df`, replacing the CSV DataFrame)
# and register it as the temporary view "csg" so it can be queried with SQL.
df = spark.read.parquet(parquet_path)
df.createOrReplaceTempView("csg")

# Count the rows of the view and print the result.
df_count = spark.sql("SELECT count(*) FROM csg")
df_count.show()
import plotly