---
title: "CASD : Test de l'utilisation de Spark"
keywords: fastai
sidebar: home_sidebar
nb_path: "notebooks/extractions_base_des_impots/test_spark.ipynb"
---
{% raw %}
{% endraw %} {% raw %}
import findspark
{% endraw %} {% raw %}
# Initialise findspark before the pyspark import in the next cell.
# NOTE(review): presumably needed so that `pyspark` is importable from the
# local Spark installation — confirm against the environment setup.
findspark.init()
{% endraw %} {% raw %}
from pyspark.sql import SparkSession

# Obtain a Spark session for the application named "Basics" (an existing
# session is reused if there is one), then print the Spark version in use.
spark = (
    SparkSession.builder
    .appName("Basics")
    .getOrCreate()
)
print(spark.version)
{% endraw %} {% raw %}
# Location of the 2019 "CalibPOTE" CSV file in the output data directory.
calib = (
    r"C:\Users\Public\Documents\TRAVAIL\csg\data_out\CalibPOTE_2019.csv"
)
{% endraw %} {% raw %}
# Read the calibration CSV whose path is stored in `calib`.
# The reader used to be given `parquet_path`, which is only assigned in a
# later cell (NameError when the notebook runs top to bottom) and which names
# a Parquet dataset rather than a CSV file.
df = spark.read.csv(calib)
{% endraw %} {% raw %}
# Glob pattern matching every .hdf file in the CSG extraction input directory.
hdf_pote = r"C:\Users\Public\Documents\TRAVAIL\csg\data_in\extraction_assiettes_csg\*.hdf"
# Bare expression: displays the value as the notebook cell output.
hdf_pote
{% endraw %} {% raw %}
# Location of the Parquet dataset read by the following cells.
parquet_path = (
    r"C:\Users\Public\Documents\TRAVAIL\csg\data_in\assiettes_csg.parquet"
)
# Bare expression: displays the value as the notebook cell output.
parquet_path
{% endraw %} {% raw %}
 
{% endraw %} {% raw %}
# Load the Parquet dataset located at `parquet_path` into a Spark DataFrame.
df = (
    spark.read
    .parquet(parquet_path)
)
{% endraw %} {% raw %}
# Register the DataFrame as the temporary view "csg" so it can be queried
# with Spark SQL, then count its rows and print the result.
df.createOrReplaceTempView("csg")
_count_query = "SELECT count(*) FROM csg"
df_count = spark.sql(_count_query)
df_count.show()
{% endraw %} {% raw %}
import plotly
{% endraw %} {% raw %}
 
{% endraw %}