A Proof of Concept package to build Data Pipelines from Eurostat DataSources.
npm i @decoder-leco/poc-eurostat-data-transformers
# or, with pnpm:
# pnpm add @decoder-leco/poc-eurostat-data-transformers
import pl from "nodejs-polars"
import { ingesters, transformers } from "@decoder-leco/poc-eurostat-data-transformers"

/**
 * Pulls https://github.com/decoderleco/deces_europe/blob/main/data/csv/proj_19np__custom_2224172_linear.csv
 */
const populationLinearProjection_2019_2024_Ingester = new ingesters.DecoderLecoGithubDataIngester(
  // may be a git branch, a git tag, or a git commit hash
  "main",
  // path, inside the repo, of the CSV file to pull
  "data/csv/proj_19np__custom_2224172_linear.csv",
  // any local folder of your choosing, where the file will be persisted
  "./data_pipeline_workdir/42"
)

const populationLinearProjection_2019_2024_Transformer = new transformers.DecoderLecoGithubDataTransformer(
  "./data_pipeline_workdir/42/data/csv/proj_19np__custom_2224172_linear.csv",
  "./data_pipeline_workdir/42/transformedData/proj_19np_transformed.csv"
)

/**
 * Runs the ingestion step, then the transformation step, and resolves
 * with the transformed DataFrame.
 */
const runExamplePipeline = async (): Promise<pl.DataFrame> => {
  await populationLinearProjection_2019_2024_Ingester.run()
  return populationLinearProjection_2019_2024_Transformer.run()
}

const resultDF = await runExamplePipeline()
import pl from "nodejs-polars"
import { ingesters, transformers } from "@decoder-leco/poc-eurostat-data-transformers/"
/**
* This will pull https://github.com/decoderleco/deces_europe/blob/main/data/csv/deces_ireland.csv
*/
const irelandPopulationDeathsData_Ingester = new ingesters.DecoderLecoGithubDataIngester(
/**
* may be a git branch, a git tag, or a
* git commit hash
*/
"main",
/**
* path in the repo of the CSV file
*/
"data/csv/deces_ireland.csv",
/**
* any folder path in the local filesystem, as
* you choose, where the file will locally be
* persisted
*/
`./data_pipeline_workdir/42`
)
const irelandPopulationDeathsData_Transformer = new transformers.DecoderLecoGithubDataTransformer(
"./data_pipeline_workdir/42/data/csv/deces_ireland.csv",
"./data_pipeline_workdir/42/transformedData/deces_ireland.csv"
)
const runExamplePipeline = async(): Promise<pl.DataFrame> => {
await irelandPopulationDeathsData_Ingester.run()
await irelandPopulationDeathsData_Transformer.run()
}
const resultDF = await runExamplePipeline()
// Summing a column of a polars DataFrame:
// FIX: the heading text ("polars" / "dataframe:") had been fused onto the
// import line by the docs extraction; restored as a comment.
import pl from "nodejs-polars"
import * as utils from "../../src/utils"

/**
 * Four owners with 2 + 3 + 5 + 7 pairs of shoes — expected total sum: 17.
 * FIX: the original fixture reused ID 3 for both Marc and Luc; Luc's ID is
 * corrected to 4 so the IDs are unique (the expected sum is unchanged).
 */
const testShoeOwnersCSV1 = `ID,Name,Birthday,NumberOfOwnedShoePairs
1,Jean,20BC-07-12,2
2,Mathieu,25BC-09-20,3
3,Marc,31BC-03-08,5
4,Luc,18BC-07-11,7`

// Parse the in-memory CSV string into a DataFrame.
const testDF1 = pl.readCSV(testShoeOwnersCSV1, { sep: "," });

// totalSum sums the named column over the whole DataFrame; expected: 17.
const numberOfOwnedShoePairsResult = await utils.PolarsDataFramesUtils.totalSum(`NumberOfOwnedShoePairs`, testDF1);
console.log(`numberOfOwnedShoePairsResult = [${numberOfOwnedShoePairsResult}]`);
pnpm run gen:api-docs
pnpm run build:docs:astro
# pnpm run dev:docs:astro
Generated using TypeDoc