Source code for lib.build

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
This module imports all CSV data sheet from the ``lib`` directory into a sqlite database. 
This script has to be executed each time a data sheet is added or edited.
You just need to call it without argument.

.. warning:: Each new call of the module overwrite the previous database. Make a backup before.

Included data
-------------

* Carbon steel
* Tool steel
* Stainless steel

Adding data
-----------

Only standard data from reputable and trusted sources are and should be used in the librairies. 

For materials properties, data should include in this order :

#. normalized name    
#. aliases (commercial or usual names, comma separated)
#. treatment (e.g. molded, rolled, normalized, etc.)
#. chemical formula 
#. modulus of elasticity (E, in GPa)
#. modulus of rigidity (G, in GPa)
#. Poisson's ratio (nu)
#. volumic mass (often called density, in kg/m³)
#. tensile yield strength at 0,2 % (S_y, in MPa)
#. ultimate tensile strength (S_ut, in MPa)
#. thermal expansion coefficient (alpha, in µ/°C)
#. brinell hardness
#. Rockwell B hardness
#. Rockwell C hardness
#. fatigue strength at 5E8 cycles (S_f, in MPa)
#. elongation at break (epsilon, ratio)

A ``.csv`` template is provided in the ``lib`` root.


#.    Check and ensure such data don't already exist to avoid duplicates
#.    Use the provided ``.csv`` template and open it in your favorite spreadsheet editor
#.    **IMPORTANT !** Use the same exact order of columns and symbols headers
#.    Ensure values have a correct number format in the final CSV file :

    *    numbers should use dots . as decimal separator
    *    numbers should not be into quotation marks "" meaning their format in the spreadsheet should be number
    *    numbers should not use thousands separators

#.    Include the data source (organization, book, etc.) of your data in the 1st line of the header. It will be used in the database as a comment and to ensure the trustability of the data
#.    Give your final CSV file a relevant name like ``material.csv``. For example : ``stainless-steel.csv``, ``carbon-steel.csv``, ``cast-iron.csv`` This name will be added into the database in the category column.
#.    Try to split your data into consistent files to keep less than ~ 100 entries in each files (for maintainability)
#.    Save your file into the proper directory : the one mentioning the source organization of the data or the standard used to compute/measure the data : ISO, ACNOR, SAE, ASHRAE, etc.


.. note:: 
    *    Use "dimensionless" as a unit for ratios and coefficients, and "none" if a unit is not relevant (for text) to ensure the correct column spacing.
    *    Use SI units ONLY ! British units will be converted on the fly in the code.
  
"""

import codecs
import csv
import os
import sqlite3
import sys


CURRENT_PATH = os.path.dirname(os.path.abspath(__file__))
PARENT_PATH = os.path.dirname(CURRENT_PATH)
sys.path.append(PARENT_PATH)

#TODO: update instead of overwriting previous database

#TODO: keep consistency if CSV columns headers don't match with database headers
    
[docs]def database_handle(db): """ Create or overwrite the sqlite3 database and open a connection if it does. :param db: database filename :type db: str :return: connection socket to the sqlite3 database """ database = open(os.path.abspath(db), "w") database.close() connection = sqlite3.connect(db) return connection
[docs]def create_tables(cursor, prop): """ Create the relevant tables into the database :param cursor: sqlite 3 cursor :param prop: the desired property - for now : "materials" """ if prop == "materials": cursor.execute("""CREATE TABLE IF NOT EXISTS materials (category text, -- category of material e.g. stainless steel, or tools steel, or carbon steel, or brass etc. source text, -- source of the data, put in the data-sheet header name text not null, -- normalized name of the material e.g. 4130, or 1010 aliases text, -- commercial or usual names coma separated e.g. Chromoly treatment text, -- description of thermical and mechanical manufacturing chemical formula text, -- list of chemical species E real, -- GPa - modulus of elasticity G real, -- GPa - modulus of rigidity nu real, -- dimensionless - Poisson's ratio rho real, -- kg/m^3 - volumic mass S_y real, -- MPa - tensile yield strength at 0.2% offset S_ut real, -- MPa - ultimate tensile strength alpha real, -- E-6 / °C - thermal expansion coefficient HB integer, -- Brinell hardness HRB integer, -- Rockwell B hardness HRC integer, -- Rockwell C hardness S_f real, -- MPa - fatigue strength at 5E8 cycles epsilon real -- dimensionless - elongation at break ) """)
[docs]def check_pattern(line, prop): """ Check and ensure that the properties are given in the expected order to avoid mixing up the columns :param line: the header line corresponding to the symbols of the given columns :type line: str list :param prop: the desired property to check :return: raise an error if given and expected headers don't match """ if prop == 'materials': pattern = ['symbol', 'none', 'none', 'none', 'E', # GPa 'G', # GPa 'nu', # dimensionless 'rho', # kg/m^3 'S_y', # MPa 'S_ut', # MPa 'alpha', # E-6/°C 'HB', 'HRB', 'HRC', 'S_f', # MPa 'epsilon' ] if pattern != line: raise NameError( "Database pattern and CSV file pattern do not match \n Given : \t%s \n Expected :\t%s" % (line, pattern))
[docs]def SI_convert(line, prop): """ Convert usual engineering units & multiples to SI units to normalize database .. note:: the units and multiple handled from the CSV data-sheet are considered\ given in usual engineering habits (GPa, MPa, etc.) because they are human-readable\ and standard tables are given is such units. \ However, to make the database unit-independant and facilitate later querying,\ multiples have to be normalized into SI standards. """ if prop == "materials": # Mandatory values line[4] = float(line[4]) * 1E9 #: E : Convert GPa in Pa (SI) line[5] = float(line[5]) * 1E9 #: G : Convert GPa in Pa (SI) line[8] = float(line[8]) * 1E6 #: S_y : Convert MPa in Pa (SI) line[9] = float(line[9]) * 1E6 #: S_ut : Convert MPa in Pa (SI) # Optional values try: line[10] = float(line[10]) * 1E-6 #: alpha : Convert µ/°C in 1/°C except ValueError: print("Warning : Thermal expansion coefficient is missing and will be ignored") return line
[docs]def insert_property(prop, cursor, comment, category, line): """ Build the database insertion with the relevant formatted data :param prop: the property to build :param cursor: the database cursor handler :param comment: the comment to add in every insertion, namely : the source of the data - should be the first line of the CSV data-sheet :param category: the category of the property to add. For exemple : cast iron, stainless steel, carbon steel, etc. :param line: the data line from the CSV data-sheet :type prop: string :type cursor: sqlite3 handler :type comment: string :type category: string :type line: list of strings """ if prop == "materials": line = SI_convert(line, prop) save = tuple([category, comment] + line) print(save) cursor.execute("""INSERT INTO materials VALUES ( ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? ) """ , save)
[docs]def parse_csv(PATH, cursor, database, prop): """ Parse the CSV files and process their data :param PATH: root of all the librairies :param cursor: sqlite3 handler :param database: sqlite file :param prop: the given property """ lines_added = 0 for (directory, _, files) in os.walk(PATH + prop): for f in files: if f.endswith('.csv'): print(os.path.join(directory, f)) with codecs.open(os.path.join(directory, f), 'r', encoding="utf-8", errors="ignore") as csv_file: # File name : extract the category of the data-sheet category = f.replace('.csv', '').replace('-', ' ') reader = csv.reader(csv_file, delimiter='\t') i = 0 for line in reader: if i == 0: # First line : extract the comment/source of the # data comment = line[0] if i == 2: # Third line : extract the columns symbols header # and check them check_pattern(line, prop) if i > 3: # Fourth line and following : insert data into # database insert_property( prop, cursor, comment, category, line) i = i + 1 lines_added = lines_added + 1 print(lines_added, " lines added")
[docs]def build_property(prop): """ Sequence of functions to build a new property into the database """ PATH = os.path.dirname(prop) db = database_handle('properties.db') cursor = db.cursor() create_tables(cursor, prop) parse_csv(PATH, cursor, db, prop) db.commit() db.close()
if __name__ == '__main__': properties = ["materials"] for element in properties: build_property(element)