commit a60f6c379e53d58864d48edeee11fc842daf5f4a Author: bytedream Date: Wed Jan 17 17:37:05 2024 +0100 Initial commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c5855b7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.venv +*.db diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..cdf199e --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 bytedream + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
def check_sqlite(conn: sqlite3.Connection):
    """Create the scraper's tables and indexes on ``conn`` if they are missing.

    Every statement uses ``IF NOT EXISTS``, so calling this on an already
    initialised database leaves the existing schema untouched. The changes
    are committed before returning.

    :param conn: open sqlite connection the schema is created on
    """
    create_filaments = '''
    CREATE TABLE IF NOT EXISTS filaments (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        -- internal bambulab id
        store_id INTEGER UNIQUE NOT NULL,
        -- region where the filament is stored
        region TEXT NOT NULL,
        -- filament title (e.g. PLA Basic, PLA Matte, ...)
        title TEXT NOT NULL,
        -- filament vendor (e.g. Bambu Lab, Bambu Lab EU, Bambu Lab US)
        vendor TEXT NOT NULL,
        -- filament type (e.g. PLA Basic, PLA Matte, ...). unlike `store_id`, this is the same across all languages
        type TEXT NOT NULL,
        -- date when this filament got created / published
        created INTEGER NOT NULL
    )
    '''
    index_filaments_store_id = '''
    CREATE INDEX IF NOT EXISTS filaments_store_id_idx ON filaments (store_id)
    '''

    create_variants = '''
    CREATE TABLE IF NOT EXISTS filament_variants (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        -- internal bambulab id
        store_id INTEGER UNIQUE NOT NULL,
        -- in most cases the type of the filament (e.g. Filament with spool, Refill, ...). might be something other with special products, e.g. a filament name in starter packs
        option1 TEXT,
        -- in most cases the actual filament weight. might be something other with special products, e.g. a filament name in starter packs
        option2 TEXT,
        -- in most cases the filament color. might be something other with special products, e.g. a filament name in starter packs
        option3 TEXT,
        -- identifier between different languages. might be null if the variant contains multiple filaments (e.g. this is the case with starter packs)
        sku TEXT,
        -- grams of the filament (+ the spool if applicable). might be null if the variant contains multiple filaments (e.g. this is the case with starter packs)
        grams REAL,
        -- date when this variant got created / published
        created INTEGER NOT NULL,
        -- relation to parent filament
        filament_id INTEGER NOT NULL,
        FOREIGN KEY (filament_id) REFERENCES filaments (id)
    )
    '''
    index_variants_store_id = '''
    CREATE INDEX IF NOT EXISTS filament_variants_store_id_idx ON filament_variants (store_id)
    '''

    create_measurements = '''
    CREATE TABLE IF NOT EXISTS measurements (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        -- timestamp of a measurement
        timestamp INTEGER NOT NULL
    )
    '''
    index_measurements_timestamp = '''
    CREATE INDEX IF NOT EXISTS measurements_timestamp_idx ON measurements (timestamp)
    '''

    create_availability = '''
    CREATE TABLE IF NOT EXISTS availability (
        -- 0 if not available, 1 if available
        available INTEGER NOT NULL,
        measurement_id INTEGER NOT NULL,
        filament_variant_id INTEGER NOT NULL,
        FOREIGN KEY (measurement_id) REFERENCES measurements (id),
        FOREIGN KEY (filament_variant_id) REFERENCES filament_variants (id)
    )
    '''

    create_prices = '''
    CREATE TABLE IF NOT EXISTS prices (
        price REAL NOT NULL,
        measurement_id INTEGER NOT NULL,
        filament_variant_id INTEGER NOT NULL,
        FOREIGN KEY (measurement_id) REFERENCES measurements (id),
        FOREIGN KEY (filament_variant_id) REFERENCES filament_variants (id)
    )
    '''

    # order matters: each table is created before the index defined on it
    # and before the tables whose foreign keys reference it
    schema_statements = (
        create_filaments,
        index_filaments_store_id,
        create_variants,
        index_variants_store_id,
        create_measurements,
        index_measurements_timestamp,
        create_availability,
        create_prices,
    )
    for statement in schema_statements:
        conn.execute(statement)

    conn.commit()
# products.json endpoint of the filament collection of every scraped storefront, keyed by the
# region name that gets stored in `filaments.region`
# NOTE(review): the endpoints are requested without any paging parameters; if a collection ever
# holds more products than the storefront returns per page, the rest would be missed - confirm
_PRODUCT_URLS = {
    'global': 'https://store.bambulab.com/collections/bambu-lab-3d-printer-filament/products.json',
    'us': 'https://us.store.bambulab.com/collections/bambu-lab-3d-printer-filament/products.json',
    'eu': 'https://eu.store.bambulab.com/collections/bambu-lab-3d-printer-filament/products.json',
    'uk': 'https://uk.store.bambulab.com/collections/bambu-lab-3d-printer-filament/products.json',
    'au': 'https://au.store.bambulab.com/collections/bambu-lab-3d-printer-filament/products.json',
    'ca': 'https://ca.store.bambulab.com/collections/bambu-lab-3d-printer-filament/products.json',
    'jp': 'https://jp.store.bambulab.com/collections/bambu-lab-3d-printer-filament/products.json',
}

# seconds to wait for a storefront before giving up instead of blocking forever
_REQUEST_TIMEOUT = 30

# most recent stored value per variant. `measurements.id` is AUTOINCREMENT, so the row with the
# highest measurement_id is the newest one; sqlite takes the bare `available` / `price` column
# from exactly the row that holds MAX(measurement_id)
_LAST_AVAILABILITY_QUERY = '''
    SELECT filament_variant_id, available, MAX(measurement_id)
    FROM availability
    GROUP BY filament_variant_id
'''
_LAST_PRICE_QUERY = '''
    SELECT filament_variant_id, price, MAX(measurement_id)
    FROM prices
    GROUP BY filament_variant_id
'''


def _fetch_products(url: str) -> list:
    """Download one products.json document and return its ``products`` list."""
    response = requests.request('GET', url, timeout=_REQUEST_TIMEOUT)
    # fail with a clear HTTP error instead of a confusing JSON / KeyError further down
    response.raise_for_status()
    return response.json()['products']


def _to_epoch(iso_timestamp: str) -> int:
    """Convert an ISO 8601 timestamp to unix seconds, honouring its UTC offset."""
    # `time.mktime(dt.utctimetuple())` would read the UTC fields as *local* time and shift the
    # result by the machine's UTC offset; `datetime.timestamp()` uses the offset of the value
    return int(datetime.fromisoformat(iso_timestamp).timestamp())


def _changed_values(cur: sqlite3.Cursor, last_value_query: str, current: dict) -> dict:
    """Return the entries of ``current`` whose value differs from the last stored one.

    Variants without any stored value count as changed. Both value columns are
    ``NOT NULL``, so a missing entry (``None``) never equals a scraped value.
    """
    previous = {row[0]: row[1] for row in cur.execute(last_value_query)}
    return {
        variant_id: value
        for variant_id, value in current.items()
        if previous.get(variant_id) != value
    }


def cmd(conn: sqlite3.Connection):
    """Scrape all storefronts once and store availability / price changes.

    Unknown filaments and variants are inserted, a new ``measurements`` row is
    created for this run, and for every variant whose availability or price
    differs from its most recently stored value (or that has none yet) a row
    referencing that measurement is written. Everything is committed as one
    transaction; on any error the transaction is rolled back and the error is
    re-raised.

    :param conn: sqlite connection whose schema was created by ``check_sqlite``
    :raises requests.RequestException: if a storefront cannot be fetched
    """
    timestamp = int(time.time())
    logging.info('scraping at %d', timestamp)

    # fetch everything first, so a failing storefront aborts the run before the database is touched
    all_products = {region: _fetch_products(url) for region, url in _PRODUCT_URLS.items()}

    cur = conn.cursor()

    # store id -> internal row id of everything that is already known
    all_filament_ids: dict[int, int] = {
        store_id: row_id for row_id, store_id in cur.execute('SELECT id, store_id FROM filaments')
    }
    all_variant_ids: dict[int, int] = {
        store_id: row_id for row_id, store_id in cur.execute('SELECT id, store_id FROM filament_variants')
    }

    # internal variant id -> value seen in this run
    availability: dict[int, int] = {}
    prices: dict[int, float] = {}

    # commits when the block succeeds, rolls back when it raises
    with conn:
        for region, products in all_products.items():
            for product in products:
                # get the internal id for the filament or insert it if it does not exist
                if (filament_id := all_filament_ids.get(product['id'])) is None:
                    cur.execute('INSERT INTO filaments (store_id, region, title, vendor, type, created) VALUES (?, ?, ?, ?, ?, ?)', (
                        product['id'],
                        region,
                        product['title'],
                        product['vendor'],
                        product['product_type'],
                        _to_epoch(product['created_at']),
                    ))
                    filament_id = cur.lastrowid
                    all_filament_ids[product['id']] = filament_id

                for variant in product['variants']:
                    # get the internal id for the filament variant or insert it if it does not exist
                    if (filament_variant_id := all_variant_ids.get(variant['id'])) is None:
                        cur.execute('INSERT INTO filament_variants (store_id, option1, option2, option3, sku, grams, created, filament_id) VALUES (?, ?, ?, ?, ?, ?, ?, ?)', (
                            variant['id'],
                            variant['option1'],
                            variant['option2'],
                            variant['option3'],
                            # empty sku / zero grams are stored as NULL
                            variant['sku'] or None,
                            variant['grams'] or None,
                            _to_epoch(variant['created_at']),
                            filament_id,
                        ))
                        filament_variant_id = cur.lastrowid
                        all_variant_ids[variant['id']] = filament_variant_id

                    availability[filament_variant_id] = int(variant['available'])
                    prices[filament_variant_id] = float(variant['price'])

        cur.execute('INSERT INTO measurements (timestamp) VALUES (?)', (timestamp,))
        measurement_id = cur.lastrowid

        # only changes are stored, so drop every value that equals the last stored one
        availability = _changed_values(cur, _LAST_AVAILABILITY_QUERY, availability)
        prices = _changed_values(cur, _LAST_PRICE_QUERY, prices)

        if availability:
            logging.info('found %d availability changes', len(availability))
        if prices:
            logging.info('found %d prices changes', len(prices))

        cur.executemany(
            'INSERT INTO availability (available, measurement_id, filament_variant_id) VALUES (?, ?, ?)',
            [(available, measurement_id, variant_id) for variant_id, available in availability.items()],
        )
        cur.executemany(
            'INSERT INTO prices (price, measurement_id, filament_variant_id) VALUES (?, ?, ?)',
            [(price, measurement_id, variant_id) for variant_id, price in prices.items()],
        )


if __name__ == '__main__':
    # --- cli parser --- #
    parser = argparse.ArgumentParser()
    parser.add_argument('--db', help='Path to the sqlite file where the stocks should be saved in', required=True)

    # --- cli input --- #
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s [%(asctime)s] - %(message)s', datefmt='%Y-%m-%d %H:%M:%S', level=logging.INFO)

    conn = sqlite3.connect(args.db)
    try:
        check_sqlite(conn)

        cmd(conn)
    finally:
        # release the database file even if scraping fails
        conn.close()
is a None in the dataset. when I manually review it there is no None region\n", + "all_regions.remove(None)\n", + "\n", + "available = {region: [] for region in all_regions}\n", + "not_available = {region: [] for region in all_regions}\n", + "\n", + "for timestamp in all_timestamps:\n", + " values = df.query(f'timestamp <= {timestamp}').drop(columns=['timestamp']).drop_duplicates(keep='last', subset=['filament_variant_id', 'region'])\n", + " \n", + " for region in all_regions:\n", + " available[region].append(len(values.query(f'(available == 1) and (region == \"{region}\")')))\n", + " not_available[region].append(len(values.query(f'(available == 0) and (region == \"{region}\")')))\n", + "\n", + "fig = go.Figure()\n", + "for region in all_regions:\n", + " timestamp_datetime = [datetime.fromtimestamp(timestamp) for timestamp in all_timestamps]\n", + " fig.add_trace(go.Scatter(x=timestamp_datetime, y=available[region], mode='markers+lines', name=f'Available ({region.upper()})'))\n", + " fig.add_trace(go.Scatter(x=timestamp_datetime, y=not_available[region], mode='markers+lines', name=f'Not available ({region.upper()})'))\n", + "fig.update_layout(title='Total availability', xaxis_title='Time', yaxis_title='Availability', hovermode='x unified')\n", + "fig.show()\n", + "\n", + "fig = go.Figure()\n", + "for region in all_regions:\n", + " timestamp_datetime = [datetime.fromtimestamp(timestamp) for timestamp in all_timestamps]\n", + " ratios = []\n", + " ratio_texts = []\n", + " for i in range(len(available[region])):\n", + " ratios.append(round((available[region][i] / (available[region][i] + not_available[region][i])) * 100, 2))\n", + " ratio_texts.append(f'({available[region][i]} / {available[region][i] + not_available[region][i]})')\n", + " \n", + " fig.add_trace(go.Scatter(x=timestamp_datetime, y=ratios, text=ratio_texts, mode='markers+lines', name=region.upper()))\n", + "fig.update_layout(title='Availability Ratio', xaxis_title='Time', yaxis_title='Availability in %', 
yaxis_range=[0, 100], yaxis_ticksuffix = '%', hovermode='x unified')\n", + "fig.show()" + ], + "metadata": { + "collapsed": false + }, + "id": "19ab0d8321462a7e", + "execution_count": null + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..e467db1 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +ipywidgets~=8.1.1 +jupyterlab~=4.0.10 +notebook~=7.0.6 +pandas~=2.1.4 +plotly~=5.18.0 +requests~=2.31.0