# Markdown is not rendered in PyCharm.
Xeno-canto (https://www.xeno-canto.org) is a website containing bird song recordings and their classification. They have shared part of their collection with the Global Biodiversity Information Facility (GBIF).
It is OK to download some files, but we still need to contact them before downloading the complete set. See the terms.
We want to learn the individual bird sounds and then try to learn their songs.
For this to happen we need to
It seems the first item is https://www.xeno-canto.org/1
and the last one is currently https://www.xeno-canto.org/460846.
As we have not contacted them yet, we collect only 5 samples each time we run this page.
# Project-local helper module used throughout this script.
import project

# Number of recordings processed per run.
num_files = 5

# Location of the data directory. To keep the processed data somewhere else
# (for example on an external drive), assign that path instead:
#   data_dir = '/media/clemens/Maxtor/xeno-canto/'
# NOTE: plain string concatenation -- assumes get_project_dir() ends with a
# path separator; confirm against the project module.
data_dir = project.get_project_dir() + 'data/'
project.set_data_dir(data_dir)
project.print_stats()
import gbif
import pandas as pd

# Point the gbif helper at its working directory before asking it for data.
gbif.set_gbif_dir(project.get_gbif_dir())
# Presumably the GBIF download key of the shared xeno-canto collection;
# it is not referenced again in this section -- TODO confirm it is still needed.
gbif_id = '0025627-181108115102211'
gbif_csv = gbif.get_data()

# The file is tab-separated, hence the explicit separator.
df = pd.read_csv(gbif_csv, sep='\t')
# Project-specific fix-up of the frame. The return value is ignored, so any
# change it makes has to happen in place on df.
project.fix_gbif_df(df)
# Sanity check: did we actually load any rows?
df.head()
# Column overview; the frame carries some classification of the bird.
df.info()
# occurrenceID refers to http://data.biodiversitydata.nl/xeno-canto/observation/XC######
# Fetching that resource redirects to the recording's page, not to the download link.
# The part after the last '/' is the identifier itself:
df['occurrenceID'].head().map(lambda url: url.rsplit('/', 1)[1])
df['XC_ID'].head()
# Example recording: show where its fragments are located.
xc_id = '100082'
print('FP:', project.get_fragments_path(df, xc_id))

import split

# Copy the splitter defaults and override only the settings that differ,
# leaving split.defaults itself untouched.
args = dict(
    split.defaults,
    silence_threshold=0.01,
    min_silence_length=1.0,
    dry_run=False,
)

print('Data:', project.get_data_dir())
print('Fragments:', project.get_fragments_dir())
# Only the first num_files rows are processed.
project.build_fragments(df.head(num_files), args)
import XenoCanto as xc
# To pick up edits to the module without restarting the interpreter
# (the module is bound to the alias `xc`, so that is the name to reload):
#   import importlib; importlib.reload(xc)
xc.set_dir(project.get_sample_dir())

# Convert the first num_files recordings from MP3 to WAV.
# The loop body must be indented to belong to the loop; the loop variable is
# not called `id` so the builtin of that name is not shadowed here.
for sample_id in df['XC_ID'].head(num_files):
    xc.convert_mp3_to_wav(sample_id)
from scipy.io import wavfile

# Recording to inspect in detail. The name `id` is kept because it is a
# module-level variable that other cells may rely on.
id = '100113'
# fs is the sample rate in Hz; data holds the samples, one row per frame.
fs, data = wavfile.read(xc.get_wav_file(id))
print('data', data.shape)
print('Duration:', data.shape[0] / fs)
# wavfile.read returns a 1-D array for mono files, so shape[1] only exists
# for multi-channel recordings; report 1 channel instead of raising IndexError.
print('Channels:', data.shape[1] if data.ndim > 1 else 1)
import matplotlib.pyplot as plt
import numpy as np

# Is this stereo recording useful?
plt.plot(data)
plt.show()

# Peek at the same 100-sample window of each channel.
data[:,0][200000:200100]
data[:,1][200000:200100]

# Difference between the two channels. Integer PCM samples are widened first:
# subtracting in the file's own sample dtype wraps around silently whenever
# the true difference does not fit in that dtype.
left = data[:, 0]
right = data[:, 1]
if np.issubdtype(data.dtype, np.integer):
    left = left.astype(np.int64)
    right = right.astype(np.int64)
diff = left - right
# TODO: inspect diff.min() and diff.max() to see how far the channels diverge.
diff[200000:200100]

plt.plot(diff)
plt.show()
# References:
#   https://shallowsky.com/blog/programming/sonograms-in-python.html
#   https://matplotlib.org/examples/pylab_examples/specgram_demo.html
#
# Fs has to be the recording's real sample rate (fs from wavfile.read);
# specgram uses it to scale both the frequency and the time axis, so any
# other value mislabels the plot.
# Optional tuning that was tried before: NFFT=1024, noverlap=900

# First two seconds of the first channel.
Pxx, freqs, bins, im = plt.specgram(data[0:fs*2,0], Fs=fs)
plt.show()

# Whole recording, second channel.
Pxx, freqs, bins, im = plt.specgram(data[:,1], Fs=fs)
plt.show()
from scipy import signal
from scipy.io import wavfile
import scipy.io.wavfile

# Segment length in samples; consecutive segments overlap by M - 100 samples,
# i.e. the window advances 100 samples per step.
M = 1024
freqs, times, spect = signal.spectrogram(
    data[:, 0],
    fs=fs,
    # 'hann' is the canonical window name and is accepted by every SciPy
    # version; the legacy alias 'hanning' may be rejected by newer releases.
    window='hann',
    nperseg=M,
    noverlap=M - 100,
    detrend=False,
    scaling='spectrum',
)
freqs.size
freqs[0:20]
times.size
# spect has one row per frequency bin and one column per time segment, so this
# draws one line per time segment against the frequency-bin index.
plt.plot(spect)
plt.show()